npcpy 1.3.15__py3-none-any.whl → 1.3.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
npcpy/ft/rl.py CHANGED
@@ -6,27 +6,40 @@ import glob
6
6
  import json
7
7
  import os
8
8
  import pandas as pd
9
+ # Core imports that should always work
9
10
  try:
10
- from datasets import Dataset
11
-
12
- from peft import LoraConfig, PeftModel
13
11
  import torch
14
- from transformers import (
15
- AutoModelForCausalLM,
16
- AutoTokenizer,
17
- BitsAndBytesConfig
18
- )
19
- from trl import DPOTrainer, DPOConfig
20
- except:
21
- Dataset = None
22
- PeftModel = None
23
- DPOConfig = None
24
- DPOTrainer = None
12
+ except ImportError:
25
13
  torch = None
14
+
15
+ try:
16
+ from transformers import AutoModelForCausalLM, AutoTokenizer
17
+ except ImportError:
26
18
  AutoModelForCausalLM = None
27
19
  AutoTokenizer = None
20
+
21
+ try:
22
+ from transformers import BitsAndBytesConfig
23
+ except ImportError:
28
24
  BitsAndBytesConfig = None
29
25
 
26
+ try:
27
+ from datasets import Dataset
28
+ except ImportError:
29
+ Dataset = None
30
+
31
+ try:
32
+ from peft import LoraConfig, PeftModel
33
+ except ImportError:
34
+ LoraConfig = None
35
+ PeftModel = None
36
+
37
+ try:
38
+ from trl import DPOTrainer, DPOConfig
39
+ except ImportError:
40
+ DPOTrainer = None
41
+ DPOConfig = None
42
+
30
43
 
31
44
  import random
32
45
  from typing import List, Dict, Any, Optional, Callable
npcpy/gen/response.py CHANGED
@@ -588,6 +588,148 @@ def get_ollama_response(
588
588
  import time
589
589
 
590
590
 
591
+ def get_lora_response(
592
+ prompt: str = None,
593
+ model: str = None,
594
+ tools: list = None,
595
+ tool_map: Dict = None,
596
+ format: str = None,
597
+ messages: List[Dict[str, str]] = None,
598
+ stream: bool = False,
599
+ auto_process_tool_calls: bool = False,
600
+ **kwargs,
601
+ ) -> Dict[str, Any]:
602
+ """
603
+ Generate response using a LoRA adapter on top of a base model.
604
+ The adapter path should contain adapter_config.json with base_model_name_or_path.
605
+ """
606
+ print(f"🎯 get_lora_response called with model={model}, prompt={prompt[:50] if prompt else 'None'}...")
607
+
608
+ result = {
609
+ "response": None,
610
+ "messages": messages.copy() if messages else [],
611
+ "raw_response": None,
612
+ "tool_calls": [],
613
+ "tool_results": []
614
+ }
615
+
616
+ try:
617
+ import torch
618
+ from transformers import AutoTokenizer, AutoModelForCausalLM
619
+ from peft import PeftModel
620
+ print("🎯 Successfully imported torch, transformers, peft")
621
+ except ImportError as e:
622
+ print(f"🎯 Import error: {e}")
623
+ return {
624
+ "response": "",
625
+ "messages": messages or [],
626
+ "error": f"Missing dependencies for LoRA. Install with: pip install transformers peft torch. Error: {e}"
627
+ }
628
+
629
+ adapter_path = os.path.expanduser(model)
630
+ adapter_config_path = os.path.join(adapter_path, 'adapter_config.json')
631
+
632
+ if not os.path.exists(adapter_config_path):
633
+ return {
634
+ "response": "",
635
+ "messages": messages or [],
636
+ "error": f"No adapter_config.json found at {adapter_path}"
637
+ }
638
+
639
+ # Read base model from adapter config
640
+ try:
641
+ with open(adapter_config_path, 'r') as f:
642
+ adapter_config = json.load(f)
643
+ base_model_id = adapter_config.get('base_model_name_or_path')
644
+ if not base_model_id:
645
+ return {
646
+ "response": "",
647
+ "messages": messages or [],
648
+ "error": "adapter_config.json missing base_model_name_or_path"
649
+ }
650
+ except Exception as e:
651
+ return {
652
+ "response": "",
653
+ "messages": messages or [],
654
+ "error": f"Failed to read adapter config: {e}"
655
+ }
656
+
657
+ if prompt:
658
+ if result['messages'] and result['messages'][-1]["role"] == "user":
659
+ result['messages'][-1]["content"] = prompt
660
+ else:
661
+ result['messages'].append({"role": "user", "content": prompt})
662
+
663
+ if format == "json":
664
+ json_instruction = """If you are returning a json object, begin directly with the opening {.
665
+ Do not include any additional markdown formatting or leading ```json tags in your response."""
666
+ if result["messages"] and result["messages"][-1]["role"] == "user":
667
+ result["messages"][-1]["content"] += "\n" + json_instruction
668
+
669
+ try:
670
+ logger.info(f"Loading base model: {base_model_id}")
671
+ tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
672
+ base_model = AutoModelForCausalLM.from_pretrained(
673
+ base_model_id,
674
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
675
+ device_map="auto" if torch.cuda.is_available() else None,
676
+ trust_remote_code=True
677
+ )
678
+
679
+ if tokenizer.pad_token is None:
680
+ tokenizer.pad_token = tokenizer.eos_token
681
+
682
+ logger.info(f"Loading LoRA adapter: {adapter_path}")
683
+ model_with_adapter = PeftModel.from_pretrained(base_model, adapter_path)
684
+
685
+ # Apply chat template
686
+ chat_text = tokenizer.apply_chat_template(
687
+ result["messages"],
688
+ tokenize=False,
689
+ add_generation_prompt=True
690
+ )
691
+ device = next(model_with_adapter.parameters()).device
692
+ inputs = tokenizer(chat_text, return_tensors="pt", padding=True, truncation=True)
693
+ inputs = {k: v.to(device) for k, v in inputs.items()}
694
+
695
+ max_new_tokens = kwargs.get("max_tokens", 512)
696
+ temperature = kwargs.get("temperature", 0.7)
697
+
698
+ with torch.no_grad():
699
+ outputs = model_with_adapter.generate(
700
+ **inputs,
701
+ max_new_tokens=max_new_tokens,
702
+ temperature=temperature,
703
+ do_sample=True,
704
+ pad_token_id=tokenizer.eos_token_id,
705
+ )
706
+
707
+ response_content = tokenizer.decode(
708
+ outputs[0][inputs['input_ids'].shape[1]:],
709
+ skip_special_tokens=True
710
+ ).strip()
711
+
712
+ result["response"] = response_content
713
+ result["raw_response"] = response_content
714
+ result["messages"].append({"role": "assistant", "content": response_content})
715
+
716
+ if format == "json":
717
+ try:
718
+ if response_content.startswith("```json"):
719
+ response_content = response_content.replace("```json", "").replace("```", "").strip()
720
+ parsed_response = json.loads(response_content)
721
+ result["response"] = parsed_response
722
+ except json.JSONDecodeError:
723
+ result["error"] = f"Invalid JSON response: {response_content}"
724
+
725
+ except Exception as e:
726
+ logger.error(f"LoRA inference error: {e}")
727
+ result["error"] = f"LoRA inference error: {str(e)}"
728
+ result["response"] = ""
729
+
730
+ return result
731
+
732
+
591
733
  def get_llamacpp_response(
592
734
  prompt: str = None,
593
735
  model: str = None,
@@ -730,7 +872,7 @@ def get_litellm_response(
730
872
  auto_process_tool_calls=auto_process_tool_calls,
731
873
  **kwargs
732
874
  )
733
- elif provider=='transformers':
875
+ elif provider == 'transformers':
734
876
  return get_transformers_response(
735
877
  prompt,
736
878
  model,
@@ -745,8 +887,24 @@ def get_litellm_response(
745
887
  attachments=attachments,
746
888
  auto_process_tool_calls=auto_process_tool_calls,
747
889
  **kwargs
748
-
749
890
  )
891
+ elif provider == 'lora':
892
+ print(f"🔧 LoRA provider detected, calling get_lora_response with model: {model}")
893
+ result = get_lora_response(
894
+ prompt=prompt,
895
+ model=model,
896
+ tools=tools,
897
+ tool_map=tool_map,
898
+ format=format,
899
+ messages=messages,
900
+ stream=stream,
901
+ auto_process_tool_calls=auto_process_tool_calls,
902
+ **kwargs
903
+ )
904
+ print(f"🔧 LoRA response: {result.get('response', 'NO RESPONSE')[:200] if result.get('response') else 'EMPTY'}")
905
+ if result.get('error'):
906
+ print(f"🔧 LoRA error: {result.get('error')}")
907
+ return result
750
908
  elif provider == 'llamacpp':
751
909
  return get_llamacpp_response(
752
910
  prompt,
npcpy/npc_sysenv.py CHANGED
@@ -358,6 +358,25 @@ def get_locally_available_models(project_directory, airplane_mode=False):
358
358
  except Exception as e:
359
359
  logging.debug(f"llama.cpp server not available: {e}")
360
360
 
361
+ # Scan for LoRA adapters (fine-tuned models with adapter_config.json)
362
+ lora_dirs = [
363
+ os.path.expanduser('~/.npcsh/models'),
364
+ ]
365
+ for scan_dir in lora_dirs:
366
+ if not os.path.isdir(scan_dir):
367
+ continue
368
+ try:
369
+ for item in os.listdir(scan_dir):
370
+ item_path = os.path.join(scan_dir, item)
371
+ if os.path.isdir(item_path):
372
+ adapter_config = os.path.join(item_path, 'adapter_config.json')
373
+ if os.path.exists(adapter_config):
374
+ # This is a LoRA adapter
375
+ available_models[item_path] = "lora"
376
+ logging.debug(f"Found LoRA adapter: {item_path}")
377
+ except Exception as e:
378
+ logging.debug(f"Error scanning LoRA directory {scan_dir}: {e}")
379
+
361
380
  return available_models
362
381
 
363
382
 
@@ -959,13 +978,19 @@ def lookup_provider(model: str) -> str:
959
978
  """
960
979
  Determine the provider based on the model name.
961
980
  Checks custom providers first, then falls back to known providers.
962
-
981
+
963
982
  Args:
964
983
  model: The model name
965
-
984
+
966
985
  Returns:
967
986
  The provider name or None if not found
968
987
  """
988
+ # Check if model is a LoRA adapter path
989
+ if model and os.path.isdir(os.path.expanduser(model)):
990
+ adapter_config = os.path.join(os.path.expanduser(model), 'adapter_config.json')
991
+ if os.path.exists(adapter_config):
992
+ return "lora"
993
+
969
994
  custom_providers = load_custom_providers()
970
995
 
971
996
  for provider_name, config in custom_providers.items():
npcpy/serve.py CHANGED
@@ -447,30 +447,46 @@ def get_db_session():
447
447
 
448
448
  def resolve_mcp_server_path(current_path=None, explicit_path=None, force_global=False):
449
449
  """
450
- Resolve an MCP server path using npcsh.corca's helper when available.
451
- Falls back to ~/.npcsh/npc_team/mcp_server.py.
450
+ Resolve an MCP server path.
451
+ 1. Use explicit_path if provided and exists
452
+ 2. Check if ~/.npcsh/npc_team/mcp_server.py exists
453
+ 3. If not, find mcp_server.py in npcsh package, copy it, and return the path
452
454
  """
455
+ import shutil
456
+
457
+ # 1. Check explicit path first
453
458
  if explicit_path:
454
459
  abs_path = os.path.abspath(os.path.expanduser(explicit_path))
455
460
  if os.path.exists(abs_path):
456
461
  return abs_path
462
+
463
+ # 2. Check if global mcp_server.py already exists
464
+ global_mcp_path = os.path.expanduser("~/.npcsh/npc_team/mcp_server.py")
465
+ if os.path.exists(global_mcp_path):
466
+ return global_mcp_path
467
+
468
+ # 3. Find mcp_server.py in npcsh package and copy it
457
469
  try:
458
- from npcsh.corca import _resolve_and_copy_mcp_server_path
459
- resolved = _resolve_and_copy_mcp_server_path(
460
- explicit_path=explicit_path,
461
- current_path=current_path,
462
- team_ctx_mcp_servers=None,
463
- interactive=False,
464
- auto_copy_bypass=True,
465
- force_global=force_global,
466
- )
467
- if resolved:
468
- return os.path.abspath(resolved)
470
+ import npcsh
471
+ npcsh_package_dir = os.path.dirname(npcsh.__file__)
472
+ package_mcp_server = os.path.join(npcsh_package_dir, "mcp_server.py")
473
+
474
+ if os.path.exists(package_mcp_server):
475
+ # Ensure the target directory exists
476
+ target_dir = os.path.dirname(global_mcp_path)
477
+ os.makedirs(target_dir, exist_ok=True)
478
+
479
+ # Copy the mcp_server.py to the global location
480
+ shutil.copy2(package_mcp_server, global_mcp_path)
481
+ print(f"[MCP] Copied mcp_server.py from {package_mcp_server} to {global_mcp_path}")
482
+ return global_mcp_path
483
+ else:
484
+ print(f"[MCP] mcp_server.py not found in npcsh package at {package_mcp_server}")
469
485
  except Exception as e:
470
- print(f"resolve_mcp_server_path: fallback path due to error: {e}")
471
-
472
- fallback = os.path.expanduser("~/.npcsh/npc_team/mcp_server.py")
473
- return fallback
486
+ print(f"[MCP] Error finding/copying mcp_server.py from npcsh package: {e}")
487
+
488
+ # Return the global path anyway (caller will handle if it doesn't exist)
489
+ return global_mcp_path
474
490
 
475
491
  extension_map = {
476
492
  "PNG": "images",
@@ -1512,6 +1528,9 @@ def get_models():
1512
1528
  if m.endswith(('.gguf', '.ggml')):
1513
1529
  # For local GGUF/GGML files, show just the filename
1514
1530
  display_model = os.path.basename(m)
1531
+ elif p == 'lora':
1532
+ # For LoRA adapters, show just the folder name
1533
+ display_model = os.path.basename(m.rstrip('/'))
1515
1534
 
1516
1535
  display_name = f"{display_model} | {p} {text_only}".strip()
1517
1536
 
@@ -1991,6 +2010,669 @@ def finetune_status(job_id):
1991
2010
  'start_time': job.get('start_time')
1992
2011
  })
1993
2012
 
2013
+
2014
+ # Instruction fine-tuning jobs storage
2015
+ instruction_finetune_jobs = {}
2016
+
2017
+
2018
+ @app.route('/api/finetune_instruction', methods=['POST'])
2019
+ def finetune_instruction():
2020
+ """
2021
+ Fine-tune an LLM on instruction/conversation data.
2022
+
2023
+ Request body:
2024
+ {
2025
+ "trainingData": [
2026
+ {"input": "user prompt", "output": "assistant response"},
2027
+ // For DPO: include "reward" or "quality" score (0-1)
2028
+ // For memory_classifier: include "status" as "approved"/"rejected"
2029
+ ...
2030
+ ],
2031
+ "outputName": "my_instruction_model",
2032
+ "baseModel": "google/gemma-3-270m-it",
2033
+ "strategy": "sft", // "sft", "usft", "dpo", or "memory_classifier"
2034
+ "epochs": 20,
2035
+ "learningRate": 3e-5,
2036
+ "batchSize": 2,
2037
+ "loraR": 8,
2038
+ "loraAlpha": 16,
2039
+ "outputPath": "~/.npcsh/models",
2040
+ "systemPrompt": "optional system prompt to prepend",
2041
+ "npc": "optional npc name",
2042
+ "formatStyle": "gemma" // "gemma", "llama", or "default"
2043
+ }
2044
+
2045
+ Strategies:
2046
+ - sft: Supervised Fine-Tuning with input/output pairs
2047
+ - usft: Unsupervised Fine-Tuning on raw text (domain adaptation)
2048
+ - dpo: Direct Preference Optimization using quality/reward scores
2049
+ - memory_classifier: Train memory approval classifier
2050
+ """
2051
+ from npcpy.ft.sft import run_sft, SFTConfig
2052
+ from npcpy.ft.usft import run_usft, USFTConfig
2053
+ from npcpy.ft.rl import train_with_dpo, RLConfig
2054
+
2055
+ data = request.json
2056
+ training_data = data.get('trainingData', [])
2057
+ output_name = data.get('outputName', 'my_instruction_model')
2058
+ base_model = data.get('baseModel', 'google/gemma-3-270m-it')
2059
+ strategy = data.get('strategy', 'sft') # sft, usft, dpo, memory_classifier
2060
+ num_epochs = data.get('epochs', 20)
2061
+ learning_rate = data.get('learningRate', 3e-5)
2062
+ batch_size = data.get('batchSize', 2)
2063
+ lora_r = data.get('loraR', 8)
2064
+ lora_alpha = data.get('loraAlpha', 16)
2065
+ output_path = data.get('outputPath', '~/.npcsh/models')
2066
+ system_prompt = data.get('systemPrompt', '')
2067
+ format_style = data.get('formatStyle', 'gemma')
2068
+ npc_name = data.get('npc', None)
2069
+
2070
+ print(f"🎓 Instruction Fine-tune Request Received!")
2071
+ print(f" Training examples: {len(training_data)}")
2072
+ print(f" Strategy: {strategy}")
2073
+ print(f" Base model: {base_model}")
2074
+ print(f" Output name: {output_name}")
2075
+ print(f" Epochs: {num_epochs}, LR: {learning_rate}, Batch: {batch_size}")
2076
+
2077
+ if not training_data:
2078
+ print("🎓 Error: No training data provided.")
2079
+ return jsonify({'error': 'No training data provided'}), 400
2080
+
2081
+ min_examples = 10 if strategy == 'memory_classifier' else 3
2082
+ if len(training_data) < min_examples:
2083
+ print(f"🎓 Error: Need at least {min_examples} training examples for {strategy}.")
2084
+ return jsonify({'error': f'Need at least {min_examples} training examples for {strategy}'}), 400
2085
+
2086
+ expanded_output_dir = os.path.expanduser(os.path.join(output_path, output_name))
2087
+
2088
+ job_id = f"ift_{int(time.time())}"
2089
+ instruction_finetune_jobs[job_id] = {
2090
+ 'status': 'running',
2091
+ 'strategy': strategy,
2092
+ 'output_dir': expanded_output_dir,
2093
+ 'base_model': base_model,
2094
+ 'epochs': num_epochs,
2095
+ 'current_epoch': 0,
2096
+ 'current_step': 0,
2097
+ 'total_steps': 0,
2098
+ 'current_loss': None,
2099
+ 'loss_history': [],
2100
+ 'start_time': datetime.datetime.now().isoformat(),
2101
+ 'npc': npc_name,
2102
+ 'num_examples': len(training_data)
2103
+ }
2104
+ print(f"🎓 Instruction fine-tuning job {job_id} initialized. Output: {expanded_output_dir}")
2105
+
2106
+ def run_training_async():
2107
+ print(f"🎓 Job {job_id}: Starting {strategy.upper()} training thread...")
2108
+ try:
2109
+ if strategy == 'sft':
2110
+ # Supervised Fine-Tuning with input/output pairs
2111
+ X = []
2112
+ y = []
2113
+ for example in training_data:
2114
+ inp = example.get('input', example.get('prompt', ''))
2115
+ out = example.get('output', example.get('response', example.get('completion', '')))
2116
+ if system_prompt:
2117
+ inp = f"{system_prompt}\n\n{inp}"
2118
+ X.append(inp)
2119
+ y.append(out)
2120
+
2121
+ config = SFTConfig(
2122
+ base_model_name=base_model,
2123
+ output_model_path=expanded_output_dir,
2124
+ num_train_epochs=num_epochs,
2125
+ learning_rate=learning_rate,
2126
+ per_device_train_batch_size=batch_size,
2127
+ lora_r=lora_r,
2128
+ lora_alpha=lora_alpha
2129
+ )
2130
+
2131
+ print(f"🎓 Job {job_id}: Running SFT with config: {config}")
2132
+ model_path = run_sft(
2133
+ X=X,
2134
+ y=y,
2135
+ config=config,
2136
+ format_style=format_style
2137
+ )
2138
+
2139
+ instruction_finetune_jobs[job_id]['status'] = 'complete'
2140
+ instruction_finetune_jobs[job_id]['model_path'] = model_path
2141
+ instruction_finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
2142
+ print(f"🎓 Job {job_id}: SFT complete! Model saved to: {model_path}")
2143
+
2144
+ elif strategy == 'usft':
2145
+ # Unsupervised Fine-Tuning - domain adaptation on raw text
2146
+ texts = []
2147
+ for example in training_data:
2148
+ # Combine input and output as training text, or just use text field
2149
+ if 'text' in example:
2150
+ texts.append(example['text'])
2151
+ else:
2152
+ inp = example.get('input', example.get('prompt', ''))
2153
+ out = example.get('output', example.get('response', ''))
2154
+ if inp and out:
2155
+ texts.append(f"{inp}\n{out}")
2156
+ elif inp:
2157
+ texts.append(inp)
2158
+ elif out:
2159
+ texts.append(out)
2160
+
2161
+ config = USFTConfig(
2162
+ base_model_name=base_model,
2163
+ output_model_path=expanded_output_dir,
2164
+ num_train_epochs=num_epochs,
2165
+ learning_rate=learning_rate,
2166
+ per_device_train_batch_size=batch_size,
2167
+ lora_r=lora_r,
2168
+ lora_alpha=lora_alpha
2169
+ )
2170
+
2171
+ print(f"🎓 Job {job_id}: Running USFT with {len(texts)} texts")
2172
+ model_path = run_usft(texts=texts, config=config)
2173
+
2174
+ instruction_finetune_jobs[job_id]['status'] = 'complete'
2175
+ instruction_finetune_jobs[job_id]['model_path'] = model_path
2176
+ instruction_finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
2177
+ print(f"🎓 Job {job_id}: USFT complete! Model saved to: {model_path}")
2178
+
2179
+ elif strategy == 'dpo':
2180
+ # Direct Preference Optimization - needs quality/reward scores
2181
+ traces = []
2182
+ for example in training_data:
2183
+ traces.append({
2184
+ 'task_prompt': example.get('input', example.get('prompt', '')),
2185
+ 'final_output': example.get('output', example.get('response', '')),
2186
+ 'reward': example.get('reward', example.get('quality', 0.5))
2187
+ })
2188
+
2189
+ config = RLConfig(
2190
+ base_model_name=base_model,
2191
+ adapter_path=expanded_output_dir,
2192
+ num_train_epochs=num_epochs,
2193
+ learning_rate=learning_rate,
2194
+ per_device_train_batch_size=batch_size,
2195
+ lora_r=lora_r,
2196
+ lora_alpha=lora_alpha
2197
+ )
2198
+
2199
+ print(f"🎓 Job {job_id}: Running DPO with {len(traces)} traces")
2200
+ adapter_path = train_with_dpo(traces, config)
2201
+
2202
+ if adapter_path:
2203
+ instruction_finetune_jobs[job_id]['status'] = 'complete'
2204
+ instruction_finetune_jobs[job_id]['model_path'] = adapter_path
2205
+ else:
2206
+ instruction_finetune_jobs[job_id]['status'] = 'error'
2207
+ instruction_finetune_jobs[job_id]['error_msg'] = 'Not enough valid preference pairs for DPO training'
2208
+
2209
+ instruction_finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
2210
+ print(f"🎓 Job {job_id}: DPO complete! Adapter saved to: {adapter_path}")
2211
+
2212
+ elif strategy == 'memory_classifier':
2213
+ # Train memory approval/rejection classifier
2214
+ from npcpy.ft.memory_trainer import MemoryTrainer
2215
+
2216
+ approved_memories = []
2217
+ rejected_memories = []
2218
+
2219
+ for example in training_data:
2220
+ status = example.get('status', 'approved')
2221
+ memory_data = {
2222
+ 'initial_memory': example.get('input', example.get('memory', '')),
2223
+ 'final_memory': example.get('output', example.get('final_memory', '')),
2224
+ 'context': example.get('context', '')
2225
+ }
2226
+ if status in ['approved', 'model-approved']:
2227
+ approved_memories.append(memory_data)
2228
+ else:
2229
+ rejected_memories.append(memory_data)
2230
+
2231
+ if len(approved_memories) < 10 or len(rejected_memories) < 10:
2232
+ instruction_finetune_jobs[job_id]['status'] = 'error'
2233
+ instruction_finetune_jobs[job_id]['error_msg'] = 'Need at least 10 approved and 10 rejected memories'
2234
+ instruction_finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
2235
+ return
2236
+
2237
+ trainer = MemoryTrainer(model_name=base_model)
2238
+ success = trainer.train(
2239
+ approved_memories=approved_memories,
2240
+ rejected_memories=rejected_memories,
2241
+ output_dir=expanded_output_dir,
2242
+ epochs=num_epochs
2243
+ )
2244
+
2245
+ if success:
2246
+ instruction_finetune_jobs[job_id]['status'] = 'complete'
2247
+ instruction_finetune_jobs[job_id]['model_path'] = expanded_output_dir
2248
+ else:
2249
+ instruction_finetune_jobs[job_id]['status'] = 'error'
2250
+ instruction_finetune_jobs[job_id]['error_msg'] = 'Memory classifier training failed'
2251
+
2252
+ instruction_finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
2253
+ print(f"🎓 Job {job_id}: Memory classifier complete!")
2254
+
2255
+ else:
2256
+ raise ValueError(f"Unknown strategy: {strategy}. Supported: sft, usft, dpo, memory_classifier")
2257
+
2258
+ except Exception as e:
2259
+ instruction_finetune_jobs[job_id]['status'] = 'error'
2260
+ instruction_finetune_jobs[job_id]['error_msg'] = str(e)
2261
+ instruction_finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
2262
+ print(f"🎓 Job {job_id}: ERROR during training: {e}")
2263
+ traceback.print_exc()
2264
+
2265
+ print(f"🎓 Job {job_id}: Training thread finished.")
2266
+
2267
+ thread = threading.Thread(target=run_training_async)
2268
+ thread.daemon = True
2269
+ thread.start()
2270
+
2271
+ print(f"🎓 Job {job_id} launched in background.")
2272
+ return jsonify({
2273
+ 'status': 'started',
2274
+ 'jobId': job_id,
2275
+ 'strategy': strategy,
2276
+ 'message': f"Instruction fine-tuning job '{job_id}' started. Check /api/finetune_instruction_status/{job_id} for updates."
2277
+ })
2278
+
2279
+
2280
+ @app.route('/api/finetune_instruction_status/<job_id>', methods=['GET'])
2281
+ def finetune_instruction_status(job_id):
2282
+ """Get the status of an instruction fine-tuning job."""
2283
+ if job_id not in instruction_finetune_jobs:
2284
+ return jsonify({'error': 'Job not found'}), 404
2285
+
2286
+ job = instruction_finetune_jobs[job_id]
2287
+
2288
+ if job['status'] == 'complete':
2289
+ return jsonify({
2290
+ 'status': 'complete',
2291
+ 'complete': True,
2292
+ 'outputPath': job.get('model_path', job['output_dir']),
2293
+ 'strategy': job.get('strategy'),
2294
+ 'loss_history': job.get('loss_history', []),
2295
+ 'start_time': job.get('start_time'),
2296
+ 'end_time': job.get('end_time')
2297
+ })
2298
+ elif job['status'] == 'error':
2299
+ return jsonify({
2300
+ 'status': 'error',
2301
+ 'error': job.get('error_msg', 'Unknown error'),
2302
+ 'start_time': job.get('start_time'),
2303
+ 'end_time': job.get('end_time')
2304
+ })
2305
+
2306
+ return jsonify({
2307
+ 'status': 'running',
2308
+ 'strategy': job.get('strategy'),
2309
+ 'epoch': job.get('current_epoch', 0),
2310
+ 'total_epochs': job.get('epochs', 0),
2311
+ 'step': job.get('current_step', 0),
2312
+ 'total_steps': job.get('total_steps', 0),
2313
+ 'loss': job.get('current_loss'),
2314
+ 'loss_history': job.get('loss_history', []),
2315
+ 'start_time': job.get('start_time'),
2316
+ 'num_examples': job.get('num_examples', 0)
2317
+ })
2318
+
2319
+
2320
+ @app.route('/api/instruction_models', methods=['GET'])
2321
+ def get_instruction_models():
2322
+ """Get list of available instruction-tuned models."""
2323
+ current_path = request.args.get("currentPath")
2324
+
2325
+ potential_root_paths = [
2326
+ os.path.expanduser('~/.npcsh/models'),
2327
+ ]
2328
+ if current_path:
2329
+ project_models_path = os.path.join(current_path, 'models')
2330
+ potential_root_paths.append(project_models_path)
2331
+
2332
+ instruction_models = []
2333
+
2334
+ print(f"🎓 Searching for instruction models in: {set(potential_root_paths)}")
2335
+
2336
+ for root_path in set(potential_root_paths):
2337
+ if not os.path.exists(root_path) or not os.path.isdir(root_path):
2338
+ continue
2339
+
2340
+ for model_dir_name in os.listdir(root_path):
2341
+ full_model_path = os.path.join(root_path, model_dir_name)
2342
+
2343
+ if not os.path.isdir(full_model_path):
2344
+ continue
2345
+
2346
+ # Check for adapter_config.json (LoRA models) or config.json (full models)
2347
+ has_adapter_config = os.path.exists(os.path.join(full_model_path, 'adapter_config.json'))
2348
+ has_config = os.path.exists(os.path.join(full_model_path, 'config.json'))
2349
+ has_tokenizer = os.path.exists(os.path.join(full_model_path, 'tokenizer_config.json'))
2350
+
2351
+ if has_adapter_config or (has_config and has_tokenizer):
2352
+ model_type = 'lora_adapter' if has_adapter_config else 'full_model'
2353
+ print(f"🎓 Found instruction model: {model_dir_name} ({model_type})")
2354
+ instruction_models.append({
2355
+ "value": full_model_path,
2356
+ "name": model_dir_name,
2357
+ "type": model_type,
2358
+ "display_name": f"{model_dir_name} | Instruction Model"
2359
+ })
2360
+
2361
+ print(f"🎓 Found {len(instruction_models)} instruction models.")
2362
+ return jsonify({"models": instruction_models, "error": None})
2363
+
2364
+
2365
+ # Genetic Evolution jobs storage
2366
+ ge_jobs = {}
2367
+ ge_populations = {} # Store active populations by ID
2368
+
2369
+
2370
+ @app.route('/api/genetic/create_population', methods=['POST'])
2371
+ def create_genetic_population():
2372
+ """
2373
+ Create a new genetic evolution population.
2374
+
2375
+ Request body:
2376
+ {
2377
+ "populationId": "optional_id",
2378
+ "populationType": "prompt" | "npc_config" | "model_ensemble" | "custom",
2379
+ "populationSize": 20,
2380
+ "config": {
2381
+ "mutationRate": 0.15,
2382
+ "crossoverRate": 0.7,
2383
+ "tournamentSize": 3,
2384
+ "elitismCount": 2
2385
+ },
2386
+ "initialPopulation": [...], // Optional initial individuals
2387
+ "fitnessEndpoint": "/api/evaluate_fitness" // Optional custom fitness endpoint
2388
+ }
2389
+ """
2390
+ from npcpy.ft.ge import GeneticEvolver, GAConfig
2391
+
2392
+ data = request.json
2393
+ population_id = data.get('populationId', f"pop_{int(time.time())}")
2394
+ population_type = data.get('populationType', 'prompt')
2395
+ population_size = data.get('populationSize', 20)
2396
+ config_data = data.get('config', {})
2397
+ initial_population = data.get('initialPopulation', [])
2398
+ npc_name = data.get('npc', None)
2399
+
2400
+ config = GAConfig(
2401
+ population_size=population_size,
2402
+ mutation_rate=config_data.get('mutationRate', 0.15),
2403
+ crossover_rate=config_data.get('crossoverRate', 0.7),
2404
+ tournament_size=config_data.get('tournamentSize', 3),
2405
+ elitism_count=config_data.get('elitismCount', 2),
2406
+ generations=config_data.get('generations', 50)
2407
+ )
2408
+
2409
+ print(f"🧬 Creating genetic population {population_id} (type: {population_type})")
2410
+
2411
+ # Define type-specific functions based on population type
2412
+ if population_type == 'prompt':
2413
+ # Evolve prompts for better responses
2414
+ import random
2415
+
2416
+ def initialize_fn():
2417
+ if initial_population:
2418
+ return random.choice(initial_population)
2419
+ return f"You are a helpful assistant. {random.choice(['Be concise.', 'Be detailed.', 'Be creative.', 'Be precise.'])}"
2420
+
2421
+ def mutate_fn(individual):
2422
+ mutations = [
2423
+ lambda s: s + " Think step by step.",
2424
+ lambda s: s + " Be specific.",
2425
+ lambda s: s.replace("helpful", "expert"),
2426
+ lambda s: s.replace("assistant", "specialist"),
2427
+ lambda s: s + " Provide examples.",
2428
+ ]
2429
+ return random.choice(mutations)(individual)
2430
+
2431
+ def crossover_fn(p1, p2):
2432
+ words1 = p1.split()
2433
+ words2 = p2.split()
2434
+ mid = len(words1) // 2
2435
+ return ' '.join(words1[:mid] + words2[mid:])
2436
+
2437
+ def fitness_fn(individual):
2438
+ # Placeholder - should be overridden with actual evaluation
2439
+ return len(individual) / 100.0 # Longer prompts score higher (placeholder)
2440
+
2441
+ elif population_type == 'npc_config':
2442
+ # Evolve NPC configurations
2443
+ import random
2444
+
2445
+ def initialize_fn():
2446
+ if initial_population:
2447
+ return random.choice(initial_population)
2448
+ return {
2449
+ 'temperature': random.uniform(0.1, 1.0),
2450
+ 'top_p': random.uniform(0.7, 1.0),
2451
+ 'system_prompt_modifier': random.choice(['detailed', 'concise', 'creative']),
2452
+ }
2453
+
2454
+ def mutate_fn(individual):
2455
+ mutated = individual.copy()
2456
+ key = random.choice(list(mutated.keys()))
2457
+ if key == 'temperature':
2458
+ mutated[key] = max(0.1, min(2.0, mutated[key] + random.gauss(0, 0.1)))
2459
+ elif key == 'top_p':
2460
+ mutated[key] = max(0.5, min(1.0, mutated[key] + random.gauss(0, 0.05)))
2461
+ return mutated
2462
+
2463
+ def crossover_fn(p1, p2):
2464
+ child = {}
2465
+ for key in p1:
2466
+ child[key] = random.choice([p1.get(key), p2.get(key)])
2467
+ return child
2468
+
2469
+ def fitness_fn(individual):
2470
+ return 0.5 # Placeholder
2471
+
2472
+ else:
2473
+ # Custom type - use simple string evolution
2474
+ import random
2475
+
2476
+ def initialize_fn():
2477
+ if initial_population:
2478
+ return random.choice(initial_population)
2479
+ return {"value": random.random()}
2480
+
2481
+ def mutate_fn(individual):
2482
+ if isinstance(individual, dict):
2483
+ mutated = individual.copy()
2484
+ mutated['value'] = individual.get('value', 0) + random.gauss(0, 0.1)
2485
+ return mutated
2486
+ return individual
2487
+
2488
+ def crossover_fn(p1, p2):
2489
+ if isinstance(p1, dict) and isinstance(p2, dict):
2490
+ return {'value': (p1.get('value', 0) + p2.get('value', 0)) / 2}
2491
+ return p1
2492
+
2493
+ def fitness_fn(individual):
2494
+ if isinstance(individual, dict):
2495
+ return 1.0 - abs(individual.get('value', 0) - 0.5) # Closer to 0.5 is better
2496
+ return 0.5
2497
+
2498
+ evolver = GeneticEvolver(
2499
+ fitness_fn=fitness_fn,
2500
+ mutate_fn=mutate_fn,
2501
+ crossover_fn=crossover_fn,
2502
+ initialize_fn=initialize_fn,
2503
+ config=config
2504
+ )
2505
+
2506
+ evolver.initialize_population()
2507
+
2508
+ ge_populations[population_id] = {
2509
+ 'evolver': evolver,
2510
+ 'type': population_type,
2511
+ 'config': config,
2512
+ 'generation': 0,
2513
+ 'history': [],
2514
+ 'npc': npc_name,
2515
+ 'created_at': datetime.datetime.now().isoformat()
2516
+ }
2517
+
2518
+ return jsonify({
2519
+ 'populationId': population_id,
2520
+ 'populationType': population_type,
2521
+ 'populationSize': population_size,
2522
+ 'generation': 0,
2523
+ 'message': f"Population '{population_id}' created with {population_size} individuals"
2524
+ })
2525
+
2526
+
2527
@app.route('/api/genetic/evolve', methods=['POST'])
def evolve_population():
    """
    Run evolution for N generations.

    Request body:
        {
            "populationId": "pop_123",
            "generations": 10,
            "fitnessScores": [...]  // Optional: external fitness scores for current population
        }

    Returns 404 if the population id is unknown; otherwise a JSON summary of
    each generation run plus the current best individual and top individuals.
    """
    data = request.json
    population_id = data.get('populationId')
    generations = data.get('generations', 1)
    fitness_scores = data.get('fitnessScores', None)

    if population_id not in ge_populations:
        return jsonify({'error': f"Population '{population_id}' not found"}), 404

    pop_data = ge_populations[population_id]
    evolver = pop_data['evolver']

    print(f"🧬 Evolving population {population_id} for {generations} generations")

    # Save the fitness function unconditionally so restoration below can never
    # hit an unbound name (previously `original_fitness` was only assigned when
    # the score count matched, but restoration only checked `if fitness_scores`,
    # raising NameError on a length mismatch).
    original_fitness = evolver.fitness_fn
    scores_injected = False
    if fitness_scores and len(fitness_scores) == len(evolver.population):
        score_iter = iter(fitness_scores)
        # next(..., 0.5) yields a neutral score once the externally supplied
        # values are exhausted (i.e. after the first generation's evaluation).
        evolver.fitness_fn = lambda x: next(score_iter, 0.5)
        scores_injected = True

    try:
        results = []
        for _ in range(generations):
            gen_stats = evolver.evolve_generation()
            pop_data['generation'] += 1
            pop_data['history'].append(gen_stats)
            results.append({
                'generation': pop_data['generation'],
                'bestFitness': gen_stats['best_fitness'],
                'avgFitness': gen_stats['avg_fitness'],
                'bestIndividual': gen_stats['best_individual']
            })
    finally:
        # Always restore the original fitness function, even if evolution raised.
        if scores_injected:
            evolver.fitness_fn = original_fitness

    return jsonify({
        'populationId': population_id,
        'generationsRun': generations,
        'currentGeneration': pop_data['generation'],
        'results': results,
        'bestIndividual': results[-1]['bestIndividual'] if results else None,
        'population': evolver.population[:5]  # Return top 5 individuals
    })
2583
+
2584
+
2585
@app.route('/api/genetic/population/<population_id>', methods=['GET'])
def get_population(population_id):
    """Return the current state of a population, or 404 if unknown."""
    pop_data = ge_populations.get(population_id)
    if pop_data is None:
        return jsonify({'error': f"Population '{population_id}' not found"}), 404

    evolver = pop_data['evolver']
    individuals = evolver.population

    payload = {
        'populationId': population_id,
        'type': pop_data['type'],
        'generation': pop_data['generation'],
        'populationSize': len(individuals),
        'population': individuals,
        # Cap history at the most recent 50 generations to bound payload size.
        'history': pop_data['history'][-50:],
        'createdAt': pop_data['created_at'],
        'npc': pop_data.get('npc'),
    }
    return jsonify(payload)
2604
+
2605
+
2606
@app.route('/api/genetic/populations', methods=['GET'])
def list_populations():
    """Return a summary of every active genetic-evolution population."""
    summaries = [
        {
            'populationId': pop_id,
            'type': pop_data['type'],
            'generation': pop_data['generation'],
            'populationSize': len(pop_data['evolver'].population),
            'createdAt': pop_data['created_at'],
            'npc': pop_data.get('npc'),
        }
        for pop_id, pop_data in ge_populations.items()
    ]
    return jsonify({'populations': summaries})
2621
+
2622
+
2623
@app.route('/api/genetic/population/<population_id>', methods=['DELETE'])
def delete_population(population_id):
    """Remove a population from the in-memory registry; 404 if unknown."""
    if population_id not in ge_populations:
        return jsonify({'error': f"Population '{population_id}' not found"}), 404

    ge_populations.pop(population_id)
    print(f"🧬 Deleted population {population_id}")
    return jsonify({'message': f"Population '{population_id}' deleted"})
2633
+
2634
+
2635
@app.route('/api/genetic/inject', methods=['POST'])
def inject_individuals():
    """
    Inject new individuals into a population.

    Request body:
        {
            "populationId": "pop_123",
            "individuals": [...],
            "replaceWorst": true  // Replace worst individuals or append
        }

    Returns 404 if the population id is unknown; otherwise the new size.
    """
    data = request.json
    population_id = data.get('populationId')
    individuals = data.get('individuals', [])
    replace_worst = data.get('replaceWorst', True)

    if population_id not in ge_populations:
        return jsonify({'error': f"Population '{population_id}' not found"}), 404

    pop_data = ge_populations[population_id]
    evolver = pop_data['evolver']

    if replace_worst:
        # Evaluate and sort population by fitness, replace worst with new individuals.
        fitness_scores = evolver.evaluate_population()
        sorted_pop = sorted(
            zip(evolver.population, fitness_scores),
            key=lambda pair: pair[1],
            reverse=True,
        )
        # Clamp at zero: injecting more individuals than the population holds
        # previously produced a negative slice bound, silently keeping the
        # wrong individuals. Now the injected set simply becomes the population.
        keep_count = max(0, len(sorted_pop) - len(individuals))
        evolver.population = [ind for ind, _ in sorted_pop[:keep_count]] + individuals
    else:
        evolver.population.extend(individuals)

    print(f"🧬 Injected {len(individuals)} individuals into {population_id}")

    return jsonify({
        'populationId': population_id,
        'injectedCount': len(individuals),
        'newPopulationSize': len(evolver.population)
    })
2674
+
2675
+
1994
2676
  @app.route("/api/ml/train", methods=["POST"])
1995
2677
  def train_ml_model():
1996
2678
  import pickle
@@ -2969,6 +3651,11 @@ def get_attachment_response():
2969
3651
 
2970
3652
 
2971
3653
  IMAGE_MODELS = {
3654
+ "diffusers": [
3655
+ {"value": "runwayml/stable-diffusion-v1-5", "display_name": "Stable Diffusion v1.5"},
3656
+ {"value": "stabilityai/stable-diffusion-xl-base-1.0", "display_name": "SDXL Base 1.0"},
3657
+ {"value": "black-forest-labs/FLUX.1-schnell", "display_name": "FLUX.1 Schnell"},
3658
+ ],
2972
3659
  "openai": [
2973
3660
  {"value": "gpt-image-1.5", "display_name": "GPT-Image-1.5"},
2974
3661
  {"value": "gpt-image-1", "display_name": "GPT-Image-1"},
@@ -2980,9 +3667,69 @@ IMAGE_MODELS = {
2980
3667
  {"value": "gemini-2.5-flash-image-preview", "display_name": "Gemini 2.5 Flash Image"},
2981
3668
  {"value": "imagen-3.0-generate-002", "display_name": "Imagen 3.0 Generate (Preview)"},
2982
3669
  ],
2983
- "diffusers": [
2984
- {"value": "runwayml/stable-diffusion-v1-5", "display_name": "Stable Diffusion v1.5"},
3670
+ "stability": [
3671
+ {"value": "stable-diffusion-xl-1024-v1-0", "display_name": "SDXL 1.0"},
3672
+ {"value": "stable-diffusion-v1-6", "display_name": "SD 1.6"},
3673
+ {"value": "stable-image-core", "display_name": "Stable Image Core"},
3674
+ {"value": "stable-image-ultra", "display_name": "Stable Image Ultra"},
3675
+ ],
3676
+ "replicate": [
3677
+ {"value": "stability-ai/sdxl", "display_name": "SDXL (Replicate)"},
3678
+ {"value": "black-forest-labs/flux-schnell", "display_name": "FLUX Schnell"},
3679
+ {"value": "black-forest-labs/flux-dev", "display_name": "FLUX Dev"},
3680
+ {"value": "black-forest-labs/flux-pro", "display_name": "FLUX Pro"},
3681
+ ],
3682
+ "fal": [
3683
+ {"value": "fal-ai/flux/schnell", "display_name": "FLUX Schnell"},
3684
+ {"value": "fal-ai/flux/dev", "display_name": "FLUX Dev"},
3685
+ {"value": "fal-ai/flux-pro", "display_name": "FLUX Pro"},
3686
+ {"value": "fal-ai/stable-diffusion-v3-medium", "display_name": "SD3 Medium"},
3687
+ ],
3688
+ "together": [
3689
+ {"value": "stabilityai/stable-diffusion-xl-base-1.0", "display_name": "SDXL Base"},
3690
+ {"value": "black-forest-labs/FLUX.1-schnell", "display_name": "FLUX.1 Schnell"},
3691
+ {"value": "black-forest-labs/FLUX.1.1-pro", "display_name": "FLUX 1.1 Pro"},
2985
3692
  ],
3693
+ "fireworks": [
3694
+ {"value": "stable-diffusion-xl-1024-v1-0", "display_name": "SDXL 1.0"},
3695
+ {"value": "playground-v2-1024px-aesthetic", "display_name": "Playground v2"},
3696
+ ],
3697
+ "deepinfra": [
3698
+ {"value": "stability-ai/sdxl", "display_name": "SDXL"},
3699
+ {"value": "black-forest-labs/FLUX-1-schnell", "display_name": "FLUX Schnell"},
3700
+ ],
3701
+ "bfl": [
3702
+ {"value": "flux-pro-1.1", "display_name": "FLUX Pro 1.1"},
3703
+ {"value": "flux-pro", "display_name": "FLUX Pro"},
3704
+ {"value": "flux-dev", "display_name": "FLUX Dev"},
3705
+ ],
3706
+ "bagel": [
3707
+ {"value": "bagel-image-v1", "display_name": "Bagel Image v1"},
3708
+ ],
3709
+ "leonardo": [
3710
+ {"value": "leonardo-diffusion-xl", "display_name": "Leonardo Diffusion XL"},
3711
+ {"value": "leonardo-vision-xl", "display_name": "Leonardo Vision XL"},
3712
+ ],
3713
+ "ideogram": [
3714
+ {"value": "ideogram-v2", "display_name": "Ideogram v2"},
3715
+ {"value": "ideogram-v2-turbo", "display_name": "Ideogram v2 Turbo"},
3716
+ ],
3717
+ }
3718
+
3719
+ # Map provider names to their environment variable keys
3720
+ IMAGE_PROVIDER_API_KEYS = {
3721
+ "openai": "OPENAI_API_KEY",
3722
+ "gemini": "GEMINI_API_KEY",
3723
+ "stability": "STABILITY_API_KEY",
3724
+ "replicate": "REPLICATE_API_TOKEN",
3725
+ "fal": "FAL_KEY",
3726
+ "together": "TOGETHER_API_KEY",
3727
+ "fireworks": "FIREWORKS_API_KEY",
3728
+ "deepinfra": "DEEPINFRA_API_KEY",
3729
+ "bfl": "BFL_API_KEY",
3730
+ "bagel": "BAGEL_API_KEY",
3731
+ "leonardo": "LEONARDO_API_KEY",
3732
+ "ideogram": "IDEOGRAM_API_KEY",
2986
3733
  }
2987
3734
  # In npcpy/serve.py, find the @app.route('/api/finetuned_models', methods=['GET'])
2988
3735
  # and replace the entire function with this:
@@ -3058,25 +3805,22 @@ def get_available_image_models(current_path=None):
3058
3805
  "display_name": f"{env_image_model} | {env_image_provider} (Configured)"
3059
3806
  })
3060
3807
 
3061
- # Add predefined models (OpenAI, Gemini, and standard Diffusers)
3808
+ # Add predefined models - diffusers always available, others require API keys
3062
3809
  for provider_key, models_list in IMAGE_MODELS.items():
3063
- if provider_key == "openai":
3064
- if os.environ.get("OPENAI_API_KEY"):
3065
- all_image_models.extend([
3066
- {**model, "provider": provider_key, "display_name": f"{model['display_name']} | {provider_key}"}
3067
- for model in models_list
3068
- ])
3069
- elif provider_key == "gemini":
3070
- if os.environ.get("GEMINI_API_KEY"):
3071
- all_image_models.extend([
3072
- {**model, "provider": provider_key, "display_name": f"{model['display_name']} | {provider_key}"}
3073
- for model in models_list
3074
- ])
3075
- elif provider_key == "diffusers": # This entry in IMAGE_MODELS is for standard diffusers
3810
+ if provider_key == "diffusers":
3811
+ # Diffusers (local) is always available
3076
3812
  all_image_models.extend([
3077
3813
  {**model, "provider": provider_key, "display_name": f"{model['display_name']} | {provider_key}"}
3078
3814
  for model in models_list
3079
3815
  ])
3816
+ else:
3817
+ # Check if API key is present for this provider
3818
+ api_key_env = IMAGE_PROVIDER_API_KEYS.get(provider_key)
3819
+ if api_key_env and os.environ.get(api_key_env):
3820
+ all_image_models.extend([
3821
+ {**model, "provider": provider_key, "display_name": f"{model['display_name']} | {provider_key}"}
3822
+ for model in models_list
3823
+ ])
3080
3824
 
3081
3825
  # <--- CRITICAL FIX: Directly call the internal helper function for fine-tuned models
3082
3826
  try:
@@ -3804,8 +4548,10 @@ def stream():
3804
4548
  return jsonify({"error": "conversationId is required"}), 400
3805
4549
  model = data.get("model", None)
3806
4550
  provider = data.get("provider", None)
4551
+ print(f"🔍 Stream request - model: {model}, provider from request: {provider}")
3807
4552
  if provider is None:
3808
4553
  provider = available_models.get(model)
4554
+ print(f"🔍 Provider looked up from available_models: {provider}")
3809
4555
 
3810
4556
  npc_name = data.get("npc", None)
3811
4557
  npc_source = data.get("npcSource", "global")
@@ -3824,8 +4570,10 @@ def stream():
3824
4570
 
3825
4571
  npc_object = None
3826
4572
  team_object = None
3827
- team = None
4573
+ team = None
3828
4574
  tool_results_for_db = []
4575
+ # Initialize stream_response early to ensure it's always defined for closures
4576
+ stream_response = {"output": "", "messages": []}
3829
4577
  if npc_name:
3830
4578
  if hasattr(app, 'registered_teams'):
3831
4579
  for team_name, team_object in app.registered_teams.items():
@@ -4449,7 +5197,30 @@ def stream():
4449
5197
  yield f"data: {json.dumps(chunk_data)}\n\n"
4450
5198
 
4451
5199
  elif isinstance(stream_response, dict):
4452
- for response_chunk in stream_response.get('response', stream_response.get('output')):
5200
+ # Handle LoRA responses - they return the full response at once, not streaming
5201
+ if provider == 'lora':
5202
+ lora_text = stream_response.get('response', stream_response.get('output', ''))
5203
+ if lora_text:
5204
+ complete_response.append(lora_text)
5205
+ chunk_data = {
5206
+ "id": None,
5207
+ "object": None,
5208
+ "created": datetime.datetime.now().strftime('YYYY-DD-MM-HHMMSS'),
5209
+ "model": model,
5210
+ "choices": [
5211
+ {
5212
+ "index": 0,
5213
+ "delta": {
5214
+ "content": lora_text,
5215
+ "role": "assistant"
5216
+ },
5217
+ "finish_reason": "stop"
5218
+ }
5219
+ ]
5220
+ }
5221
+ yield f"data: {json.dumps(chunk_data)}\n\n"
5222
+ else:
5223
+ for response_chunk in stream_response.get('response', stream_response.get('output')):
4453
5224
  with cancellation_lock:
4454
5225
  if cancellation_flags.get(current_stream_id, False):
4455
5226
  print(f"Cancellation flag triggered for {current_stream_id}. Breaking loop.")
@@ -5273,7 +6044,7 @@ def openai_chat_completions():
5273
6044
  current_path = request.headers.get("X-Current-Path", os.getcwd())
5274
6045
 
5275
6046
  # Load team and NPC
5276
- db_path = app.config.get('DB_PATH') or os.path.expanduser("~/.npcsh/npcsh_history.db")
6047
+ db_path = app.config.get('DB_PATH') or os.path.expanduser("~/npcsh_history.db")
5277
6048
  db_conn = create_engine(f'sqlite:///{db_path}')
5278
6049
 
5279
6050
  npc = None
@@ -6110,8 +6881,8 @@ if __name__ == "__main__":
6110
6881
 
6111
6882
  SETTINGS_FILE = Path(os.path.expanduser("~/.npcshrc"))
6112
6883
 
6113
- # Use standard npcsh paths
6114
- db_path = os.path.expanduser("~/.npcsh/npcsh_history.db")
6884
+ # Use environment variable for DB path, or fall back to home directory path (matching Electron app)
6885
+ db_path = os.environ.get('INCOGNIDE_DB_PATH', os.path.expanduser("~/npcsh_history.db"))
6115
6886
  user_npc_directory = os.path.expanduser("~/.npcsh/npc_team")
6116
6887
 
6117
6888
  # Ensure directories exist
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: npcpy
3
- Version: 1.3.15
3
+ Version: 1.3.17
4
4
  Summary: npcpy is the premier open-source library for integrating LLMs and Agents into python systems.
5
5
  Home-page: https://github.com/NPC-Worldwide/npcpy
6
6
  Author: Christopher Agostino
@@ -5,9 +5,9 @@ npcpy/main.py,sha256=RWoRIj6VQLxKdOKvdVyaq2kwG35oRpeXPvp1CAAoG-w,81
5
5
  npcpy/ml_funcs.py,sha256=UI7k7JR4XOH_VXR-xxLaO4r9Kyx_jBaEnp3TUIY7ZLQ,22657
6
6
  npcpy/npc_array.py,sha256=fVTxcMiXV-lvltmuwaRnTU9D3ikPq3-7k5wzp7MA5OY,40224
7
7
  npcpy/npc_compiler.py,sha256=W1umvhsbyCYoRYajPUKa642FcsX5Fcadh78n-Vzu2hM,120983
8
- npcpy/npc_sysenv.py,sha256=VH7le3xwxHvO55ZYCG1e-gj8X5YTSIqbIiU6ifSqhss,38917
8
+ npcpy/npc_sysenv.py,sha256=JjsvQaeMNeL3bDEbNrrldrRM8X9cMBI9j-W_1phb5TA,39995
9
9
  npcpy/npcs.py,sha256=eExuVsbTfrRobTRRptRpDm46jCLWUgbvy4_U7IUQo-c,744
10
- npcpy/serve.py,sha256=wx5pG5SRQbB3WBH1KAoOG1twpd4qtDh29c8TMvt9xT8,244481
10
+ npcpy/serve.py,sha256=ACGhxr96H5VqGw1KWkp7WcW25Ok_aAwDTYhYg4jkHRQ,275626
11
11
  npcpy/tools.py,sha256=A5_oVmZkzGnI3BI-NmneuxeXQq-r29PbpAZP4nV4jrc,5303
12
12
  npcpy/data/__init__.py,sha256=1tcoChR-Hjn905JDLqaW9ElRmcISCTJdE7BGXPlym2Q,642
13
13
  npcpy/data/audio.py,sha256=o4auV8DQrAmZ4y84U3SofiwEuq5-ZBjGEZipQ9zPpGQ,22816
@@ -22,7 +22,7 @@ npcpy/ft/diff.py,sha256=0ScRR4AxXtVX2bgZ-Jr_dSwv3LAlU1JXDUq4F4n1Ea4,12839
22
22
  npcpy/ft/ge.py,sha256=0VzIiXq2wCzGcK1x0Wd-myJ3xRf-FNaPg0GkHEZegUM,3552
23
23
  npcpy/ft/memory_trainer.py,sha256=QZPznxEEwXbOGroHdMUMa5xpqlNwgV6nqOazI2xgrnQ,6635
24
24
  npcpy/ft/model_ensembler.py,sha256=BRX4hJ_rvF1vKTzjMhlahZqPttUgc3PqmzUJDqIfIps,10038
25
- npcpy/ft/rl.py,sha256=yItNqBuyZ7PGDVIojG5CC_0GnTlgILQc5diGQCvc7FY,12233
25
+ npcpy/ft/rl.py,sha256=JYmOCKJcralU7uEMgrTz-saBunrPY5eBSx8gDCoa-M0,12431
26
26
  npcpy/ft/sft.py,sha256=74gRaJTTrZcO4np4DqRMr79ADkGhPcDKutR74rag03E,6659
27
27
  npcpy/ft/usft.py,sha256=O025GGYGZQf2ZVLowyAmBwh5bJyuy2dUAM6v03YcboY,3435
28
28
  npcpy/gen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -30,7 +30,7 @@ npcpy/gen/audio_gen.py,sha256=RoSElPUGfQimPBUcl9SP-ziIJxeI6XAr0A1882BZxXE,20646
30
30
  npcpy/gen/embeddings.py,sha256=QStTJ2ELiC379OEZsLEgGGIIFD267Y8zQchs7HRn2Zg,2089
31
31
  npcpy/gen/image_gen.py,sha256=SOZYpvlxSiAdDK9j750OEBKjm22OUNdXg1kQ10sJSy0,21853
32
32
  npcpy/gen/ocr.py,sha256=rgmXWHrCYX1Po-qG_LrNFbVYEZ8aaupxFTgparcoB_Y,6554
33
- npcpy/gen/response.py,sha256=fLd-ORRMI_s3yRNMH1TQodGk17u_G0xofS1lqfqH4r0,51121
33
+ npcpy/gen/response.py,sha256=EYsIOvNOmn6dBs-4j3SyZNMvDf5N9lW-QxMbpjnF7Kw,57081
34
34
  npcpy/gen/video_gen.py,sha256=RFi3Zcq_Hn3HIcfoF3mijQ6G7RYFZaM_9pjPTh-8E64,3239
35
35
  npcpy/gen/world_gen.py,sha256=_8ytE7E3QVQ5qiX8DmOby-xd0d9zV20rRI6Wkpf-qcY,18922
36
36
  npcpy/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -53,8 +53,8 @@ npcpy/work/browser.py,sha256=p2PeaoZdAXipFuAgKCCB3aXXLE_p3yIRqC87KlZKZWc,679
53
53
  npcpy/work/desktop.py,sha256=F3I8mUtJp6LAkXodsh8hGZIncoads6c_2Utty-0EdDA,2986
54
54
  npcpy/work/plan.py,sha256=QyUwg8vElWiHuoS-xK4jXTxxHvkMD3VkaCEsCmrEPQk,8300
55
55
  npcpy/work/trigger.py,sha256=P1Y8u1wQRsS2WACims_2IdkBEar-iBQix-2TDWoW0OM,9948
56
- npcpy-1.3.15.dist-info/licenses/LICENSE,sha256=j0YPvce7Ng9e32zYOu0EmXjXeJ0Nwawd0RA3uSGGH4E,1070
57
- npcpy-1.3.15.dist-info/METADATA,sha256=0yYF_u7W9iC1a4fk5jNiOnTpycKziPNu16uCQYIReQQ,37870
58
- npcpy-1.3.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
59
- npcpy-1.3.15.dist-info/top_level.txt,sha256=g1pbSvrOOncB74Bg5-J0Olg4V0A5VzDw-Xz5YObq8BU,6
60
- npcpy-1.3.15.dist-info/RECORD,,
56
+ npcpy-1.3.17.dist-info/licenses/LICENSE,sha256=j0YPvce7Ng9e32zYOu0EmXjXeJ0Nwawd0RA3uSGGH4E,1070
57
+ npcpy-1.3.17.dist-info/METADATA,sha256=oK8MXksfeMCM5acIJioPKaBjep1Yimzhn6rjl1Nbm44,37870
58
+ npcpy-1.3.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
59
+ npcpy-1.3.17.dist-info/top_level.txt,sha256=g1pbSvrOOncB74Bg5-J0Olg4V0A5VzDw-Xz5YObq8BU,6
60
+ npcpy-1.3.17.dist-info/RECORD,,
File without changes