npcpy 1.1.28__py3-none-any.whl → 1.2.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. npcpy/data/audio.py +16 -38
  2. npcpy/data/image.py +29 -29
  3. npcpy/data/load.py +4 -3
  4. npcpy/data/text.py +28 -28
  5. npcpy/data/video.py +6 -6
  6. npcpy/data/web.py +49 -21
  7. npcpy/ft/__init__.py +0 -0
  8. npcpy/ft/diff.py +110 -0
  9. npcpy/ft/ge.py +115 -0
  10. npcpy/ft/memory_trainer.py +171 -0
  11. npcpy/ft/model_ensembler.py +357 -0
  12. npcpy/ft/rl.py +360 -0
  13. npcpy/ft/sft.py +248 -0
  14. npcpy/ft/usft.py +128 -0
  15. npcpy/gen/audio_gen.py +24 -0
  16. npcpy/gen/embeddings.py +13 -13
  17. npcpy/gen/image_gen.py +37 -15
  18. npcpy/gen/response.py +287 -111
  19. npcpy/gen/video_gen.py +10 -9
  20. npcpy/llm_funcs.py +447 -79
  21. npcpy/memory/command_history.py +201 -48
  22. npcpy/memory/kg_vis.py +74 -74
  23. npcpy/memory/knowledge_graph.py +482 -115
  24. npcpy/memory/memory_processor.py +81 -0
  25. npcpy/memory/search.py +70 -70
  26. npcpy/mix/debate.py +192 -3
  27. npcpy/npc_compiler.py +1541 -879
  28. npcpy/npc_sysenv.py +250 -78
  29. npcpy/serve.py +1036 -321
  30. npcpy/sql/ai_function_tools.py +257 -0
  31. npcpy/sql/database_ai_adapters.py +186 -0
  32. npcpy/sql/database_ai_functions.py +163 -0
  33. npcpy/sql/model_runner.py +19 -19
  34. npcpy/sql/npcsql.py +706 -507
  35. npcpy/sql/sql_model_compiler.py +156 -0
  36. npcpy/tools.py +20 -20
  37. npcpy/work/plan.py +8 -8
  38. npcpy/work/trigger.py +3 -3
  39. {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/METADATA +169 -9
  40. npcpy-1.2.32.dist-info/RECORD +54 -0
  41. npcpy-1.1.28.dist-info/RECORD +0 -40
  42. {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/WHEEL +0 -0
  43. {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/licenses/LICENSE +0 -0
  44. {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/top_level.txt +0 -0
npcpy/ft/diff.py ADDED
@@ -0,0 +1,110 @@
1
+ # finetuning diffuser models
2
+ try:
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+ from torch.utils.data import DataLoader, Dataset as TorchDataset
7
+ from transformers import CLIPTextModel, CLIPTokenizer
8
+ except:
9
+ torch = None
10
+ nn = None
11
+ F = None
12
+ DataLoader = None
13
+ TorchDataset = None
14
+ CLIPTextModel = None
15
+ CLIPTokenizer = None
16
+ import math
17
+ from dataclasses import dataclass, field
18
+ from typing import List, Optional, Callable
19
+ import numpy as np
20
+ from PIL import Image
21
+ import os
22
+ from tqdm import tqdm
23
+ import gc
24
+
25
+
26
@dataclass
class DiffusionConfig:
    """Hyperparameters for training a small conditional diffusion model.

    Defaults describe a 128x128, single-channel setup with a linear beta
    schedule over 1000 timesteps.
    """
    image_size: int = 128            # height/width of (square) training images
    channels: int = 256              # base channel width of the UNet
    time_emb_dim: int = 128          # dimensionality of the timestep embedding
    timesteps: int = 1000            # diffusion steps in the noise schedule
    beta_start: float = 1e-4         # first beta of the linear schedule
    beta_end: float = 0.02           # last beta of the linear schedule
    num_epochs: int = 100
    batch_size: int = 4
    learning_rate: float = 1e-5
    checkpoint_frequency: int = 1000  # presumably steps between saves — confirm in trainer
    output_dir: str = "diffusion_model"
    use_clip: bool = True            # presumably toggles CLIP text conditioning — confirm in trainer
    num_channels: int = 1            # image channels (1 = grayscale)
41
+
42
+
43
class SinusoidalPositionEmbeddings(nn.Module):
    """Transformer-style sinusoidal embedding for diffusion timesteps.

    Maps a batch of scalar timesteps to a (batch, dim) tensor whose first
    half holds sines and second half cosines at geometrically spaced
    frequencies with base 10000 (as in "Attention Is All You Need").
    """

    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, time):
        half = self.dim // 2
        # Frequencies decay geometrically from 1 down to 1/10000.
        scale = math.log(10000) / (half - 1)
        freqs = torch.exp(-scale * torch.arange(half, device=time.device))
        # Outer product: one row of angles per timestep in the batch.
        angles = time[:, None] * freqs[None, :]
        return torch.cat((angles.sin(), angles.cos()), dim=-1)
62
+
63
+
64
class SimpleUNet(nn.Module):
    """Encoder portion of a small conditional UNet.

    Builds a 1x1 input projection and three stride-2 downsampling stages,
    plus MLPs that project timestep and 768-d text embeddings to the base
    channel width for conditioning.

    NOTE(review): this file defines no forward pass or decoder — confirm
    they live elsewhere before using this module directly.
    """

    def __init__(
        self,
        image_size=128,
        channels=256,
        time_emb_dim=128,
        num_channels=1
    ):
        super().__init__()

        self.image_size = image_size

        # Timestep -> sinusoidal embedding -> per-channel conditioning vector.
        self.time_mlp = nn.Sequential(
            SinusoidalPositionEmbeddings(time_emb_dim),
            nn.Linear(time_emb_dim, time_emb_dim * 4),
            nn.GELU(),
            nn.Linear(time_emb_dim * 4, channels),
        )

        # 768-d text embedding (CLIP hidden size) -> conditioning vector.
        self.text_mlp = nn.Sequential(
            nn.Linear(768, time_emb_dim),
            nn.GELU(),
            nn.Linear(time_emb_dim, time_emb_dim),
            nn.GELU(),
            nn.Linear(time_emb_dim, channels),
        )

        self.conv_in = nn.Conv2d(num_channels, channels, 1, padding=0)

        def down_stage(c_in, c_out):
            # 4x4 stride-2 conv halves spatial resolution; GroupNorm(8) then GELU.
            return nn.Sequential(
                nn.Conv2d(c_in, c_out, 4, 2, 1),
                nn.GroupNorm(8, c_out),
                nn.GELU(),
            )

        self.down1 = down_stage(channels, channels * 2)
        self.down2 = down_stage(channels * 2, channels * 4)
        self.down3 = down_stage(channels * 4, channels * 8)
npcpy/ft/ge.py ADDED
@@ -0,0 +1,115 @@
1
+ import random
2
+ from dataclasses import dataclass
3
+ from typing import Callable, Optional, List
4
+
5
+
6
@dataclass
class GAConfig:
    """Knobs controlling a GeneticEvolver run."""
    population_size: int = 20     # individuals per generation
    mutation_rate: float = 0.15   # probability a child is passed to mutate_fn
    crossover_rate: float = 0.7   # probability a child is bred vs. cloned from parent1
    tournament_size: int = 3      # competitors sampled per selection tournament
    elitism_count: int = 2        # top individuals carried over unchanged
    generations: int = 50         # default generation count for run()
14
+
15
+
16
class GeneticEvolver:
    """
    Generic GA that takes fitness, mutation, crossover
    and initialization functions to evolve any population.

    All four callables are user-supplied, so individuals may be any type:
    fitness_fn(ind) -> float, mutate_fn(ind) -> ind,
    crossover_fn(parent1, parent2) -> child, initialize_fn() -> ind.
    """
    def __init__(
        self,
        fitness_fn: Callable,
        mutate_fn: Callable,
        crossover_fn: Callable,
        initialize_fn: Callable,
        config: Optional["GAConfig"] = None
    ):
        self.fitness_fn = fitness_fn
        self.mutate_fn = mutate_fn
        self.crossover_fn = crossover_fn
        self.initialize_fn = initialize_fn
        self.config = config or GAConfig()
        self.population = []
        # One stats dict per completed generation (see evolve_generation()).
        self.history = []

    def initialize_population(self):
        """Fill the population with population_size fresh individuals."""
        self.population = [
            self.initialize_fn()
            for _ in range(self.config.population_size)
        ]

    def evaluate_population(self) -> List[float]:
        """Score every individual; result is index-aligned with self.population."""
        return [
            self.fitness_fn(individual)
            for individual in self.population
        ]

    def tournament_select(self, fitness_scores: List[float]):
        """Sample tournament_size individuals at random and return the fittest."""
        indices = random.sample(
            range(len(self.population)),
            self.config.tournament_size
        )
        tournament_fitness = [fitness_scores[i] for i in indices]
        winner_idx = indices[
            tournament_fitness.index(max(tournament_fitness))
        ]
        return self.population[winner_idx]

    def evolve_generation(self):
        """Advance one generation in place.

        Returns a dict with 'best_fitness', 'avg_fitness' and
        'best_individual' for the generation that was just evaluated.
        """
        fitness_scores = self.evaluate_population()

        sorted_pop = sorted(
            zip(self.population, fitness_scores),
            key=lambda x: x[1],
            reverse=True
        )

        # Elitism: the top individuals survive unchanged.
        # NOTE(review): elites are aliased, not copied — a mutate_fn that
        # mutates in place could corrupt them; confirm mutate_fn returns a
        # new object before relying on elitism.
        new_population = [
            ind for ind, _ in sorted_pop[:self.config.elitism_count]
        ]

        while len(new_population) < self.config.population_size:
            parent1 = self.tournament_select(fitness_scores)
            parent2 = self.tournament_select(fitness_scores)

            if random.random() < self.config.crossover_rate:
                child = self.crossover_fn(parent1, parent2)
            else:
                child = parent1

            if random.random() < self.config.mutation_rate:
                child = self.mutate_fn(child)

            new_population.append(child)

        self.population = new_population[:self.config.population_size]

        best_fitness = max(fitness_scores)
        avg_fitness = sum(fitness_scores) / len(fitness_scores)

        return {
            'best_fitness': best_fitness,
            'avg_fitness': avg_fitness,
            'best_individual': sorted_pop[0][0]
        }

    def run(self, generations: Optional[int] = None):
        """Run the GA and return the best individual found.

        generations overrides config.generations when given; an explicit 0
        is honored (the previous `generations or ...` idiom silently fell
        back to the config default for 0).
        """
        if not self.population:
            self.initialize_population()

        # Compare against None, not truthiness, so generations=0 works.
        gens = self.config.generations if generations is None else generations

        for gen in range(gens):
            gen_stats = self.evolve_generation()
            self.history.append(gen_stats)

            if gen % 10 == 0:
                print(
                    f"Gen {gen}: "
                    f"Best={gen_stats['best_fitness']:.3f}, "
                    f"Avg={gen_stats['avg_fitness']:.3f}"
                )

        if self.history:
            return self.history[-1]['best_individual']
        # Zero generations ran: return the best current individual instead
        # of raising IndexError on the empty history.
        return max(self.population, key=self.fitness_fn)
npcpy/ft/memory_trainer.py ADDED
@@ -0,0 +1,171 @@
1
+ try:
2
+ from torch.utils.data import Dataset
3
+ import torch
4
+ import torch.nn as nn
5
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
6
+
7
+ import json
8
+ from typing import List, Dict, Tuple
9
+ import random
10
+
11
class MemoryDataset(Dataset):
    """Torch dataset of labelled memory examples for sequence classification.

    Each example is a dict with keys 'memory', optional 'context', and an
    integer 'label'; items come back as flat input_ids/attention_mask
    tensors plus a scalar long 'labels' tensor.
    """

    def __init__(self, examples: List[Dict], tokenizer, max_length=512):
        self.examples = examples
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        sample = self.examples[idx]

        # Memory text and its (possibly empty) context are tokenized as one string.
        text = f"Memory: {sample['memory']}\nContext: {sample.get('context', '')}"

        tokens = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )

        # Tokenizer returns (1, seq_len) tensors; flatten to (seq_len,).
        return {
            'input_ids': tokens['input_ids'].flatten(),
            'attention_mask': tokens['attention_mask'].flatten(),
            'labels': torch.tensor(sample['label'], dtype=torch.long),
        }
39
+
40
class MemoryTrainer:
    """Fine-tunes and serves a 3-way classifier over memory examples.

    Labels: 0 = rejected, 1 = approved, 2 = needs editing.
    """

    def __init__(self, model_name="google/gemma-2b", device="cpu"):
        self.device = device
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Some tokenizers ship without a pad token; reuse EOS so padding works.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=3
        ).to(device)

    def prepare_training_data(self, approved_memories: List[Dict],
                              rejected_memories: List[Dict]) -> List[Dict]:
        """Prepare training data from memory examples"""
        # Approved -> label 1, preferring the edited final text when present.
        examples = [
            {
                "memory": item.get("final_memory") or item.get("initial_memory"),
                "context": item.get("context", ""),
                "label": 1,
            }
            for item in approved_memories
        ]

        # Rejected -> label 0, always the original text.
        examples += [
            {
                "memory": item.get("initial_memory"),
                "context": item.get("context", ""),
                "label": 0,
            }
            for item in rejected_memories
        ]

        # Approved memories that were edited before approval double as
        # "needs-editing" (label 2) examples, capped at half the rejected count.
        examples += [
            {
                "memory": item.get("initial_memory"),
                "context": item.get("context", ""),
                "label": 2,
            }
            for item in approved_memories[:len(rejected_memories) // 2]
            if item.get("final_memory") and item.get("initial_memory")
        ]

        random.shuffle(examples)
        return examples

    def train(self, approved_memories: List[Dict], rejected_memories: List[Dict],
              output_dir: str = "./memory_model", epochs: int = 3):
        """Train the memory classification model"""
        if len(approved_memories) < 10 or len(rejected_memories) < 10:
            print("Not enough training data. Need at least 10 approved and 10 rejected memories.")
            return False

        training_data = self.prepare_training_data(approved_memories, rejected_memories)

        # 80/20 train/validation split (data is already shuffled).
        split_idx = int(0.8 * len(training_data))
        train_dataset = MemoryDataset(training_data[:split_idx], self.tokenizer)
        val_dataset = MemoryDataset(training_data[split_idx:], self.tokenizer)

        training_args = TrainingArguments(
            output_dir=output_dir,
            num_train_epochs=epochs,
            per_device_train_batch_size=4,
            per_device_eval_batch_size=4,
            warmup_steps=100,
            weight_decay=0.01,
            logging_dir='./logs',
            evaluation_strategy="epoch",
            save_strategy="epoch",
            load_best_model_at_end=True,
        )

        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
        )

        trainer.train()
        trainer.save_model()
        self.tokenizer.save_pretrained(output_dir)

        print(f"Model trained and saved to {output_dir}")
        return True

    def predict_memory_action(self, memory_content: str, context: str = "") -> Tuple[str, float]:
        """Predict what action to take on a memory"""
        text = f"Memory: {memory_content}\nContext: {context}"

        encoding = self.tokenizer(
            text,
            truncation=True,
            padding=True,
            max_length=512,
            return_tensors='pt'
        ).to(self.device)

        with torch.no_grad():
            logits = self.model(**encoding).logits

        probabilities = torch.softmax(logits, dim=-1)
        predicted_class = torch.argmax(probabilities, dim=-1).item()
        confidence = probabilities[0][predicted_class].item()

        actions = {0: "model-rejected", 1: "model-approved", 2: "needs-editing"}
        return actions[predicted_class], confidence

    def auto_approve_memory(self, memory_content: str, context: str = "",
                            confidence_threshold: float = 0.8) -> Dict:
        """Auto-approve memory if confidence is high enough"""
        action, confidence = self.predict_memory_action(memory_content, context)

        confident = confidence >= confidence_threshold
        return {
            "action": action if confident else "pending_approval",
            "confidence": confidence,
            "auto_processed": confident,
        }
164
+ except:
165
+ Dataset = None
166
+ nn = None
167
+ Trainer = None
168
+ TrainingArguments = None
169
+
170
+ MemoryDataset = None
171
+ MemoryTrainer = None