PyPI - npcpy - Versions diffs - 1.2.19__py3-none-any.whl → 1.2.20__py3-none-any.whl - Mend

npcpy 1.2.19py3-none-any.whl → 1.2.20py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

npcpy/ft/memory_trainer.py CHANGED Viewed

@@ -3,162 +3,169 @@ try:
     import torch
     import torch.nn as nn
     from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
-except:
-    pass
-import json
-from typing import List, Dict, Tuple
-import random
-class MemoryDataset(Dataset):
-    def __init__(self, examples: List[Dict], tokenizer, max_length=512):
-        self.examples = examples
-        self.tokenizer = tokenizer
-        self.max_length = max_length
-    def __len__(self):
-        return len(self.examples)
-    def __getitem__(self, idx):
-        example = self.examples[idx]
-        text = f"Memory: {example['memory']}\nContext: {example.get('context', '')}"
-        encoding = self.tokenizer(
-            text,
-            truncation=True,
-            padding='max_length',
-            max_length=self.max_length,
-            return_tensors='pt'
-        )
-        return {
-            'input_ids': encoding['input_ids'].flatten(),
-            'attention_mask': encoding['attention_mask'].flatten(),
-            'labels': torch.tensor(example['label'], dtype=torch.long)
-        }
+    import json
+    from typing import List, Dict, Tuple
+    import random
-class MemoryTrainer:
-    def __init__(self, model_name="google/gemma-2b", device="cpu"):
-        self.device = device
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        if self.tokenizer.pad_token is None:
-            self.tokenizer.pad_token = self.tokenizer.eos_token
-        self.model = AutoModelForSequenceClassification.from_pretrained(
-            model_name,
-            num_labels=3
-        ).to(device)
-    def prepare_training_data(self, approved_memories: List[Dict],
-                            rejected_memories: List[Dict]) -> List[Dict]:
-        """Prepare training data from memory examples"""
-        examples = []
-        for memory in approved_memories:
-            examples.append({
-                "memory": memory.get("final_memory") or memory.get("initial_memory"),
-                "context": memory.get("context", ""),
-                "label": 1
-            })
-        for memory in rejected_memories:
-            examples.append({
-                "memory": memory.get("initial_memory"),
-                "context": memory.get("context", ""),
-                "label": 0
-            })
-        edited_examples = []
-        for memory in approved_memories[:len(rejected_memories)//2]:
-            if memory.get("final_memory") and memory.get("initial_memory"):
-                edited_examples.append({
+    class MemoryDataset(Dataset):
+        def __init__(self, examples: List[Dict], tokenizer, max_length=512):
+            self.examples = examples
+            self.tokenizer = tokenizer
+            self.max_length = max_length
+        def __len__(self):
+            return len(self.examples)
+        def __getitem__(self, idx):
+            example = self.examples[idx]
+            text = f"Memory: {example['memory']}\nContext: {example.get('context', '')}"
+            encoding = self.tokenizer(
+                text,
+                truncation=True,
+                padding='max_length',
+                max_length=self.max_length,
+                return_tensors='pt'
+            )
+            return {
+                'input_ids': encoding['input_ids'].flatten(),
+                'attention_mask': encoding['attention_mask'].flatten(),
+                'labels': torch.tensor(example['label'], dtype=torch.long)
+            }
+    class MemoryTrainer:
+        def __init__(self, model_name="google/gemma-2b", device="cpu"):
+            self.device = device
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+            self.model = AutoModelForSequenceClassification.from_pretrained(
+                model_name,
+                num_labels=3
+            ).to(device)
+        def prepare_training_data(self, approved_memories: List[Dict],
+                                rejected_memories: List[Dict]) -> List[Dict]:
+            """Prepare training data from memory examples"""
+            examples = []
+            for memory in approved_memories:
+                examples.append({
+                    "memory": memory.get("final_memory") or memory.get("initial_memory"),
+                    "context": memory.get("context", ""),
+                    "label": 1
+                })
+            for memory in rejected_memories:
+                examples.append({
                     "memory": memory.get("initial_memory"),
                     "context": memory.get("context", ""),
-                    "label": 2
+                    "label": 0
                 })
-        examples.extend(edited_examples)
-        random.shuffle(examples)
-        return examples
+            edited_examples = []
+            for memory in approved_memories[:len(rejected_memories)//2]:
+                if memory.get("final_memory") and memory.get("initial_memory"):
+                    edited_examples.append({
+                        "memory": memory.get("initial_memory"),
+                        "context": memory.get("context", ""),
+                        "label": 2
+                    })
+            examples.extend(edited_examples)
+            random.shuffle(examples)
+            return examples
-    def train(self, approved_memories: List[Dict], rejected_memories: List[Dict],
-              output_dir: str = "./memory_model", epochs: int = 3):
-        """Train the memory classification model"""
-        if len(approved_memories) < 10 or len(rejected_memories) < 10:
-            print("Not enough training data. Need at least 10 approved and 10 rejected memories.")
-            return False
-        training_data = self.prepare_training_data(approved_memories, rejected_memories)
-        split_idx = int(0.8 * len(training_data))
-        train_data = training_data[:split_idx]
-        val_data = training_data[split_idx:]
-        train_dataset = MemoryDataset(train_data, self.tokenizer)
-        val_dataset = MemoryDataset(val_data, self.tokenizer)
-        training_args = TrainingArguments(
-            output_dir=output_dir,
-            num_train_epochs=epochs,
-            per_device_train_batch_size=4,
-            per_device_eval_batch_size=4,
-            warmup_steps=100,
-            weight_decay=0.01,
-            logging_dir='./logs',
-            evaluation_strategy="epoch",
-            save_strategy="epoch",
-            load_best_model_at_end=True,
-        )
-        trainer = Trainer(
-            model=self.model,
-            args=training_args,
-            train_dataset=train_dataset,
-            eval_dataset=val_dataset,
-        )
-        trainer.train()
-        trainer.save_model()
-        self.tokenizer.save_pretrained(output_dir)
-        print(f"Model trained and saved to {output_dir}")
-        return True
+        def train(self, approved_memories: List[Dict], rejected_memories: List[Dict],
+                output_dir: str = "./memory_model", epochs: int = 3):
+            """Train the memory classification model"""
+            if len(approved_memories) < 10 or len(rejected_memories) < 10:
+                print("Not enough training data. Need at least 10 approved and 10 rejected memories.")
+                return False
+            training_data = self.prepare_training_data(approved_memories, rejected_memories)
+            split_idx = int(0.8 * len(training_data))
+            train_data = training_data[:split_idx]
+            val_data = training_data[split_idx:]
+            train_dataset = MemoryDataset(train_data, self.tokenizer)
+            val_dataset = MemoryDataset(val_data, self.tokenizer)
+            training_args = TrainingArguments(
+                output_dir=output_dir,
+                num_train_epochs=epochs,
+                per_device_train_batch_size=4,
+                per_device_eval_batch_size=4,
+                warmup_steps=100,
+                weight_decay=0.01,
+                logging_dir='./logs',
+                evaluation_strategy="epoch",
+                save_strategy="epoch",
+                load_best_model_at_end=True,
+            )
+            trainer = Trainer(
+                model=self.model,
+                args=training_args,
+                train_dataset=train_dataset,
+                eval_dataset=val_dataset,
+            )
+            trainer.train()
+            trainer.save_model()
+            self.tokenizer.save_pretrained(output_dir)
+            print(f"Model trained and saved to {output_dir}")
+            return True
-    def predict_memory_action(self, memory_content: str, context: str = "") -> Tuple[str, float]:
-        """Predict what action to take on a memory"""
-        text = f"Memory: {memory_content}\nContext: {context}"
-        encoding = self.tokenizer(
-            text,
-            truncation=True,
-            padding=True,
-            max_length=512,
-            return_tensors='pt'
-        ).to(self.device)
-        with torch.no_grad():
-            outputs = self.model(**encoding)
-            probabilities = torch.softmax(outputs.logits, dim=-1)
-            predicted_class = torch.argmax(probabilities, dim=-1).item()
-            confidence = probabilities[0][predicted_class].item()
-        actions = {0: "model-rejected", 1: "model-approved", 2: "needs-editing"}
-        return actions[predicted_class], confidence
+        def predict_memory_action(self, memory_content: str, context: str = "") -> Tuple[str, float]:
+            """Predict what action to take on a memory"""
+            text = f"Memory: {memory_content}\nContext: {context}"
+            encoding = self.tokenizer(
+                text,
+                truncation=True,
+                padding=True,
+                max_length=512,
+                return_tensors='pt'
+            ).to(self.device)
+            with torch.no_grad():
+                outputs = self.model(**encoding)
+                probabilities = torch.softmax(outputs.logits, dim=-1)
+                predicted_class = torch.argmax(probabilities, dim=-1).item()
+                confidence = probabilities[0][predicted_class].item()
+            actions = {0: "model-rejected", 1: "model-approved", 2: "needs-editing"}
+            return actions[predicted_class], confidence
-    def auto_approve_memory(self, memory_content: str, context: str = "",
-                          confidence_threshold: float = 0.8) -> Dict:
-        """Auto-approve memory if confidence is high enough"""
-        action, confidence = self.predict_memory_action(memory_content, context)
-        if confidence >= confidence_threshold:
-            return {"action": action, "confidence": confidence, "auto_processed": True}
-        else:
-            return {"action": "pending_approval", "confidence": confidence, "auto_processed": False}
+        def auto_approve_memory(self, memory_content: str, context: str = "",
+                            confidence_threshold: float = 0.8) -> Dict:
+            """Auto-approve memory if confidence is high enough"""
+            action, confidence = self.predict_memory_action(memory_content, context)
+            if confidence >= confidence_threshold:
+                return {"action": action, "confidence": confidence, "auto_processed": True}
+            else:
+                return {"action": "pending_approval", "confidence": confidence, "auto_processed": False}
+except:
+    Dataset = None
+    nn = None
+    Trainer = None
+    TrainingArguments = None
+    MemoryDataset = None
+    MemoryTrainer = None

{npcpy-1.2.19.dist-info → npcpy-1.2.20.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: npcpy
-Version: 1.2.19
+Version: 1.2.20
 Summary: npcpy is the premier open-source library for integrating LLMs and Agents into python systems.
 Home-page: https://github.com/NPC-Worldwide/npcpy
 Author: Christopher Agostino

{npcpy-1.2.19.dist-info → npcpy-1.2.20.dist-info}/RECORD RENAMED Viewed

@@ -17,7 +17,7 @@ npcpy/data/web.py,sha256=ARGoVKUlQmaiX0zJbSvvFmRCwOv_Z7Pcan9c5GxYObQ,5117
 npcpy/ft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 npcpy/ft/diff.py,sha256=R3Qo6v0-6M1iI0wiXhUzyuYI2ja0q_0i9bE0z3coxzU,28
 npcpy/ft/ge.py,sha256=my5LtGyVTT40V0i1h9FR-tFFA1FHSga-PeCCgUX1UUI,61
-npcpy/ft/memory_trainer.py,sha256=410BtNj308c7V45E809ILbDjCnVRy7n0mdIp2DKOCNY,5904
+npcpy/ft/memory_trainer.py,sha256=QZPznxEEwXbOGroHdMUMa5xpqlNwgV6nqOazI2xgrnQ,6635
 npcpy/ft/rl.py,sha256=l3RUkEJe4b2yB6pildveu2LJymtNq0F17COwf_CCq3U,34
 npcpy/ft/sft.py,sha256=i4ENygRPArbLWN4XZZuBnPWaehs8M-J68JB_mewGJHI,62
 npcpy/gen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -41,8 +41,8 @@ npcpy/work/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 npcpy/work/desktop.py,sha256=F3I8mUtJp6LAkXodsh8hGZIncoads6c_2Utty-0EdDA,2986
 npcpy/work/plan.py,sha256=QyUwg8vElWiHuoS-xK4jXTxxHvkMD3VkaCEsCmrEPQk,8300
 npcpy/work/trigger.py,sha256=P1Y8u1wQRsS2WACims_2IdkBEar-iBQix-2TDWoW0OM,9948
-npcpy-1.2.19.dist-info/licenses/LICENSE,sha256=j0YPvce7Ng9e32zYOu0EmXjXeJ0Nwawd0RA3uSGGH4E,1070
-npcpy-1.2.19.dist-info/METADATA,sha256=dnJPjMvml-0yrEJXUt9g-Rf-UT9mIXuQR3HJZzYgm18,26084
-npcpy-1.2.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-npcpy-1.2.19.dist-info/top_level.txt,sha256=g1pbSvrOOncB74Bg5-J0Olg4V0A5VzDw-Xz5YObq8BU,6
-npcpy-1.2.19.dist-info/RECORD,,
+npcpy-1.2.20.dist-info/licenses/LICENSE,sha256=j0YPvce7Ng9e32zYOu0EmXjXeJ0Nwawd0RA3uSGGH4E,1070
+npcpy-1.2.20.dist-info/METADATA,sha256=P5knjysgVTcYCKlMkbJT-iiw_t9cy1SlskD2YuyknHE,26084
+npcpy-1.2.20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+npcpy-1.2.20.dist-info/top_level.txt,sha256=g1pbSvrOOncB74Bg5-J0Olg4V0A5VzDw-Xz5YObq8BU,6
+npcpy-1.2.20.dist-info/RECORD,,

{npcpy-1.2.19.dist-info → npcpy-1.2.20.dist-info}/WHEEL RENAMED Viewed

File without changes

{npcpy-1.2.19.dist-info → npcpy-1.2.20.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{npcpy-1.2.19.dist-info → npcpy-1.2.20.dist-info}/top_level.txt RENAMED Viewed

File without changes

npcpy 1.2.19__py3-none-any.whl → 1.2.20__py3-none-any.whl

npcpy 1.2.19py3-none-any.whl → 1.2.20py3-none-any.whl