PyPI - compactpy - Versions diffs - 1.0.0__py3-none-any.whl - Mend

compactpy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

compactpy/__init__.py +0 -0
compactpy/compressors/__init__.py +0 -0
compactpy/compressors/attention.py +62 -0
compactpy/compressors/dedup.py +30 -0
compactpy/compressors/semantic.py +50 -0
compactpy/core/__init__.py +0 -0
compactpy/core/chunk.py +29 -0
compactpy/graph_memory.py +52 -0
compactpy/memory.py +37 -0
compactpy/scoring.py +72 -0
compactpy/token_utils.py +44 -0
compactpy-1.0.0.dist-info/METADATA +20 -0
compactpy-1.0.0.dist-info/RECORD +15 -0
compactpy-1.0.0.dist-info/WHEEL +5 -0
compactpy-1.0.0.dist-info/top_level.txt +1 -0

compactpy/__init__.py ADDED Viewed

File without changes

compactpy/compressors/__init__.py ADDED Viewed

File without changes

compactpy/compressors/attention.py ADDED Viewed

@@ -0,0 +1,62 @@
+from sentence_transformers import SentenceTransformer
+import numpy as np
+from compactpy.token_utils import calculate_savings
+class AttentionAwareCompressor:
+    """
+    Version 6: Attention-Aware Dynamic Compression Framework.
+
+    Ranks stored context blocks by embedding similarity to an incoming query and
+    keeps the best-ranked blocks that fit inside a token budget.
+    """
+
+    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
+        # The same sentence-transformers model embeds both the query and the context.
+        self.model = SentenceTransformer(model_name)
+
+    @staticmethod
+    def _cosine_similarity(v1: np.ndarray, v2: np.ndarray) -> float:
+        """Return the cosine similarity of two vectors, or 0.0 if either has zero norm."""
+        norm_v1 = np.linalg.norm(v1)
+        norm_v2 = np.linalg.norm(v2)
+        if norm_v1 == 0 or norm_v2 == 0:
+            # Avoid dividing by zero for all-zero embeddings.
+            return 0.0
+        return float(np.dot(v1, v2) / (norm_v1 * norm_v2))
+
+    def compress_context_for_query(self, query: str, context_pool: list[str], token_budget: int = 40) -> tuple[str, dict]:
+        """
+        Select the context blocks most similar to ``query`` that fit in ``token_budget``.
+
+        Args:
+            query (str): The live query the context is ranked against.
+            context_pool (list[str]): Candidate context blocks.
+            token_budget (int, optional): Maximum summed token count of the selected
+                blocks (each block is counted on its own). Defaults to 40.
+        Returns:
+            tuple: The selected blocks joined by single spaces (most similar first) and
+            a metrics dictionary with the keys produced by ``calculate_savings``.
+        """
+        if not context_pool:
+            # Report the same keys as calculate_savings() so callers can read the
+            # metrics uniformly instead of hitting a KeyError on an empty dict.
+            return "", {
+                "original_tokens": 0,
+                "compressed_tokens": 0,
+                "tokens_saved": 0,
+                "reduction_percentage": 0.0,
+            }
+        # count_tokens is imported at call time rather than at module import.
+        from compactpy.token_utils import count_tokens
+
+        # 1. Embed the query and the historical context records
+        query_embedding = self.model.encode(query, convert_to_numpy=True)
+        context_embeddings = self.model.encode(context_pool, convert_to_numpy=True)
+        # 2. Score every block against the query; highest similarity first.
+        #    list.sort is stable, so equally similar blocks keep their pool order.
+        ranked_memories = [
+            (self._cosine_similarity(query_embedding, embedding), text)
+            for text, embedding in zip(context_pool, context_embeddings)
+        ]
+        ranked_memories.sort(key=lambda pair: pair[0], reverse=True)
+        # 3. Greedily fill the budget. A block that does not fit is skipped, but the
+        #    scan continues so that smaller, lower-ranked blocks can still be added.
+        selected_blocks = []
+        current_tokens = 0
+        for _similarity, text in ranked_memories:
+            block_tokens = count_tokens(text)
+            if current_tokens + block_tokens <= token_budget:
+                selected_blocks.append(text)
+                current_tokens += block_tokens
+        original_flat = " ".join(context_pool)
+        compressed_flat = " ".join(selected_blocks)
+        metrics = calculate_savings(original_flat, compressed_flat)
+        return compressed_flat, metrics

compactpy/compressors/dedup.py ADDED Viewed

@@ -0,0 +1,30 @@
+from compactpy.token_utils import calculate_savings
+class DeduplicationEngine:
+    """
+    v1: deduplication engine that drops exact-duplicate text segments from its input.
+    """
+
+    def compress(self, text:list[str]) -> tuple[list[str], dict]:
+        """
+        Remove repeated segments, keeping the first occurrence of each in input order.
+        Args:
+            text (list[str]): The text segments to deduplicate.
+        Returns:
+            tuple: The list of unique segments and the token-savings report computed
+            by ``calculate_savings`` on the space-joined input and output.
+        """
+        # dict keys are unique and keep insertion order, so this retains the first
+        # occurrence of every segment.
+        unique_segments = list(dict.fromkeys(text))
+        savings_report = calculate_savings(" ".join(text), " ".join(unique_segments))
+        return unique_segments, savings_report

compactpy/compressors/semantic.py ADDED Viewed

@@ -0,0 +1,50 @@
+from sentence_transformers import SentenceTransformer
+import numpy as np
+from compactpy.token_utils import calculate_savings
+class SemanticCompressor:
+    """
+    v2: Version 2: Semantic Compression Engine.
+
+    Filters out semantically redundant text items based on an embedding similarity
+    threshold.
+    """
+
+    def __init__(self, threshold: float = 0.75, model_name: str='all-MiniLM-L6-v2'):
+        # An item whose cosine similarity to an already-kept item reaches this
+        # threshold is treated as redundant.
+        self.threshold = threshold
+        self.model = SentenceTransformer(model_name)
+
+    @staticmethod
+    def _cosine_similarity(v1: np.ndarray, v2: np.ndarray) -> float:
+        """Return the cosine similarity of two vectors, or 0.0 if either has zero norm."""
+        norm_v1 = np.linalg.norm(v1)
+        norm_v2 = np.linalg.norm(v2)
+        if norm_v1 == 0 or norm_v2 == 0:
+            # Avoid dividing by zero for all-zero embeddings.
+            return 0.0
+        return float(np.dot(v1, v2) / (norm_v1 * norm_v2))
+
+    def compress(self, texts: list[str]) -> tuple[list[str], dict]:
+        """
+        Keep only the items that are not near-duplicates of an earlier kept item.
+
+        Args:
+            texts (list[str]): The text items to filter.
+        Returns:
+            tuple: The retained items in input order and a report dictionary with the
+            keys produced by ``calculate_savings``.
+        """
+        if not texts:
+            # Use the same keys as calculate_savings() so the report has one schema
+            # for empty and non-empty input.
+            return [], {
+                "original_tokens": 0,
+                "compressed_tokens": 0,
+                "tokens_saved": 0,
+                "reduction_percentage": 0.0,
+            }
+        # Compute embeddings for all texts in one batch.
+        embeddings = self.model.encode(texts, convert_to_numpy=True)
+        unique_texts = []
+        unique_embeddings = []
+        for text, current_emb in zip(texts, embeddings):
+            # Compare only against items already kept; any() stops at the first hit.
+            is_redundant = any(
+                self._cosine_similarity(current_emb, kept_emb) >= self.threshold
+                for kept_emb in unique_embeddings
+            )
+            if not is_redundant:
+                unique_texts.append(text)
+                unique_embeddings.append(current_emb)
+        # Generate a report on token savings
+        original_text = " ".join(texts)
+        compressed_text = " ".join(unique_texts)
+        report = calculate_savings(original_text, compressed_text)
+        return unique_texts, report

compactpy/core/__init__.py ADDED Viewed

File without changes

compactpy/core/chunk.py ADDED Viewed

@@ -0,0 +1,29 @@
+def chunk_text_by_words(text: str, chunk_size: int = 50, overlap: int = 10) -> list[str]:
+    """
+    Split text into overlapping chunks of whitespace-separated words.
+    Args:
+        text (str): The input text to be chunked.
+        chunk_size (int, optional): The number of words in each chunk. Defaults to 50.
+        overlap (int, optional): The number of words shared by consecutive chunks.
+            Defaults to 10.
+    Returns:
+        list[str]: The chunks in order; empty if the text contains no words. The last
+        chunk may be shorter than ``chunk_size``.
+    Raises:
+        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is negative or
+            not smaller than ``chunk_size`` (the window could never advance).
+    """
+    if chunk_size <= 0:
+        raise ValueError("chunk_size must be a positive integer")
+    if not 0 <= overlap < chunk_size:
+        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")
+    words = text.split()
+    if not words:
+        return []
+    # Each window starts this many words after the previous one; validated > 0 above.
+    step = chunk_size - overlap
+    chunks = []
+    for start in range(0, len(words), step):
+        chunks.append(" ".join(words[start:start + chunk_size]))
+        # Stop once a window reaches the end of the text; a further window would
+        # only repeat words already contained in this chunk.
+        if start + chunk_size >= len(words):
+            break
+    return chunks

compactpy/graph_memory.py ADDED Viewed

@@ -0,0 +1,52 @@
+import networkx as nx
+class GraphMemorySystem:
+    """
+    Version 5: Graph Memory System.
+
+    Stores knowledge as (source, relation, target) triplets in a directed graph
+    instead of raw, flat text.
+    """
+
+    def __init__(self):
+        # Initialize an empty directed graph using NetworkX
+        self.graph = nx.DiGraph()
+
+    def add_relation(self, source: str, relation: str, target: str):
+        """
+        Insert a directed ``source -> target`` edge labelled with ``relation``.
+
+        Node names are stripped of surrounding whitespace; the relation is stripped
+        and lower-cased. The graph is a plain DiGraph, so adding the same
+        source/target pair again replaces the stored relation.
+        """
+        source = source.strip()
+        target = target.strip()
+        relation = relation.strip().lower()
+        # add_edge creates any missing endpoint nodes; the explicit add_node calls
+        # are kept so node creation stays visible.
+        self.graph.add_node(source)
+        self.graph.add_node(target)
+        self.graph.add_edge(source, target, relation=relation)
+
+    def get_relationships_as_text(self) -> list[str]:
+        """
+        Return every edge as a ``"source → relation → target"`` string.
+
+        Edges without a ``relation`` attribute are labelled ``connected_to``.
+        """
+        return [
+            f"{u} → {data.get('relation', 'connected_to')} → {v}"
+            for u, v, data in self.graph.edges(data=True)
+        ]
+
+    def find_connected_concepts(self, entity: str) -> list[str]:
+        """
+        Describe every edge that leaves or enters ``entity``.
+
+        Args:
+            entity (str): The node to look up. If the exact name is not a node, the
+                whitespace-stripped name is tried, matching how ``add_relation``
+                stores node names.
+        Returns:
+            list[str]: ``"<source> is <relation> <target>"`` statements, outgoing
+            edges first, then incoming ones; empty if the node is unknown.
+        """
+        if not self.graph.has_node(entity):
+            entity = entity.strip()
+            if not self.graph.has_node(entity):
+                return []
+        connections = []
+        # Outgoing relations
+        for successor in self.graph.successors(entity):
+            # Same fallback label as get_relationships_as_text for unlabelled edges.
+            rel = self.graph[entity][successor].get("relation", "connected_to")
+            connections.append(f"{entity} is {rel} {successor}")
+        # Incoming relations
+        for pred in self.graph.predecessors(entity):
+            if pred == entity:
+                # A self-loop was already reported in the outgoing pass.
+                continue
+            rel = self.graph[pred][entity].get("relation", "connected_to")
+            connections.append(f"{pred} is {rel} {entity}")
+        return connections

compactpy/memory.py ADDED Viewed

@@ -0,0 +1,37 @@
+import time
+class HierarchicalMemory:
+    """
+    Version 3: Hierarchical Memory Tier Architecture.
+
+    Holds memory nodes in four separate tier lists: raw, working, summary and
+    long-term.
+    """
+
+    def __init__(self):
+        # Every instance gets its own, initially empty, tier lists.
+        self.raw_memory = []       # Ephemeral input stream log
+        self.working_memory = []   # Active short-term operational context
+        self.summary_memory = []   # Abstracted/Summarized context blocks
+        self.long_term_memory = [] # Consolidated permanent values/rules
+
+    def add_memory(self, text: str, importance: float = 0.5, utility: float = 0.5, metadata: dict = None):
+        """
+        Append a new node to ``raw_memory``.
+
+        The node records the text, the current time, a frequency of 1, the given
+        importance and utility, and the metadata (an empty dict when none is given).
+        """
+        self.raw_memory.append(
+            dict(
+                text=text,
+                timestamp=time.time(),
+                frequency=1,
+                importance=importance,
+                utility=utility,
+                metadata=metadata or {},
+            )
+        )
+
+    def increment_frequency(self, text_content: str):
+        """
+        Add 1 to the frequency of the first node whose text equals ``text_content``.
+
+        Tiers are searched in the order raw, working, long-term; only the first match
+        is updated, and nothing happens when no node matches.
+        """
+        search_order = (self.raw_memory, self.working_memory, self.long_term_memory)
+        first_match = next(
+            (node for tier in search_order for node in tier if node["text"] == text_content),
+            None,
+        )
+        if first_match is not None:
+            first_match["frequency"] += 1

compactpy/scoring.py ADDED Viewed

@@ -0,0 +1,72 @@
+import time
+class MemoryScoringEngine:
+    """
+    ver4: memory scoring engine that evaluates memory items based on importance,
+    utility, frequency, and recency.
+    """
+
+    def __init__(self, w_imp:float=0.4, w_util:float=0.3, w_freq:float=0.2, w_rec:float=0.2):
+        # Weights of the four score components. NOTE(review): the defaults sum to
+        # 1.1, so a score can exceed 1.0 - confirm whether that is intended.
+        self.w_imp = w_imp
+        self.w_util = w_util
+        self.w_freq = w_freq
+        self.w_rec = w_rec
+
+    def calculate_score(self, importance: float, utility: float, frequency: float, recency: float) -> float:
+        """
+        Return the weighted sum of the four score components.
+        Args:
+            importance (float): Importance score of the memory (0.0 to 1.0).
+            utility (float): Utility score of the memory (0.0 to 1.0).
+            frequency (float): Access-frequency component; the lifecycle pass supplies
+                a value normalized to 0.0-1.0.
+            recency (float): Recency component; the lifecycle pass supplies a value
+                normalized to 0.0-1.0, where 1.0 is the newest memory.
+        Returns:
+            float: ``w_imp*importance + w_util*utility + w_freq*frequency + w_rec*recency``.
+        """
+        return (self.w_imp * importance) + (self.w_util * utility) + (self.w_freq * frequency) + (self.w_rec * recency)
+
+    def process_lifecycle_cycle(self, memory_system):
+        """
+        Score every raw memory and route it to the long-term or working tier.
+
+        Each node in ``memory_system.raw_memory`` receives a ``lifecycle_score`` key
+        (rounded to 3 decimals). ``long_term_memory`` and ``working_memory`` are then
+        replaced with the nodes routed in this pass; low-scoring nodes are placed in
+        neither tier. Nothing is changed when ``raw_memory`` is empty.
+        Args:
+            memory_system (HierarchicalMemory): The memory system whose raw memories
+                are scored.
+        """
+        all_memories = memory_system.raw_memory
+        if not all_memories:
+            return
+        timestamps = [m["timestamp"] for m in all_memories]
+        max_t, min_t = max(timestamps), min(timestamps)
+        t_range = max_t - min_t
+        # Create temporary lists to hold current cycle route allocations
+        promoted_long_term = []
+        kept_working = []
+        for m in all_memories:
+            # 1. Normalized recency (1.0 = newest, 0.0 = oldest). When every timestamp
+            #    is identical (e.g. a single memory) each item is the newest one, so
+            #    it gets 1.0 rather than being scored as the oldest.
+            normalized_recency = (m["timestamp"] - min_t) / t_range if t_range else 1.0
+            # 2. Frequency saturates at 5 accesses.
+            normalized_frequency = min(m["frequency"] / 5.0, 1.0)
+            # 3. Compute final lifecycle performance score
+            final_score = self.calculate_score(
+                importance=m["importance"],
+                utility=m["utility"],
+                frequency=normalized_frequency,
+                recency=normalized_recency,
+            )
+            m["lifecycle_score"] = round(final_score, 3)
+            # 4. Version 4 Lifecycle Routing Rules
+            if final_score >= 0.7:       # High value -> Promote directly to Long-Term Memory
+                promoted_long_term.append(m)
+            elif final_score >= 0.35:    # Medium value -> Keep in active Working Memory
+                kept_working.append(m)
+            # Low value -> left out of both tiers (the node itself stays in raw_memory).
+        # Sync routed allocations back to the main memory layer arrays
+        memory_system.long_term_memory = promoted_long_term
+        memory_system.working_memory = kept_working

compactpy/token_utils.py ADDED Viewed

@@ -0,0 +1,44 @@
+import tiktoken
+def count_tokens(text: str, model_name: str='gpt-4') -> int:
+    """
+    Count the number of tokens in a given text using the specified model's tokenizer.
+    Args:
+        text (str): The input text to be tokenized and counted.
+        model_name (str, optional): The name of the model whose tokenizer to use. Defaults to 'gpt-4'.
+    Returns:
+        int: The number of tokens in the input text.
+    """
+    try: # get the standard encoding for the specified model
+        encoding = tiktoken.encoding_for_model(model_name)
+    except KeyError: # if the model is not found, use the default encoding
+        encoding = tiktoken.get_encoding("cl100k_base")
+    # By default tiktoken raises ValueError when the text contains a special-token
+    # string such as "<|endoftext|>". Counting must work on arbitrary text, so the
+    # check is disabled and such strings are tokenized as ordinary text.
+    return len(encoding.encode(text, disallowed_special=()))
+def calculate_savings(original_text: str, compressed_text: str, model_name: str='gpt-4') -> dict:
+    """
+    Report how many tokens compression removed from the original text.
+    Args:
+        original_text (str): The original input text before compression.
+        compressed_text (str): The compressed version of the input text.
+        model_name (str, optional): The name of the model whose tokenizer to use. Defaults to 'gpt-4'.
+    Returns:
+        dict: ``original_tokens``, ``compressed_tokens``, ``tokens_saved`` and
+        ``reduction_percentage`` (rounded to 2 decimals; 0.0 when the original text
+        has no tokens).
+    """
+    tokens_before = count_tokens(original_text, model_name)
+    tokens_after = count_tokens(compressed_text, model_name)
+    saved = tokens_before - tokens_after
+    if tokens_before > 0:
+        percent_saved = saved / tokens_before * 100
+    else:
+        # Nothing to compress: avoid dividing by zero.
+        percent_saved = 0.0
+    return {
+        "original_tokens": tokens_before,
+        "compressed_tokens": tokens_after,
+        "tokens_saved": saved,
+        "reduction_percentage": round(percent_saved, 2)
+    }

compactpy-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,20 @@
+Metadata-Version: 2.4
+Name: compactpy
+Version: 1.0.0
+Summary: An intelligent, hierarchical context compression framework for LLM memory systems.
+Author: Priyankar Majumdar
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+Requires-Dist: tiktoken>=0.7.0
+Requires-Dist: sentence-transformers>=3.0.0
+Requires-Dist: networkx>=3.0
+Requires-Dist: numpy>=1.26.4
+Dynamic: requires-python
+# CompactPy
+An intelligent, hierarchical context compression framework for LLM memory systems.

compactpy-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,15 @@
+compactpy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+compactpy/graph_memory.py,sha256=qO8Lk-TnIHscPSgw2j5E7NKg04b80sDnNbOYDZCXaLo,2006
+compactpy/memory.py,sha256=3eWH1tIBK3dac5LdIFWTe6qG32P1M90yDhfuzIJoaRQ,1608
+compactpy/scoring.py,sha256=vuh9JAJ8AhigtKf8nGvPlbizrS6H175Z3oI2WFlx_9I,3148
+compactpy/token_utils.py,sha256=5Wi1eIZI4mpumgwgcKkcZlpMzR0cM9FHBEgDKANBoH0,1824
+compactpy/compressors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+compactpy/compressors/attention.py,sha256=dakzAoycp5V0HBsx2CS25dKN8nHfeV2Z2SbmcH7ST8A,2589
+compactpy/compressors/dedup.py,sha256=3YGFqSBqSoKaRvE_jcI4lQ1HywQPXzQeoM_xVeDf9vs,1023
+compactpy/compressors/semantic.py,sha256=22nIy2nehuAnSGryONkLDKmA156bW63sWm1f8x4zye4,1953
+compactpy/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+compactpy/core/chunk.py,sha256=5JM17WwMoV5G1fJgdyAKv-nBiBHSJKjlKvld2V7u5BQ,921
+compactpy-1.0.0.dist-info/METADATA,sha256=NyrErLYaBcScosPIN4racXHqIE8RSiMHCYYVywTii4Y,727
+compactpy-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+compactpy-1.0.0.dist-info/top_level.txt,sha256=47wmV49-78qF28J2mZ1V8Yat8BunmU3m2XXrAMHD0Hk,10
+compactpy-1.0.0.dist-info/RECORD,,

compactpy-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

compactpy-1.0.0.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+compactpy