compactpy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
compactpy/__init__.py ADDED
File without changes
File without changes
@@ -0,0 +1,62 @@
1
+ from sentence_transformers import SentenceTransformer
2
+ import numpy as np
3
+ from compactpy.token_utils import calculate_savings
4
+
5
class AttentionAwareCompressor:
    """
    Version 6: Attention-Aware Dynamic Compression Framework.

    Scores every context block by embedding similarity to a query and keeps
    the best-scoring blocks that still fit inside a token budget.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        # One embedding model is shared by the query and the context pool.
        self.model = SentenceTransformer(model_name)

    @staticmethod
    def _cosine_similarity(v1: np.ndarray, v2: np.ndarray) -> float:
        """Return the cosine similarity of two vectors (0.0 if either norm is zero)."""
        inner = np.dot(v1, v2)
        magnitude_1 = np.linalg.norm(v1)
        magnitude_2 = np.linalg.norm(v2)
        # A zero-length vector has no direction; report "no similarity"
        # instead of dividing by zero.
        if not (magnitude_1 and magnitude_2):
            return 0.0
        return float(inner / (magnitude_1 * magnitude_2))

    def compress_context_for_query(self, query: str, context_pool: list[str], token_budget: int = 40) -> tuple[str, dict]:
        """
        Select the context blocks most similar to ``query`` within a token budget.

        Args:
            query (str): Text the context blocks are ranked against.
            context_pool (list[str]): Candidate context blocks.
            token_budget (int, optional): Maximum summed per-block token count
                of the selected blocks. Defaults to 40.

        Returns:
            tuple: The selected blocks joined by single spaces (highest
            similarity first) and the dictionary produced by
            ``calculate_savings``. An empty pool yields ``("", {})``.
        """
        if not context_pool:
            return "", {}

        # 1. Embed the query and every candidate block.
        query_vector = self.model.encode(query, convert_to_numpy=True)
        pool_vectors = self.model.encode(context_pool, convert_to_numpy=True)

        # 2. Pair each block with its similarity to the query.
        scored_blocks = [
            (self._cosine_similarity(query_vector, pool_vectors[position]), block)
            for position, block in enumerate(context_pool)
        ]

        # Most relevant first; the sort is stable, so ties keep pool order.
        scored_blocks.sort(key=lambda pair: pair[0], reverse=True)

        # 3. Greedily admit blocks in relevance order while the budget allows.
        from compactpy.token_utils import count_tokens

        kept_blocks = []
        tokens_used = 0
        for _score, block in scored_blocks:
            cost = count_tokens(block)
            if tokens_used + cost > token_budget:
                # Too large for the remaining budget; a later, smaller block
                # may still fit, so keep scanning.
                continue
            kept_blocks.append(block)
            tokens_used += cost

        compressed_flat = " ".join(kept_blocks)
        metrics = calculate_savings(" ".join(context_pool), compressed_flat)
        return compressed_flat, metrics
@@ -0,0 +1,30 @@
1
+ from compactpy.token_utils import calculate_savings
2
+
3
class DeduplicationEngine:
    """
    v1: deduplication engine that removes exact duplicate text segments.
    """

    def compress(self, text: list[str]) -> tuple[list[str], dict]:
        """
        Drop repeated segments, keeping the first occurrence of each.

        Args:
            text (list[str]): Text segments to deduplicate.

        Returns:
            tuple: The unique segments in first-seen order and the report
            dictionary produced by ``calculate_savings``.
        """
        # dict keys are unique and keep insertion order, so this retains the
        # first occurrence of every segment.
        deduplicated = list(dict.fromkeys(text))

        # Token-savings report, comparing the space-joined inputs and outputs.
        report = calculate_savings(" ".join(text), " ".join(deduplicated))
        return deduplicated, report
@@ -0,0 +1,50 @@
1
+ from sentence_transformers import SentenceTransformer
2
+ import numpy as np
3
+ from compactpy.token_utils import calculate_savings
4
+
5
class SemanticCompressor:
    """
    Version 2: Semantic Compression Engine.

    Filters out semantically redundant text items based on an embedding
    similarity threshold.
    """

    def __init__(self, threshold: float = 0.75, model_name: str = 'all-MiniLM-L6-v2'):
        # Items whose cosine similarity to an already-kept item is at or
        # above this threshold are treated as redundant.
        self.threshold = threshold
        self.model = SentenceTransformer(model_name)

    @staticmethod
    def _cosine_similarity(v1: np.ndarray, v2: np.ndarray) -> float:
        """Return the cosine similarity of two vectors (0.0 if either norm is zero)."""
        dot_product = np.dot(v1, v2)
        norm_v1 = np.linalg.norm(v1)
        norm_v2 = np.linalg.norm(v2)
        if norm_v1 == 0 or norm_v2 == 0:
            return 0.0
        return float(dot_product / (norm_v1 * norm_v2))

    def compress(self, texts: list[str]) -> tuple[list[str], dict]:
        """
        Remove items that are semantically close to an earlier kept item.

        Args:
            texts (list[str]): Text items to filter.

        Returns:
            tuple: The kept items in input order and a token-savings report.
            The report has the keys ``original_tokens``,
            ``compressed_tokens``, ``tokens_saved`` and
            ``reduction_percentage`` on every path, including empty input.
        """
        if not texts:
            # Same schema as the non-empty path so callers can read the
            # report without special-casing empty input.
            return [], {
                "original_tokens": 0,
                "compressed_tokens": 0,
                "tokens_saved": 0,
                "reduction_percentage": 0.0,
            }

        # Compute embeddings for all texts in one batch.
        embeddings = self.model.encode(texts, convert_to_numpy=True)
        unique_texts = []
        unique_embeddings = []

        for text, current_emb in zip(texts, embeddings):
            # Redundant as soon as one kept embedding is similar enough;
            # any() stops at the first such match.
            is_redundant = any(
                self._cosine_similarity(current_emb, kept_emb) >= self.threshold
                for kept_emb in unique_embeddings
            )
            if not is_redundant:
                unique_texts.append(text)
                unique_embeddings.append(current_emb)

        # Generate a report on token savings.
        report = calculate_savings(" ".join(texts), " ".join(unique_texts))
        return unique_texts, report
File without changes
@@ -0,0 +1,29 @@
1
def chunk_text_by_words(text: str, chunk_size: int = 50, overlap: int = 10) -> list[str]:
    """
    Split text into chunks of whitespace-separated words.

    Consecutive chunks start ``chunk_size - overlap`` words apart, so each
    chunk repeats the last ``overlap`` words of the previous one. The final
    chunk may be shorter than ``chunk_size``.

    Args:
        text (str): The input text to be chunked.
        chunk_size (int, optional): The number of words in each chunk. Defaults to 50.
        overlap (int, optional): The number of overlapping words between chunks. Defaults to 10.

    Returns:
        list[str]: A list of text chunks; empty when ``text`` has no words.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or if ``overlap`` is
            greater than or equal to ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        # The window would never advance (step <= 0), so the chunking loop
        # could not terminate.
        raise ValueError("overlap must be smaller than chunk_size")

    words = text.split()
    step = chunk_size - overlap
    # range() yields no start positions for an empty word list, giving [].
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), step)
    ]
@@ -0,0 +1,52 @@
1
+ import networkx as nx
2
+
3
class GraphMemorySystem:
    """
    Version 5: Graph Memory System.

    Stores knowledge as (source, relation, target) triplets in a directed
    graph instead of raw, flat text.
    """

    def __init__(self):
        # Initialize an empty directed graph using NetworkX.
        self.graph = nx.DiGraph()

    def add_relation(self, source: str, relation: str, target: str):
        """
        Insert a directed ``source -> target`` edge labelled with ``relation``.

        Surrounding whitespace is stripped from all three values and the
        relation is lower-cased. Adding the same source/target pair again
        updates the ``relation`` attribute of the existing edge.
        """
        source = source.strip()
        target = target.strip()
        relation = relation.strip().lower()

        # Add nodes if they don't exist, then bridge them with a named edge attribute.
        self.graph.add_node(source)
        self.graph.add_node(target)
        self.graph.add_edge(source, target, relation=relation)

    def get_relationships_as_text(self) -> list[str]:
        """
        Return every edge as a ``"source → relation → target"`` string.

        Edges without a ``relation`` attribute are labelled ``connected_to``.
        """
        return [
            f"{u} → {data.get('relation', 'connected_to')} → {v}"
            for u, v, data in self.graph.edges(data=True)
        ]

    def find_connected_concepts(self, entity: str) -> list[str]:
        """
        Describe every edge that touches ``entity``.

        Returns:
            list[str]: ``"<source> is <relation> <target>"`` strings, outgoing
            edges first, then incoming ones. Empty if ``entity`` is not a
            node. Edges without a ``relation`` attribute are labelled
            ``connected_to``, matching ``get_relationships_as_text``.
        """
        if not self.graph.has_node(entity):
            return []

        connections = []
        # Outgoing relations.
        for successor in self.graph.successors(entity):
            rel = self.graph[entity][successor].get("relation", "connected_to")
            connections.append(f"{entity} is {rel} {successor}")
        # Incoming relations.
        for predecessor in self.graph.predecessors(entity):
            if predecessor == entity:
                # A self-loop was already reported as an outgoing edge.
                continue
            rel = self.graph[predecessor][entity].get("relation", "connected_to")
            connections.append(f"{predecessor} is {rel} {entity}")

        return connections
compactpy/memory.py ADDED
@@ -0,0 +1,37 @@
1
+ import time
2
+
3
class HierarchicalMemory:
    """
    Version 3: Hierarchical Memory Tier Architecture.

    Holds memory nodes in four separate tier lists: raw, working, summary
    and long-term.
    """

    def __init__(self):
        # Each instance gets its own fresh list per tier.
        self.raw_memory = []        # Every node passed to add_memory lands here
        self.working_memory = []    # Active short-term operational context
        self.summary_memory = []    # Abstracted/summarized context blocks
        self.long_term_memory = []  # Consolidated permanent values/rules

    def add_memory(self, text: str, importance: float = 0.5, utility: float = 0.5, metadata: dict = None):
        """
        Append a new memory node to the raw tier.

        The node records the text, the current wall-clock time, a frequency
        of 1, the given importance and utility, and the metadata (an empty
        dict when none, or an empty one, is supplied).
        """
        self.raw_memory.append(
            dict(
                text=text,
                timestamp=time.time(),
                frequency=1,
                importance=importance,
                utility=utility,
                metadata=metadata if metadata else {},
            )
        )

    def increment_frequency(self, text_content: str):
        """
        Bump the frequency of the first node whose text equals ``text_content``.

        Tiers are searched in the order raw, working, long-term; only the
        first match is updated. Nothing happens when no node matches.
        """
        searched_tiers = (self.raw_memory, self.working_memory, self.long_term_memory)
        first_match = next(
            (node for tier in searched_tiers for node in tier if node["text"] == text_content),
            None,
        )
        if first_match is not None:
            first_match["frequency"] += 1
compactpy/scoring.py ADDED
@@ -0,0 +1,72 @@
1
+ import time
2
+
3
class MemoryScoringEngine:
    """
    Version 4: memory scoring engine that evaluates memory items based on
    importance, utility, frequency, and recency.
    """

    def __init__(self, w_imp: float = 0.4, w_util: float = 0.3, w_freq: float = 0.2, w_rec: float = 0.2):
        # Weights of the linear scoring formula.
        # NOTE(review): the defaults sum to 1.1, so the maximum score for
        # inputs in [0, 1] is 1.1 rather than 1.0 - confirm this is intended.
        self.w_imp = w_imp
        self.w_util = w_util
        self.w_freq = w_freq
        self.w_rec = w_rec

    def calculate_score(self, importance: float, utility: float, frequency: int, recency: float) -> float:
        """
        Return the weighted sum of the four memory signals.

        Args:
            importance (float): Importance score of the memory.
            utility (float): Utility score of the memory.
            frequency (int): Frequency signal; ``process_lifecycle_cycle``
                passes a value normalized to at most 1.0.
            recency (float): Recency signal; ``process_lifecycle_cycle``
                passes a value normalized to the range 0.0 to 1.0.

        Returns:
            float: ``w_imp*importance + w_util*utility + w_freq*frequency + w_rec*recency``.
        """
        return (self.w_imp * importance) + (self.w_util * utility) + (self.w_freq * frequency) + (self.w_rec * recency)

    def process_lifecycle_cycle(self, memory_system):
        """
        Score every raw memory and rebuild the working and long-term tiers.

        Each node in ``memory_system.raw_memory`` gets a ``lifecycle_score``
        key (rounded to 3 decimals). Nodes scoring at least 0.7 go to
        ``long_term_memory``, nodes scoring at least 0.35 go to
        ``working_memory``, and the rest are placed in neither tier. Both
        tier lists are replaced wholesale; ``raw_memory`` itself is left
        untouched. Nothing happens when ``raw_memory`` is empty.

        Args:
            memory_system (HierarchicalMemory): Object exposing ``raw_memory``,
                ``working_memory`` and ``long_term_memory`` attributes.
        """
        all_memories = memory_system.raw_memory
        if not all_memories:
            return

        timestamps = [m["timestamp"] for m in all_memories]
        max_t, min_t = max(timestamps), min(timestamps)
        # Avoid dividing by zero when every timestamp is identical.
        # NOTE(review): in that case every node gets recency 0.0 (including a
        # single, newest node) - confirm this is the desired behaviour.
        t_range = max_t - min_t if max_t != min_t else 1.0

        # Temporary lists holding this cycle's routing decisions.
        promoted_long_term = []
        kept_working = []

        for m in all_memories:
            # 1. Normalized recency (1.0 = newest, 0.0 = oldest).
            normalized_recency = (m["timestamp"] - min_t) / t_range

            # 2. Frequency saturates at 5 accesses.
            normalized_frequency = min(m["frequency"] / 5.0, 1.0)

            # 3. Final lifecycle score.
            final_score = self.calculate_score(
                importance=m["importance"],
                utility=m["utility"],
                recency=normalized_recency,
                frequency=normalized_frequency,
            )
            m["lifecycle_score"] = round(final_score, 3)

            # 4. Routing uses the unrounded score.
            if final_score >= 0.7:      # High value -> long-term memory
                promoted_long_term.append(m)
            elif final_score >= 0.35:   # Medium value -> working memory
                kept_working.append(m)
            # Low value -> placed in neither tier.

        # Sync routed allocations back to the memory system.
        memory_system.long_term_memory = promoted_long_term
        memory_system.working_memory = kept_working
@@ -0,0 +1,44 @@
1
+ import tiktoken
2
+
3
def count_tokens(text: str, model_name: str = 'gpt-4') -> int:
    """
    Count the number of tokens in a given text using the specified model's tokenizer.

    Args:
        text (str): The input text to be tokenized and counted.
        model_name (str, optional): The name of the model whose tokenizer to use. Defaults to 'gpt-4'.

    Returns:
        int: The number of tokens in the input text.
    """
    try:  # get the standard encoding for the specified model
        encoding = tiktoken.encoding_for_model(model_name)
    except KeyError:  # if the model is not found, use the default encoding
        encoding = tiktoken.get_encoding("cl100k_base")
    # By default encode() raises ValueError when the text contains a
    # special-token string such as "<|endoftext|>". For counting arbitrary
    # text, treat such strings as ordinary text instead.
    return len(encoding.encode(text, disallowed_special=()))
19
+
20
def calculate_savings(original_text: str, compressed_text: str, model_name: str='gpt-4') -> dict:
    """
    Report how many tokens compression removed from a text.

    Args:
        original_text (str): The original input text before compression.
        compressed_text (str): The compressed version of the input text.
        model_name (str, optional): The name of the model whose tokenizer to use. Defaults to 'gpt-4'.

    Returns:
        dict: ``original_tokens``, ``compressed_tokens``, ``tokens_saved``
        and ``reduction_percentage`` (rounded to 2 decimals; 0.0 when the
        original text has no tokens).
    """
    tokens_before = count_tokens(original_text, model_name)
    tokens_after = count_tokens(compressed_text, model_name)
    saved = tokens_before - tokens_after

    # Guard the division: an empty original has nothing to reduce.
    if tokens_before > 0:
        percent = saved / tokens_before * 100
    else:
        percent = 0.0

    return dict(
        original_tokens=tokens_before,
        compressed_tokens=tokens_after,
        tokens_saved=saved,
        reduction_percentage=round(percent, 2),
    )
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.4
2
+ Name: compactpy
3
+ Version: 1.0.0
4
+ Summary: An intelligent, hierarchical context compression framework for LLM memory systems.
5
+ Author: Priyankar Majumdar
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
10
+ Requires-Python: >=3.9
11
+ Description-Content-Type: text/markdown
12
+ Requires-Dist: tiktoken>=0.7.0
13
+ Requires-Dist: sentence-transformers>=3.0.0
14
+ Requires-Dist: networkx>=3.0
15
+ Requires-Dist: numpy>=1.26.4
16
+ Dynamic: requires-python
17
+
18
+ # CompactPy
19
+
20
+ An intelligent, hierarchical context compression framework for LLM memory systems.
@@ -0,0 +1,15 @@
1
+ compactpy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ compactpy/graph_memory.py,sha256=qO8Lk-TnIHscPSgw2j5E7NKg04b80sDnNbOYDZCXaLo,2006
3
+ compactpy/memory.py,sha256=3eWH1tIBK3dac5LdIFWTe6qG32P1M90yDhfuzIJoaRQ,1608
4
+ compactpy/scoring.py,sha256=vuh9JAJ8AhigtKf8nGvPlbizrS6H175Z3oI2WFlx_9I,3148
5
+ compactpy/token_utils.py,sha256=5Wi1eIZI4mpumgwgcKkcZlpMzR0cM9FHBEgDKANBoH0,1824
6
+ compactpy/compressors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ compactpy/compressors/attention.py,sha256=dakzAoycp5V0HBsx2CS25dKN8nHfeV2Z2SbmcH7ST8A,2589
8
+ compactpy/compressors/dedup.py,sha256=3YGFqSBqSoKaRvE_jcI4lQ1HywQPXzQeoM_xVeDf9vs,1023
9
+ compactpy/compressors/semantic.py,sha256=22nIy2nehuAnSGryONkLDKmA156bW63sWm1f8x4zye4,1953
10
+ compactpy/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ compactpy/core/chunk.py,sha256=5JM17WwMoV5G1fJgdyAKv-nBiBHSJKjlKvld2V7u5BQ,921
12
+ compactpy-1.0.0.dist-info/METADATA,sha256=NyrErLYaBcScosPIN4racXHqIE8RSiMHCYYVywTii4Y,727
13
+ compactpy-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
14
+ compactpy-1.0.0.dist-info/top_level.txt,sha256=47wmV49-78qF28J2mZ1V8Yat8BunmU3m2XXrAMHD0Hk,10
15
+ compactpy-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ compactpy