contextflow-ai 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextflow/__init__.py +0 -0
- contextflow/core/__init__.py +0 -0
- contextflow/core/compactor.py +120 -0
- contextflow/core/optimizer.py +39 -0
- contextflow/core/scorer.py +82 -0
- contextflow/core/strategies.py +128 -0
- contextflow/utils/__init__.py +0 -0
- contextflow/utils/cost_calculator.py +0 -0
- contextflow/utils/embeddings.py +0 -0
- contextflow/utils/llm.py +206 -0
- contextflow/utils/tokenizer.py +26 -0
- contextflow_ai-0.1.2.dist-info/METADATA +77 -0
- contextflow_ai-0.1.2.dist-info/RECORD +16 -0
- contextflow_ai-0.1.2.dist-info/WHEEL +5 -0
- contextflow_ai-0.1.2.dist-info/licenses/LICENSE +21 -0
- contextflow_ai-0.1.2.dist-info/top_level.txt +1 -0
contextflow/__init__.py
ADDED
File without changes

contextflow/core/__init__.py
ADDED
File without changes

contextflow/core/compactor.py
ADDED
@@ -0,0 +1,120 @@
"""
Implements message summarization techniques
"""

from typing import List, Dict
from utils.llm import LLMClient


class MessageCompactor:
    def __init__(self):
        self.llm = LLMClient()

    def summarize(
        self,
        messages_to_summarize: List[Dict[str, str]],
        max_token_count: int = 500,
    ) -> str:
        """
        Summarizes a list of messages.

        Args:
            messages_to_summarize: The list of messages to compress
            max_token_count: The target length for the final summary

        Returns:
            summaries: A single string containing the dense summary.
        """
        return self._simple_summarize(messages_to_summarize, max_token_count)

    def _simple_summarize(
        self,
        messages_to_summarize: List[Dict[str, str]],
        max_token_count: int,
    ):
        """
        Summarizes a list of messages using a simple LLM summary.

        Args:
            messages_to_summarize: The list of messages to compress
            max_token_count: The target length for the final summary

        Returns:
            summaries: A single string containing the dense summary.
        """
        if not messages_to_summarize:
            return ""

        if len(messages_to_summarize) == 1:
            return messages_to_summarize[0]["content"]

        conversation_text = self._format_messages(messages_to_summarize)

        prompt = f"""You are summarizing a conversation to preserve key information while reducing length.

Conversation:
{conversation_text}

Instructions:
- Create a dense, information-rich summary
- Preserve all critical facts, names, numbers, and decisions
- Remove pleasantries and redundant information
- Target length: approximately {max_token_count} tokens
- Write in third person (e.g., "User reported X. Agent confirmed Y.")

Summary:"""

        try:
            summary = self.llm.generate_text(
                prompt,
                max_tokens=max_token_count,
                temperature=0.3,  # Low temperature for factual summary
            )

            return summary.strip()
        except Exception as e:
            # Fallback: return a simple concatenation
            print(f"Warning: Summarization failed ({e}). Using fallback.")
            return self._fallback_summary(messages_to_summarize)

    def _format_messages(self, messages: List[Dict[str, str]]) -> str:
        """Format messages into a readable conversation"""

        formatted = []
        for msg in messages:
            role = msg.get("role", "unknown")
            content = msg.get("content", "")
            formatted.append(f"{role.capitalize()}: {content}")

        return "\n".join(formatted)

    def _fallback_summary(self, messages: List[Dict[str, str]]) -> str:
        """Simple fallback if LLM summarization fails"""

        # Just concatenate the messages with "..." between them
        contents = [msg.get("content", "") for msg in messages]
        return " ... ".join(contents)

    def _hierarchical_summarize(
        self,
        messages_to_summarize: List[Dict[str, str]],
        agent_goal: str,
        max_token_count: int,
    ):
        """
        Summarizes a list of messages using the map-reduce method.

        Args:
            messages_to_summarize: The list of messages to compress
            agent_goal: The overall goal to guide the summary
            max_token_count: The target length for the final summary

        Returns:
            summaries: A single string containing the dense summary.
        """
        raise NotImplementedError(
            "Hierarchical summarization not yet implemented. "
            "Use simple_summarize for now."
        )
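For reference, the two pure-string helpers above behave as sketched below; this snippet is illustrative only and avoids constructing the class, since `MessageCompactor.__init__` needs a `GEMINI_API_KEY` to build its `LLMClient`.

```python
msgs = [
    {"role": "user", "content": "Order #4512 is late."},
    {"role": "assistant", "content": "I will check the carrier."},
]

# _format_messages(msgs) ->
#   "User: Order #4512 is late.\nAssistant: I will check the carrier."
# _fallback_summary(msgs) ->
#   "Order #4512 is late. ... I will check the carrier."
```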

contextflow/core/optimizer.py
ADDED
@@ -0,0 +1,39 @@
from core.compactor import MessageCompactor
from core.scorer import MessageScorer
from typing import List, Dict, Union, Literal
from core.strategies import balanced_strategy
from utils.tokenizer import count_tokens


class ContextFlow:
    def __init__(self):
        self.message_compactor = MessageCompactor()
        self.message_scorer = MessageScorer()

    def optimize(
        self,
        messages: List[Dict[str, str]],
        agent_goal: str,
        max_token_count: int = 500,
    ):
        scores = self.message_scorer.score_messages(
            messages=messages, agent_goal=agent_goal
        )

        optimized = balanced_strategy(
            messages, scores, max_token_count, self.message_compactor
        )

        tokens_before = count_tokens(messages)
        tokens_after = count_tokens(optimized)
        reduction_pct = ((tokens_before - tokens_after) / tokens_before) * 100

        return {
            "messages": optimized,
            "analytics": {
                "tokens_before": tokens_before,
                "tokens_after": tokens_after,
                "reduction_pct": reduction_pct,
                "tokens_saved": tokens_before - tokens_after,
            },
        }

contextflow/core/scorer.py
ADDED
@@ -0,0 +1,82 @@
"""
Message relevance and utility scoring
"""

from typing import List, Dict
from utils.llm import LLMClient
import numpy as np


class MessageScorer:
    def __init__(self):
        self._embedding_cache = {}
        self._llm_client = LLMClient()

    def _encode_text(self, text: str) -> np.ndarray:
        """Convert text to a vector embedding

        Args:
            text (str): Text to encode
        Returns:
            A 384-dimensional numpy array representing the text encoding
        """

        if text in self._embedding_cache:
            return self._embedding_cache[text]

        embedding = self.embedding_model.encode(
            text,
            convert_to_numpy=True,
            show_progress_bar=False,
        )

        self._embedding_cache[text] = embedding

        return embedding

    def _create_batches(self, messages: List[Dict[str, str]]):
        if len(messages) < 20:
            return [messages]

        return [messages[i : i + 20] for i in range(0, len(messages), 20)]

    def score_messages(
        self, messages: List[Dict[str, str]], agent_goal: str
    ) -> List[float]:
        """Scores messages based on how relevant they are to the agent's goal.

        Args:
            messages: A list of messages
            agent_goal: The goal of the agent
        Returns:
            A list of scores such that scores[i] is the relevancy score of messages[i]
        """

        scores = []

        batches = self._create_batches(messages)

        for i, batch in enumerate(batches):
            batch_scores = self._llm_client.batch_score_utility(
                batch, agent_goal
            )
            scores += batch_scores

        for i in range(len(scores) - 1, max(-1, len(scores) - 6), -1):
            recency_bonus = 1.0
            scores[i] += recency_bonus

        return scores

    def _calculate_utility(self, text: str, goal: str) -> float:
        """Returns a utility score (0-10 scale)

        Args:
            text: String to evaluate the utility of
            goal: The agent goal used to judge relevance
        Returns:
            Utility score of `text` between 0 and 10
        """
        text = text.strip()
        text_lower = text.lower()

        return self._llm_client.score_utility(text_lower, goal)

contextflow/core/strategies.py
ADDED
@@ -0,0 +1,128 @@
from typing import List, Dict, Union, Literal
from utils.tokenizer import count_tokens
from core.compactor import MessageCompactor


def get_strategy(
    strategy_name: Union[
        Literal["conservative"], Literal["balanced"], Literal["aggressive"]
    ],
):
    """Factory to get strategy function"""
    strategies = {
        "conservative": conservative_strategy,
        "balanced": balanced_strategy,
        "aggressive": aggressive_strategy,
    }
    return strategies.get(strategy_name, balanced_strategy)


def conservative_strategy():
    pass


def balanced_strategy(
    messages: List[Dict[str, str]],
    scores: List[float],
    max_token_count: int,
    compactor: MessageCompactor,
):
    """Optimizes a conversation (i.e. a list of messages) using a balanced strategy:
    keep high-scoring messages, summarize mid-scoring ones, drop low-scoring ones.

    Args:
        messages: List of messages
        scores: List of scores for each message
        max_token_count: Maximum number of tokens allowed
        compactor: Tool for summarizing messages
    Returns:
        Optimized list of messages whose total token count is within max_token_count
    """

    preserve_recent = 5

    recent_scores = scores[-10:] if len(scores) >= 10 else scores

    # If the recent messages are high-utility, keep more
    avg_recent_score = sum(recent_scores) / len(recent_scores)

    if avg_recent_score >= 7:
        preserve_recent = 5  # Keep 5 if they're useful
    elif avg_recent_score >= 4:
        preserve_recent = 3  # Keep 3 if they're medium
    else:
        preserve_recent = 2  # Keep only 2 if they're low-utility pleasantries

    print(f"preserving: {preserve_recent}")

    recent_messages = messages[-preserve_recent:]
    older_messages = messages[:-preserve_recent]
    older_scores = scores[:-preserve_recent]

    sorted_pairs = sorted(
        zip(older_messages, older_scores),
        key=lambda x: x[1],  # Sort by the score (second element of tuple)
        reverse=True,  # Highest scores first
    )

    optimized = recent_messages.copy()
    current_tokens = count_tokens(optimized)

    # Check if we're already over budget with just recent messages
    if current_tokens >= max_token_count:
        # Emergency: even recent messages exceed the budget.
        # Keep reducing until we fit.
        while current_tokens > max_token_count and len(optimized) > 1:
            optimized.pop(0)  # Remove oldest of the recent messages
            current_tokens = count_tokens(optimized)
        return optimized

    # Categorize older messages into buckets
    keep_bucket = []
    summarize_bucket = []

    for message, score in sorted_pairs:
        if score > 7.0:
            keep_bucket.append(message)
        elif score > 4.0:
            summarize_bucket.append(message)
        # score <= 4.0: drop entirely

    # Try to add high-scoring messages one by one
    for message in keep_bucket:
        message_tokens = count_tokens([message])
        if current_tokens + message_tokens <= max_token_count:
            optimized.insert(0, message)  # Add before recent messages
            current_tokens += message_tokens
        else:
            # Can't fit this message, add to summarize bucket instead
            summarize_bucket.append(message)

    if summarize_bucket:
        summary_tokens = count_tokens(summarize_bucket)
        print(f"Tokens needed to summarize: {summary_tokens}")
        summary = compactor.summarize(summarize_bucket, summary_tokens * 0.2)
        summary_message = {
            "role": "system",
            "content": f"Summary of earlier context: {summary}",
        }
        print(f"Tokens summary: {count_tokens([summary_message])}")

        summary_tokens = count_tokens([summary_message])
        if current_tokens + summary_tokens <= max_token_count:
            optimized.insert(0, summary_message)
            current_tokens += summary_tokens
        else:
            print(
                f"Summary tokens exceeds {max_token_count}. Dropping summary."
            )
            # If the summary doesn't fit, skip it (rare but possible)

    return optimized


def aggressive_strategy():
    pass
contextflow/utils/__init__.py
ADDED
File without changes

contextflow/utils/cost_calculator.py
ADDED
File without changes

contextflow/utils/embeddings.py
ADDED
File without changes

contextflow/utils/llm.py
ADDED
@@ -0,0 +1,206 @@
"""
LLM client backed by the Google Gemini API
"""

from google import genai
from google.genai import types
import json
import os
from typing import List, Dict


class LLMClient:
    """Lightweight LLM utility"""

    def __init__(self):
        self.client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
        self.model_name = "gemini-2.5-flash-lite"

    def generate_text(
        self, prompt: str, max_tokens: int = 500, temperature: float = 0.3
    ) -> str:
        """
        Generate text from a prompt (for summarization)

        Args:
            prompt: The prompt to send
            max_tokens: Maximum tokens in response
            temperature: Randomness (0=deterministic, 1=creative)

        Returns:
            Generated text
        """

        try:
            response = self.client.models.generate_content(
                model=self.model_name,
                contents=prompt,
                config=types.GenerateContentConfig(
                    temperature=0,
                ),
            )

            return response.text

        except Exception as e:
            raise Exception(f"LLM generation failed: {e}")

    def score_utility(self, text: str, goal: str) -> float:
        """
        Score message utility using an LLM

        Args:
            text: Message content
            goal: Agent goal for context

        Returns:
            Utility score 0-10
        """

        prompt = f"""Rate message relevance to goal (0-10 scale):

Goal: {goal}

Scoring guide:
• 8-10: Specific facts, numbers, IDs, errors ("Error: timeout line 42" = 10)
• 4-7: Questions, partial info ("Can you check status?" = 6)
• 0-3: Acknowledgments, greetings ("Thanks!" = 1)

Message: "{text}"

Score (number only):"""

        response = self.client.models.generate_content(
            model=self.model_name,
            contents=prompt,
            config=types.GenerateContentConfig(
                temperature=0,
                response_mime_type="application/json",
                response_schema={
                    "type": "object",
                    "properties": {
                        "score": {
                            "type": "number",
                            "description": "Rating from 0-10",
                            "minimum": 0,
                            "maximum": 10,
                        }
                    },
                    "required": ["score"],
                },
            ),
        )
        result = json.loads(response.text)
        score = result["score"]

        return score

    def batch_score_utility(
        self, batch: List[Dict[str, str]], goal: str
    ) -> List[float]:
        """
        Score the utility of a batch of messages in a single LLM call

        Args:
            batch: List of message dicts to score
            goal: Agent goal for context

        Returns:
            List of utility scores (0-10), one per message in order
        """

        formatted_messages = ""
        for i, msg in enumerate(batch, 1):
            role = msg.get("role", "unknown").capitalize()
            content = msg.get("content", "")
            formatted_messages += f"{i}. [{role}] {content}\n"

        prompt = f"""Rate message relevance to goal (0-10 scale):

Goal: {goal}

Scoring guide:
• 8-10: Specific facts, numbers, IDs, errors ("Error: timeout line 42" = 10)
• 4-7: Questions, partial info ("Can you check status?" = 6)
• 0-3: Acknowledgments, greetings ("Thanks!" = 1)

MESSAGES TO RATE:
{formatted_messages}

Return a JSON array with one score per message in order
"""
        response_schema = {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "message_index": {
                        "type": "integer",
                        "description": "The message number (1-indexed)",
                    },
                    "score": {
                        "type": "number",
                        "description": "Utility score from 0-10",
                        "minimum": 0,
                        "maximum": 10,
                    },
                },
                "required": ["message_index", "score"],
            },
        }

        try:
            response = self.client.models.generate_content(
                model=self.model_name,
                contents=prompt,
                config=types.GenerateContentConfig(
                    temperature=0,
                    response_mime_type="application/json",
                    response_schema=response_schema,
                ),
            )

            scores_data = json.loads(response.text)

            scores = self._extract_scores_from_json(scores_data, len(batch))

            return scores

        except Exception as e:
            print(
                f"Warning: Batch scoring failed ({e}). Using fallback scores."
            )
            return [5.0] * len(batch)

    def _extract_scores_from_json(
        self, scores_data: List[Dict], expected_count: int
    ) -> List[float]:
        """
        Extracts scores from the JSON array response.

        Args:
            scores_data: List of {"message_index": 1, "score": 8.5} objects
            expected_count: Number of messages we scored

        Returns:
            List of scores in the correct order
        """

        # Create a dict for quick lookup
        score_dict = {}
        for item in scores_data:
            idx = item.get("message_index")
            score = item.get("score", 5.0)
            if idx is not None:
                score_dict[idx] = max(0.0, min(10.0, float(score)))

        # Build the final list in order (1-indexed to 0-indexed)
        final_scores = []
        for i in range(1, expected_count + 1):
            final_scores.append(
                score_dict.get(i, 5.0)
            )  # Default to 5.0 if missing

        return final_scores
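For reference, the batch scorer asks the model for a JSON array of `{"message_index", "score"}` objects, which `_extract_scores_from_json` then reorders, clamps, and back-fills. A small sketch of that post-processing (the score values are invented for illustration):

```python
# Example of the structured output the response_schema above requests
# (values invented for illustration).
scores_data = [
    {"message_index": 2, "score": 9.0},
    {"message_index": 1, "score": 2.5},
    # message 3 missing from the response -> falls back to the default of 5.0
]

# _extract_scores_from_json(scores_data, expected_count=3)
# returns [2.5, 9.0, 5.0]: ordered by message index, clamped to 0-10,
# with 5.0 filled in for any index the model skipped.
```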

contextflow/utils/tokenizer.py
ADDED
@@ -0,0 +1,26 @@
"""
Token counting utilities
"""

from typing import List, Dict


def count_tokens(messages: List[Dict[str, str]]) -> int:
    """
    Count tokens in a list of messages

    Args:
        messages: List of message dicts [{"role": "user", "content": "..."}]

    Returns:
        Total token count
    """

    # Heuristic (roughly 4 characters per token). Not exact.

    total = 0

    for message in messages:
        total += len(message["content"])

    return total // 4
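The heuristic above is simply total character count divided by four; a minimal usage sketch (assuming the module is imported from the installed wheel as laid out in RECORD):

```python
from contextflow.utils.tokenizer import count_tokens

history = [
    {"role": "user", "content": "Where is order #4512?"},                    # 21 chars
    {"role": "assistant", "content": "Checking the shipping status now."},   # 33 chars
]

print(count_tokens(history))  # (21 + 33) // 4 == 13
```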

contextflow_ai-0.1.2.dist-info/METADATA
ADDED
@@ -0,0 +1,77 @@
Metadata-Version: 2.4
Name: contextflow-ai
Version: 0.1.2
Summary: Context optimization for AI agents
Author-email: Ayush Thapa <ayushthapa026@gmail.com>
Requires-Python: >=3.10
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: google-genai==1.49.0
Requires-Dist: click==8.1.7
Requires-Dist: rich==13.9.4
Requires-Dist: python-dotenv==1.0.1
Requires-Dist: numpy==2.3.4
Dynamic: license-file

<p align="center">
<p align="center">
<img width="128" height="128" src="https://github.com/athaapa/contextflow/blob/74d4dffdb3c0c1777ccd3593b7d52f89f9406afc/images/logo.png" alt="Logo">
</p>

<h1 align="center"><b>ContextFlow</b></h1>
<p align="center">
Context optimization for AI agents.
<br />
</p>
</p>

**ContextFlow** is a fast, open-source Python library that compresses chat, agent, or RAG context before you send it to an expensive LLM. It finds the “needle in the haystack”: it scores every message by utility, keeps only critical facts, summarizes the rest, and drops noise, preserving quality while saving money.

# Why ContextFlow?
Most LLM applications waste money by sending entire chat histories, long chains of document chunks, and endless boilerplate on every API call. Most of it is redundant, repetitive, or irrelevant to the user's actual goal.

ContextFlow solves this by:
- Ranking every message by utility using a fast batched LLM call
- Keeping only high-signal content (order numbers, errors, decisions)
- Aggressively summarizing medium-utility content
- Dropping low-value fluff and polite filler
- Always preserving the last few recent messages for context

The keep / summarize / drop split is sketched below.
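(A rough sketch of the idea; the score thresholds mirror `core/strategies.py`, the helper name `bucket_messages` is illustrative only, and the real implementation also enforces a token budget and always keeps the most recent messages.)

```python
def bucket_messages(messages, scores):
    """Illustrative only: split messages into keep / summarize / drop buckets."""
    keep, summarize, drop = [], [], []
    for message, score in zip(messages, scores):
        if score > 7.0:        # high-signal facts, IDs, errors: keep verbatim
            keep.append(message)
        elif score > 4.0:      # medium utility: compress into one summary
            summarize.append(message)
        else:                  # low-value filler: drop entirely
            drop.append(message)
    return keep, summarize, drop
```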
# Quickstart
## Installation
```bash
pip install contextflow-ai
```
## Setup
Set your Gemini API key (it is used for message scoring and summarization):
```.env
export GEMINI_API_KEY="YOUR_KEY_HERE"
```
If you don't have one, you can get one for free [here](https://aistudio.google.com/api-keys).
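Since `python-dotenv` is a declared dependency and the client reads the key with `os.getenv("GEMINI_API_KEY")`, one option (a sketch; loading the file yourself may not be required) is to keep the key in a local `.env` file and load it before creating the optimizer:

```python
from dotenv import load_dotenv

load_dotenv()  # pulls GEMINI_API_KEY from ./.env into the process environment
```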
## Example
```python
from contextflow import ContextFlow

# Chat history (list of {"role": str, "content": str})
messages = [
    # ... up to 50 messages ...
]

cf = ContextFlow()

result = cf.optimize(
    messages=messages,
    agent_goal="Resolve customer shipping inquiry",  # this guides scoring
    max_token_count=400,  # your token budget for the optimized context
)

print("Tokens before:", result["analytics"]["tokens_before"])
print("Tokens after:", result["analytics"]["tokens_after"])
print("Reduction:", result["analytics"]["reduction_pct"], "%")
print("Messages ready for LLM:", result["messages"])

# Now use result["messages"] in your LLM query!
# response = openai.ChatCompletion.create(model="gpt-4", messages=result["messages"])
```
# License
MIT License - see the [LICENSE](https://github.com/athaapa/contextflow/blob/6b79c95d55c53c40bd8f500a6c921ea394c32666/LICENSE) file for details.

contextflow_ai-0.1.2.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
contextflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
contextflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
contextflow/core/compactor.py,sha256=Sv_KflgD67Raujb2eIu9grv-irT4MCOYhNkhr-rWjD0,3911
contextflow/core/optimizer.py,sha256=falRan-SLVZ-8a5i5V7R0ZSJEW16bmrNdMTV3RA9QbY,1195
contextflow/core/scorer.py,sha256=CcSqasPa3-yPKJmIJUXwDCnuvaOh9COMRkcZ2FEXgas,2244
contextflow/core/strategies.py,sha256=9d3j8sy7IEsmG7Hk45tiOY4axiSIjjAey9Q1I1jHMGM,4260
contextflow/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
contextflow/utils/cost_calculator.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
contextflow/utils/embeddings.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
contextflow/utils/llm.py,sha256=ddVs9rwl-eJCmx9yUcR88KWpdv4-aESuIACO5l4PvA0,6138
contextflow/utils/tokenizer.py,sha256=C3Bwo2w8aYMDts7tsBFlJCX8U8yhU5SKxWNOiiUqr8A,443
contextflow_ai-0.1.2.dist-info/licenses/LICENSE,sha256=avSXTUv_48kpuVUFhhSvvklB-U4R5Pb9VcK-fuCP7zQ,1068
contextflow_ai-0.1.2.dist-info/METADATA,sha256=X34X-nVW44k1-lLvHOS5U6L8QLkKCl_nysy621Wv1J4,2781
contextflow_ai-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
contextflow_ai-0.1.2.dist-info/top_level.txt,sha256=zgLkleoXiZGCvGnFTWoxj7S_yLu8z_DC2qh_MstAkBs,12
contextflow_ai-0.1.2.dist-info/RECORD,,

contextflow_ai-0.1.2.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Ayush Thapa

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

contextflow_ai-0.1.2.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
contextflow