ctxintel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctxintel/__init__.py +16 -0
- ctxintel/compressor.py +122 -0
- ctxintel/data/patterns.yaml +183 -0
- ctxintel/extractor.py +411 -0
- ctxintel/memory.py +205 -0
- ctxintel/models.py +65 -0
- ctxintel/optimizer.py +123 -0
- ctxintel/pipeline.py +159 -0
- ctxintel/presets.py +101 -0
- ctxintel/ranker.py +169 -0
- ctxintel-0.1.0.dist-info/METADATA +210 -0
- ctxintel-0.1.0.dist-info/RECORD +15 -0
- ctxintel-0.1.0.dist-info/WHEEL +5 -0
- ctxintel-0.1.0.dist-info/licenses/LICENSE +21 -0
- ctxintel-0.1.0.dist-info/top_level.txt +1 -0
ctxintel/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""ctxintel — Context Operating System for LLM Applications."""
|
|
2
|
+
|
|
3
|
+
from ctxintel.pipeline import ContextIntel
|
|
4
|
+
from ctxintel.models import Message, Memory, ContextResult
|
|
5
|
+
from ctxintel.presets import PRESETS, load_preset, list_presets
|
|
6
|
+
|
|
7
|
+
__version__ = "0.1.0"
|
|
8
|
+
__all__ = [
|
|
9
|
+
"ContextIntel",
|
|
10
|
+
"Message",
|
|
11
|
+
"Memory",
|
|
12
|
+
"ContextResult",
|
|
13
|
+
"PRESETS",
|
|
14
|
+
"load_preset",
|
|
15
|
+
"list_presets",
|
|
16
|
+
]
|
ctxintel/compressor.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""Message compressor using extractive summarization (sumy LSA) with fallback."""
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
from typing import Dict, List, Optional
|
|
5
|
+
|
|
6
|
+
from ctxintel.models import Message
|
|
7
|
+
|
|
8
|
+
# sumy is an optional dependency. Record whether its pieces imported so the
# summarizer can choose between LSA and the plain sentence-based fallback.
try:
    from sumy.parsers.plaintext import PlaintextParser
    from sumy.nlp.tokenizers import Tokenizer
    from sumy.summarizers.lsa import LsaSummarizer
    from sumy.nlp.stemmers import Stemmer
except ImportError:
    _HAS_SUMY = False
    # Warn once at import time; compression still works without sumy.
    warnings.warn(
        "sumy not found. Compression will use a simpler sentence-based fallback. "
        "Install with: pip install sumy",
        stacklevel=2,
    )
else:
    _HAS_SUMY = True
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Compressor:
    """Collapse low-importance messages into one summary, keeping the rest.

    Summaries are extractive: sumy's LSA summarizer is used when sumy was
    importable (no AI API calls are made), otherwise the first few sentences
    of the text are used instead.
    """

    def compress(
        self,
        messages: List[Message],
        preserve: Optional[List[str]] = None,
        threshold: float = 0.4,
    ) -> List[Message]:
        """Replace messages below the importance threshold with a summary.

        Args:
            messages: Ranked list of Message objects.
            preserve: Category names to preserve. Currently unused — the
                per-message ``preserved`` flag is what is honoured.
            threshold: Messages whose importance is below this value (and
                that are not flagged ``preserved``) get compressed.

        Returns:
            The kept messages in their original order. When a summary of the
            compressed messages is worthwhile, it is prepended as a single
            ``system`` message. If the compressed text is shorter than 50
            characters, or the summary is not shorter than that text, the
            low-importance messages are dropped without a summary.
        """
        # Single pass so every message lands in exactly one bucket.
        keep_msgs = []
        compress_msgs = []
        for msg in messages:
            if msg.importance >= threshold or msg.preserved:
                keep_msgs.append(msg)
            else:
                compress_msgs.append(msg)

        if not compress_msgs:
            return keep_msgs

        combined_text = " ".join(m.content for m in compress_msgs)

        # If the compressed content is very short, just drop it —
        # a summary message would be larger than the original filler.
        if len(combined_text) < 50:
            return keep_msgs

        summary = self._summarize(combined_text, len(compress_msgs))

        # Only inject the summary if it is actually shorter than the
        # combined original text.
        if len(summary) >= len(combined_text):
            return keep_msgs

        summary_msg = Message(
            role="system",
            content=f"[Earlier context summary]: {summary}",
            importance=0.85,
            preserved=True,
        )
        return [summary_msg] + keep_msgs

    def estimate_reduction(self, messages: List[Message], threshold: float = 0.4) -> Dict:
        """Preview compression stats without actually compressing.

        Args:
            messages: Ranked list of Message objects.
            threshold: Importance threshold for compression.

        Returns:
            Dict with ``total_messages``, ``will_keep``, ``will_compress`` and
            ``estimated_reduction_pct`` (share of messages, by count, that
            would be compressed, rounded to one decimal place).
        """
        total = len(messages)
        # Same keep rule as compress(); everything not kept is compressed.
        will_keep = sum(1 for m in messages if m.importance >= threshold or m.preserved)
        will_compress = total - will_keep
        return {
            "total_messages": total,
            "will_keep": will_keep,
            "will_compress": will_compress,
            # max(total, 1) avoids dividing by zero for an empty list.
            "estimated_reduction_pct": round(will_compress / max(total, 1) * 100, 1),
        }

    def _summarize(self, text: str, msg_count: int) -> str:
        """Summarize text using sumy LSA when available, else the fallback."""
        if _HAS_SUMY:
            return self._summarize_lsa(text, msg_count)
        return self._summarize_fallback(text)

    def _summarize_lsa(self, text: str, msg_count: int) -> str:
        """Summarize with sumy's LSA summarizer.

        Requests between 3 and 7 sentences, scaled as one sentence per three
        compressed messages. Falls back to ``_summarize_fallback`` when sumy
        yields an empty summary or raises.
        """
        try:
            sentence_count = max(3, min(7, msg_count // 3))
            parser = PlaintextParser.from_string(text, Tokenizer("english"))
            stemmer = Stemmer("english")
            summarizer = LsaSummarizer(stemmer)
            sentences = summarizer(parser.document, sentence_count)
            summary = " ".join(str(s) for s in sentences)
            if not summary.strip():
                return self._summarize_fallback(text)
            return summary
        except Exception as exc:
            # Deliberate best-effort: any sumy failure degrades to the
            # fallback (with a warning) instead of aborting compression.
            warnings.warn(f"sumy LSA failed ({exc}). Using fallback.", stacklevel=2)
            return self._summarize_fallback(text)

    def _summarize_fallback(self, text: str) -> str:
        """Simple fallback: keep the first 5 sentences of ``text``.

        Sentences are split on ``". "``. The result is trimmed of surrounding
        whitespace and a closing period is added only when it does not already
        end in ``.``, ``!`` or ``?``. Empty input yields an empty string.
        """
        sentences = text.strip().split(". ")
        result = ". ".join(sentences[:5]).rstrip()
        # Avoid doubled punctuation such as "done?." and a lone "." for
        # empty input.
        if result and not result.endswith((".", "!", "?")):
            result += "."
        return result
|
|
ctxintel/data/patterns.yaml
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
categories:
|
|
2
|
+
|
|
3
|
+
user_name:
|
|
4
|
+
patterns:
|
|
5
|
+
- "my name is {X}"
|
|
6
|
+
- "i am {X}"
|
|
7
|
+
- "call me {X}"
|
|
8
|
+
- "i go by {X}"
|
|
9
|
+
priority: high
|
|
10
|
+
|
|
11
|
+
user_preference:
|
|
12
|
+
patterns:
|
|
13
|
+
- "i prefer {X}"
|
|
14
|
+
- "i like {X}"
|
|
15
|
+
- "i love {X}"
|
|
16
|
+
- "i always use {X}"
|
|
17
|
+
- "my favorite {X}"
|
|
18
|
+
- "my favourite {X}"
|
|
19
|
+
- "i enjoy {X}"
|
|
20
|
+
- "i work best with {X}"
|
|
21
|
+
priority: high
|
|
22
|
+
|
|
23
|
+
task:
|
|
24
|
+
patterns:
|
|
25
|
+
- "build {X}"
|
|
26
|
+
- "create {X}"
|
|
27
|
+
- "implement {X}"
|
|
28
|
+
- "i need {X}"
|
|
29
|
+
- "i need to {X}"
|
|
30
|
+
- "i want to {X}"
|
|
31
|
+
- "we need to {X}"
|
|
32
|
+
- "fix {X}"
|
|
33
|
+
- "add {X}"
|
|
34
|
+
- "make {X}"
|
|
35
|
+
- "develop {X}"
|
|
36
|
+
- "write {X}"
|
|
37
|
+
- "design {X}"
|
|
38
|
+
priority: high
|
|
39
|
+
|
|
40
|
+
decision:
|
|
41
|
+
patterns:
|
|
42
|
+
- "decided to {X}"
|
|
43
|
+
- "we decided {X}"
|
|
44
|
+
- "going with {X}"
|
|
45
|
+
- "we will use {X}"
|
|
46
|
+
- "confirmed {X}"
|
|
47
|
+
- "agreed to {X}"
|
|
48
|
+
- "we are using {X}"
|
|
49
|
+
- "we chose {X}"
|
|
50
|
+
- "settled on {X}"
|
|
51
|
+
priority: high
|
|
52
|
+
|
|
53
|
+
constraint:
|
|
54
|
+
patterns:
|
|
55
|
+
- "don't {X}"
|
|
56
|
+
- "do not {X}"
|
|
57
|
+
- "never {X}"
|
|
58
|
+
- "avoid {X}"
|
|
59
|
+
- "must not {X}"
|
|
60
|
+
- "should not {X}"
|
|
61
|
+
- "no {X}"
|
|
62
|
+
- "without {X}"
|
|
63
|
+
- "exclude {X}"
|
|
64
|
+
priority: high
|
|
65
|
+
|
|
66
|
+
coding_language:
|
|
67
|
+
patterns:
|
|
68
|
+
- "using {X}"
|
|
69
|
+
- "written in {X}"
|
|
70
|
+
- "in {X}"
|
|
71
|
+
- "with {X}"
|
|
72
|
+
keywords:
|
|
73
|
+
- python
|
|
74
|
+
- javascript
|
|
75
|
+
- typescript
|
|
76
|
+
- go
|
|
77
|
+
- golang
|
|
78
|
+
- rust
|
|
79
|
+
- java
|
|
80
|
+
- ruby
|
|
81
|
+
- php
|
|
82
|
+
- swift
|
|
83
|
+
- kotlin
|
|
84
|
+
- c++
|
|
85
|
+
- "c#"
|
|
86
|
+
- scala
|
|
87
|
+
priority: high
|
|
88
|
+
|
|
89
|
+
framework:
|
|
90
|
+
patterns:
|
|
91
|
+
- "using {X}"
|
|
92
|
+
- "with {X}"
|
|
93
|
+
- "built on {X}"
|
|
94
|
+
keywords:
|
|
95
|
+
- flask
|
|
96
|
+
- django
|
|
97
|
+
- fastapi
|
|
98
|
+
- react
|
|
99
|
+
- vue
|
|
100
|
+
- angular
|
|
101
|
+
- nextjs
|
|
102
|
+
- express
|
|
103
|
+
- spring
|
|
104
|
+
- rails
|
|
105
|
+
- laravel
|
|
106
|
+
- svelte
|
|
107
|
+
- nuxt
|
|
108
|
+
priority: medium
|
|
109
|
+
|
|
110
|
+
cloud_provider:
|
|
111
|
+
patterns:
|
|
112
|
+
- "on {X}"
|
|
113
|
+
- "deploy to {X}"
|
|
114
|
+
- "hosted on {X}"
|
|
115
|
+
- "running on {X}"
|
|
116
|
+
keywords:
|
|
117
|
+
- aws
|
|
118
|
+
- gcp
|
|
119
|
+
- azure
|
|
120
|
+
- vercel
|
|
121
|
+
- heroku
|
|
122
|
+
- railway
|
|
123
|
+
- render
|
|
124
|
+
- digitalocean
|
|
125
|
+
- cloudflare
|
|
126
|
+
priority: medium
|
|
127
|
+
|
|
128
|
+
architecture:
|
|
129
|
+
patterns:
|
|
130
|
+
- "using {X}"
|
|
131
|
+
- "following {X}"
|
|
132
|
+
- "architecture is {X}"
|
|
133
|
+
- "structured as {X}"
|
|
134
|
+
keywords:
|
|
135
|
+
- microservices
|
|
136
|
+
- monolith
|
|
137
|
+
- serverless
|
|
138
|
+
- event-driven
|
|
139
|
+
- rest
|
|
140
|
+
- graphql
|
|
141
|
+
- grpc
|
|
142
|
+
- websockets
|
|
143
|
+
- mvc
|
|
144
|
+
- clean architecture
|
|
145
|
+
priority: medium
|
|
146
|
+
|
|
147
|
+
spacy_entity_map:
|
|
148
|
+
PERSON: "person_mentioned"
|
|
149
|
+
ORG: "organization"
|
|
150
|
+
PRODUCT: "product"
|
|
151
|
+
GPE: "location"
|
|
152
|
+
LOC: "location"
|
|
153
|
+
DATE: "temporal_reference"
|
|
154
|
+
TIME: "temporal_reference"
|
|
155
|
+
MONEY: "budget_constraint"
|
|
156
|
+
PERCENT: "metric"
|
|
157
|
+
|
|
158
|
+
dependency_verbs:
|
|
159
|
+
- deploy
|
|
160
|
+
- use
|
|
161
|
+
- build
|
|
162
|
+
- integrate
|
|
163
|
+
- avoid
|
|
164
|
+
- implement
|
|
165
|
+
- install
|
|
166
|
+
- configure
|
|
167
|
+
- connect
|
|
168
|
+
- migrate
|
|
169
|
+
|
|
170
|
+
emphasis_signals:
|
|
171
|
+
high:
|
|
172
|
+
- "very important"
|
|
173
|
+
- "critical"
|
|
174
|
+
- "must"
|
|
175
|
+
- "always remember"
|
|
176
|
+
- "don't forget"
|
|
177
|
+
- "crucial"
|
|
178
|
+
- "essential"
|
|
179
|
+
medium:
|
|
180
|
+
- "important"
|
|
181
|
+
- "remember"
|
|
182
|
+
- "please note"
|
|
183
|
+
- "keep in mind"
|