pymrsf 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymrsf-0.4.0/.gitignore +46 -0
- pymrsf-0.4.0/LICENSE +21 -0
- pymrsf-0.4.0/PKG-INFO +204 -0
- pymrsf-0.4.0/README.md +177 -0
- pymrsf-0.4.0/demo_novelty.py +157 -0
- pymrsf-0.4.0/mrsf_benchmark_full.py +235 -0
- pymrsf-0.4.0/pymrsf/__init__.py +12 -0
- pymrsf-0.4.0/pymrsf/benchmark.py +124 -0
- pymrsf-0.4.0/pymrsf/core.py +356 -0
- pymrsf-0.4.0/pymrsf/embeddings.py +13 -0
- pymrsf-0.4.0/pymrsf/inspect.py +83 -0
- pymrsf-0.4.0/pymrsf/probe.py +157 -0
- pymrsf-0.4.0/pymrsf/rag.py +513 -0
- pymrsf-0.4.0/pymrsf/storage.py +169 -0
- pymrsf-0.4.0/pyproject.toml +53 -0
- pymrsf-0.4.0/rag_experiment.py +112 -0
- pymrsf-0.4.0/tests/__init__.py +1 -0
- pymrsf-0.4.0/tests/test_core.py +34 -0
- pymrsf-0.4.0/tests/test_probe.py +44 -0
- pymrsf-0.4.0/tests/test_rag.py +119 -0
- pymrsf-0.4.0/verify_reddit_claims.py +92 -0
pymrsf-0.4.0/.gitignore
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Environment variables
|
|
2
|
+
.env
|
|
3
|
+
|
|
4
|
+
# Python cache
|
|
5
|
+
__pycache__/
|
|
6
|
+
*.pyc
|
|
7
|
+
*.pyo
|
|
8
|
+
*.pyd
|
|
9
|
+
.Python
|
|
10
|
+
|
|
11
|
+
# Virtual environment
|
|
12
|
+
venv/
|
|
13
|
+
env/
|
|
14
|
+
ENV/
|
|
15
|
+
|
|
16
|
+
# MRSF database and indexes
|
|
17
|
+
mrsf.db
|
|
18
|
+
mrsf.faiss
|
|
19
|
+
mrsf.faiss.meta
|
|
20
|
+
|
|
21
|
+
# Results and outputs
|
|
22
|
+
*.csv
|
|
23
|
+
mrsf_results_full.csv
|
|
24
|
+
|
|
25
|
+
# Large model files
|
|
26
|
+
models/
|
|
27
|
+
*.gguf
|
|
28
|
+
*.tar.gz
|
|
29
|
+
*.bin
|
|
30
|
+
*.safetensors
|
|
31
|
+
|
|
32
|
+
# Distribution files (can be rebuilt)
|
|
33
|
+
dist/
|
|
34
|
+
build/
|
|
35
|
+
*.egg-info/
|
|
36
|
+
|
|
37
|
+
# IDE
|
|
38
|
+
.vscode/
|
|
39
|
+
.idea/
|
|
40
|
+
*.swp
|
|
41
|
+
*.swo
|
|
42
|
+
*~
|
|
43
|
+
|
|
44
|
+
# OS
|
|
45
|
+
.DS_Store
|
|
46
|
+
Thumbs.db
|
pymrsf-0.4.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Eric Monthe
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pymrsf-0.4.0/PKG-INFO
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pymrsf
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Novelty-Aware RAG scoring — Filter chunks by information gain, not just relevance
|
|
5
|
+
Project-URL: Homepage, https://github.com/riiseup08/mrsf
|
|
6
|
+
Author: Eric Monthe
|
|
7
|
+
License: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: embeddings,llm,novelty,rag,retrieval,semantic
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Requires-Python: >=3.9
|
|
16
|
+
Requires-Dist: faiss-cpu>=1.7.4
|
|
17
|
+
Requires-Dist: httpx>=0.25.0
|
|
18
|
+
Requires-Dist: llama-cpp-python>=0.2.0
|
|
19
|
+
Requires-Dist: msgpack>=1.0.0
|
|
20
|
+
Requires-Dist: numpy>=1.24.0
|
|
21
|
+
Requires-Dist: requests>=2.31.0
|
|
22
|
+
Requires-Dist: tiktoken>=0.5.0
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# pymrsf — Novelty-Aware RAG Chunk Scoring
|
|
29
|
+
|
|
30
|
+
[Python 3.9+](https://www.python.org/downloads/)
|
|
31
|
+
[License: MIT](LICENSE)
|
|
32
|
+
**Status:** Alpha
|
|
33
|
+
|
|
34
|
+
**Stop wasting context window on information your LLM already knows.**
|
|
35
|
+
|
|
36
|
+
`pymrsf` scores RAG chunks by measuring **information gain** — not just relevance. It uses the model's own predictive surprise to detect which chunks contain genuinely new information.
|
|
37
|
+
|
|
38
|
+
## The Problem
|
|
39
|
+
|
|
40
|
+
Standard RAG retrieves chunks by *relevance* (cosine similarity). But a chunk can be highly relevant while containing *only facts the model already memorized during training*. You waste precious context window on redundant information.
|
|
41
|
+
|
|
42
|
+
Also, if the LLM already *knows the answer* to the query, even novel chunks are less useful. And if two chunks say the same thing, you don't need both.
|
|
43
|
+
|
|
44
|
+
## The Solution
|
|
45
|
+
|
|
46
|
+
`pymrsf` introduces **multi-factor novelty-aware scoring**:
|
|
47
|
+
|
|
48
|
+
| Factor | What It Measures | Weight |
|
|
49
|
+
|--------|-----------------|--------|
|
|
50
|
+
| **Novelty** | How much *new* information does this chunk contain? | 40% |
|
|
51
|
+
| **Relevance** | How related is this chunk to the query? | 40% |
|
|
52
|
+
| **Query Ignorance** | Does the model *not* know the answer to your question? | 20% |
|
|
53
|
+
| **Diversity** | Does a better chunk already cover this content? | Dedup |
|
|
54
|
+
|
|
55
|
+
## Quick Start
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from pymrsf.rag import filter_chunks
|
|
59
|
+
|
|
60
|
+
# Your retrieved chunks
|
|
61
|
+
chunks = [
|
|
62
|
+
"Backpropagation computes gradients using the chain rule.",
|
|
63
|
+
"Neural networks are inspired by the human brain.",
|
|
64
|
+
"The sky is blue because of Rayleigh scattering.",
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
# Filter to only useful chunks
|
|
68
|
+
query = "How does backpropagation work?"
|
|
69
|
+
useful = filter_chunks(chunks, query, min_rag_score=50, verbose=True)
|
|
70
|
+
|
|
71
|
+
# → Pass only useful chunks to your LLM
|
|
72
|
+
answer = llm.complete(query, context=useful)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Installation
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
pip install llama-cpp-python faiss-cpu msgpack tiktoken
|
|
79
|
+
git clone https://github.com/riiseup08/mrsf.git
|
|
80
|
+
cd mrsf
|
|
81
|
+
pip install -e .
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Features
|
|
85
|
+
|
|
86
|
+
### 🎯 RAG Chunk Scoring (Core Feature)
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
from pymrsf.rag import score_chunk, score_chunks, score_chunks_batch
|
|
90
|
+
|
|
91
|
+
# Single chunk scoring
|
|
92
|
+
result = score_chunk(
|
|
93
|
+
"Backpropagation computes gradients using the chain rule.",
|
|
94
|
+
query="How does backpropagation work?",
|
|
95
|
+
verbose=True
|
|
96
|
+
)
|
|
97
|
+
print(result["rag_score"]) # 72/100
|
|
98
|
+
print(result["verdict"]) # "good"
|
|
99
|
+
print(result["query_knowledge"]) # how much model knows the query
|
|
100
|
+
|
|
101
|
+
# Batch scoring (3-5x faster for many chunks)
|
|
102
|
+
results = score_chunks_batch(chunks, query)
|
|
103
|
+
|
|
104
|
+
# Custom weights (adjust the formula)
|
|
105
|
+
weights = {"novelty": 0.5, "relevance": 0.3, "query_ignorance": 0.2}
|
|
106
|
+
result = score_chunk(chunk, query, weights=weights)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### 🔍 Knowledge Probing
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from pymrsf import probe
|
|
113
|
+
|
|
114
|
+
result = probe("To be or not to be, that is the question.")
|
|
115
|
+
print(f"Knowledge: {result['knowledge_score']}/100 ({result['label']})")
|
|
116
|
+
# → Knowledge: 92/100 (memorized) — Shakespeare is well-known
|
|
117
|
+
|
|
118
|
+
result = probe("My proprietary algorithm uses a novel attention mechanism.")
|
|
119
|
+
print(f"Knowledge: {result['knowledge_score']}/100 ({result['label']})")
|
|
120
|
+
# → Knowledge: 15/100 (unknown) — novel content!
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### 🔧 RAG Pipeline Filter
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from pymrsf.rag import filter_chunks
|
|
127
|
+
|
|
128
|
+
chunks = retriever.get(query, top_k=20) # your retriever
|
|
129
|
+
|
|
130
|
+
# Only keep chunks worth sending to the LLM
|
|
131
|
+
good = filter_chunks(
|
|
132
|
+
chunks,
|
|
133
|
+
query,
|
|
134
|
+
min_rag_score=50, # skip low-value chunks
|
|
135
|
+
top_k=5, # limit context window usage
|
|
136
|
+
diversity_threshold=0.85, # dedup similar chunks
|
|
137
|
+
verbose=True,
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
answer = llm.complete(query, context=good)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### 📦 Delta Compression (Experimental)
|
|
144
|
+
|
|
145
|
+
Store text efficiently using LLM surprises:
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
from pymrsf import mrsf_write, mrsf_read, save_index
|
|
149
|
+
|
|
150
|
+
# Write (stores only surprise tokens = ~40% compression)
|
|
151
|
+
mrsf_write("The Eiffel Tower is in Paris.")
|
|
152
|
+
save_index()
|
|
153
|
+
|
|
154
|
+
# Read (reconstructs from delta + model)
|
|
155
|
+
results = mrsf_read("famous landmark in France")
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Configuration
|
|
159
|
+
|
|
160
|
+
Create a `.env` file:
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
PYMRSF_PROVIDER=local
|
|
164
|
+
PYMRSF_MODEL_PATH=./models/mistral-7b-v0.1.Q4_K_M.gguf
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Scoring Concepts
|
|
168
|
+
|
|
169
|
+
### RAG Score Formula
|
|
170
|
+
```
|
|
171
|
+
rag_score = novelty × 0.40 + relevance × 0.40 + query_ignorance × 0.20
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### What the Scores Mean
|
|
175
|
+
|
|
176
|
+
| Score | Verdict | Action |
|
|
177
|
+
|-------|---------|--------|
|
|
178
|
+
| 80-100 | Excellent | Prioritize this chunk |
|
|
179
|
+
| 60-79 | Good | Include in context |
|
|
180
|
+
| 40-59 | Moderate | Include if space allows |
|
|
181
|
+
| 20-39 | Weak | Skip if better chunks exist |
|
|
182
|
+
| 0-19 | Skip | Model already knows this |
|
|
183
|
+
|
|
184
|
+
## Project Structure
|
|
185
|
+
|
|
186
|
+
```
|
|
187
|
+
pymrsf/
|
|
188
|
+
├── __init__.py # Public API exports
|
|
189
|
+
├── core.py # Provider routing (local + openai), lazy model loading
|
|
190
|
+
├── embeddings.py # Ollama embedding API client
|
|
191
|
+
├── probe.py # Knowledge probing (how well does model know a text?)
|
|
192
|
+
├── rag.py # RAG chunk scoring with novelty + relevance + diversity
|
|
193
|
+
├── storage.py # Delta compression storage (experimental)
|
|
194
|
+
├── inspect.py # Token-level visualization tools
|
|
195
|
+
└── benchmark.py # Compression/latency benchmarks
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Project Status
|
|
199
|
+
|
|
200
|
+
**Alpha** — The RAG novelty scoring works and solves a real problem. The delta compression/storage system is experimental.
|
|
201
|
+
|
|
202
|
+
## License
|
|
203
|
+
|
|
204
|
+
MIT
|
pymrsf-0.4.0/README.md
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# pymrsf — Novelty-Aware RAG Chunk Scoring
|
|
2
|
+
|
|
3
|
+
[Python 3.9+](https://www.python.org/downloads/)
|
|
4
|
+
[License: MIT](LICENSE)
|
|
5
|
+
**Status:** Alpha
|
|
6
|
+
|
|
7
|
+
**Stop wasting context window on information your LLM already knows.**
|
|
8
|
+
|
|
9
|
+
`pymrsf` scores RAG chunks by measuring **information gain** — not just relevance. It uses the model's own predictive surprise to detect which chunks contain genuinely new information.
|
|
10
|
+
|
|
11
|
+
## The Problem
|
|
12
|
+
|
|
13
|
+
Standard RAG retrieves chunks by *relevance* (cosine similarity). But a chunk can be highly relevant while containing *only facts the model already memorized during training*. You waste precious context window on redundant information.
|
|
14
|
+
|
|
15
|
+
Also, if the LLM already *knows the answer* to the query, even novel chunks are less useful. And if two chunks say the same thing, you don't need both.
|
|
16
|
+
|
|
17
|
+
## The Solution
|
|
18
|
+
|
|
19
|
+
`pymrsf` introduces **multi-factor novelty-aware scoring**:
|
|
20
|
+
|
|
21
|
+
| Factor | What It Measures | Weight |
|
|
22
|
+
|--------|-----------------|--------|
|
|
23
|
+
| **Novelty** | How much *new* information does this chunk contain? | 40% |
|
|
24
|
+
| **Relevance** | How related is this chunk to the query? | 40% |
|
|
25
|
+
| **Query Ignorance** | Does the model *not* know the answer to your question? | 20% |
|
|
26
|
+
| **Diversity** | Does a better chunk already cover this content? | Dedup |
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from pymrsf.rag import filter_chunks
|
|
32
|
+
|
|
33
|
+
# Your retrieved chunks
|
|
34
|
+
chunks = [
|
|
35
|
+
"Backpropagation computes gradients using the chain rule.",
|
|
36
|
+
"Neural networks are inspired by the human brain.",
|
|
37
|
+
"The sky is blue because of Rayleigh scattering.",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
# Filter to only useful chunks
|
|
41
|
+
query = "How does backpropagation work?"
|
|
42
|
+
useful = filter_chunks(chunks, query, min_rag_score=50, verbose=True)
|
|
43
|
+
|
|
44
|
+
# → Pass only useful chunks to your LLM
|
|
45
|
+
answer = llm.complete(query, context=useful)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install llama-cpp-python faiss-cpu msgpack tiktoken
|
|
52
|
+
git clone https://github.com/riiseup08/mrsf.git
|
|
53
|
+
cd mrsf
|
|
54
|
+
pip install -e .
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Features
|
|
58
|
+
|
|
59
|
+
### 🎯 RAG Chunk Scoring (Core Feature)
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from pymrsf.rag import score_chunk, score_chunks, score_chunks_batch
|
|
63
|
+
|
|
64
|
+
# Single chunk scoring
|
|
65
|
+
result = score_chunk(
|
|
66
|
+
"Backpropagation computes gradients using the chain rule.",
|
|
67
|
+
query="How does backpropagation work?",
|
|
68
|
+
verbose=True
|
|
69
|
+
)
|
|
70
|
+
print(result["rag_score"]) # 72/100
|
|
71
|
+
print(result["verdict"]) # "good"
|
|
72
|
+
print(result["query_knowledge"]) # how much model knows the query
|
|
73
|
+
|
|
74
|
+
# Batch scoring (3-5x faster for many chunks)
|
|
75
|
+
results = score_chunks_batch(chunks, query)
|
|
76
|
+
|
|
77
|
+
# Custom weights (adjust the formula)
|
|
78
|
+
weights = {"novelty": 0.5, "relevance": 0.3, "query_ignorance": 0.2}
|
|
79
|
+
result = score_chunk(chunk, query, weights=weights)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### 🔍 Knowledge Probing
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from pymrsf import probe
|
|
86
|
+
|
|
87
|
+
result = probe("To be or not to be, that is the question.")
|
|
88
|
+
print(f"Knowledge: {result['knowledge_score']}/100 ({result['label']})")
|
|
89
|
+
# → Knowledge: 92/100 (memorized) — Shakespeare is well-known
|
|
90
|
+
|
|
91
|
+
result = probe("My proprietary algorithm uses a novel attention mechanism.")
|
|
92
|
+
print(f"Knowledge: {result['knowledge_score']}/100 ({result['label']})")
|
|
93
|
+
# → Knowledge: 15/100 (unknown) — novel content!
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### 🔧 RAG Pipeline Filter
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from pymrsf.rag import filter_chunks
|
|
100
|
+
|
|
101
|
+
chunks = retriever.get(query, top_k=20) # your retriever
|
|
102
|
+
|
|
103
|
+
# Only keep chunks worth sending to the LLM
|
|
104
|
+
good = filter_chunks(
|
|
105
|
+
chunks,
|
|
106
|
+
query,
|
|
107
|
+
min_rag_score=50, # skip low-value chunks
|
|
108
|
+
top_k=5, # limit context window usage
|
|
109
|
+
diversity_threshold=0.85, # dedup similar chunks
|
|
110
|
+
verbose=True,
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
answer = llm.complete(query, context=good)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### 📦 Delta Compression (Experimental)
|
|
117
|
+
|
|
118
|
+
Store text efficiently using LLM surprises:
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from pymrsf import mrsf_write, mrsf_read, save_index
|
|
122
|
+
|
|
123
|
+
# Write (stores only surprise tokens = ~40% compression)
|
|
124
|
+
mrsf_write("The Eiffel Tower is in Paris.")
|
|
125
|
+
save_index()
|
|
126
|
+
|
|
127
|
+
# Read (reconstructs from delta + model)
|
|
128
|
+
results = mrsf_read("famous landmark in France")
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Configuration
|
|
132
|
+
|
|
133
|
+
Create a `.env` file:
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
PYMRSF_PROVIDER=local
|
|
137
|
+
PYMRSF_MODEL_PATH=./models/mistral-7b-v0.1.Q4_K_M.gguf
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Scoring Concepts
|
|
141
|
+
|
|
142
|
+
### RAG Score Formula
|
|
143
|
+
```
|
|
144
|
+
rag_score = novelty × 0.40 + relevance × 0.40 + query_ignorance × 0.20
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### What the Scores Mean
|
|
148
|
+
|
|
149
|
+
| Score | Verdict | Action |
|
|
150
|
+
|-------|---------|--------|
|
|
151
|
+
| 80-100 | Excellent | Prioritize this chunk |
|
|
152
|
+
| 60-79 | Good | Include in context |
|
|
153
|
+
| 40-59 | Moderate | Include if space allows |
|
|
154
|
+
| 20-39 | Weak | Skip if better chunks exist |
|
|
155
|
+
| 0-19 | Skip | Model already knows this |
|
|
156
|
+
|
|
157
|
+
## Project Structure
|
|
158
|
+
|
|
159
|
+
```
|
|
160
|
+
pymrsf/
|
|
161
|
+
├── __init__.py # Public API exports
|
|
162
|
+
├── core.py # Provider routing (local + openai), lazy model loading
|
|
163
|
+
├── embeddings.py # Ollama embedding API client
|
|
164
|
+
├── probe.py # Knowledge probing (how well does model know a text?)
|
|
165
|
+
├── rag.py # RAG chunk scoring with novelty + relevance + diversity
|
|
166
|
+
├── storage.py # Delta compression storage (experimental)
|
|
167
|
+
├── inspect.py # Token-level visualization tools
|
|
168
|
+
└── benchmark.py # Compression/latency benchmarks
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Project Status
|
|
172
|
+
|
|
173
|
+
**Alpha** — The RAG novelty scoring works and solves a real problem. The delta compression/storage system is experimental.
|
|
174
|
+
|
|
175
|
+
## License
|
|
176
|
+
|
|
177
|
+
MIT
|
pymrsf-0.4.0/demo_novelty.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
#!/usr/bin/env python3
"""
pymrsf demo — see the novelty-aware RAG scoring in action.

This script tests 3 things:
1. Knowledge probing : Does the model know Shakespeare better than novel text?
2. RAG scoring : Do novel+relevant chunks score higher?
3. Diversity dedup : Are duplicate chunks filtered out?

Run: python demo_novelty.py
"""

import sys  # NOTE(review): not referenced anywhere in this script


def _score_bar(score):
    """Return a 25-cell text bar for an integer 0-100 score (one cell per 4 points)."""
    filled = score // 4
    return "█" * filled + "░" * (25 - filled)


# ── Test 1: Knowledge Probe ──────────────────────────────────────────────────

print("=" * 65)
print("TEST 1: Knowledge Probing")
print("Does the model know famous text better than novel text?")
print("=" * 65)

# Imported after the banner so the header is printed before the package import runs.
from pymrsf import probe

famous = [
    "To be or not to be, that is the question. Whether tis nobler in the mind to suffer.",
    "The quick brown fox jumps over the lazy dog.",
]

novel = [
    "My proprietary attention mechanism uses a novel sparse gating function.",
    "The Zeta-7 protocol encrypts data using quantum-resistant lattice cryptography.",
]

print("\n📚 FAMOUS TEXT (should score HIGH — model knows this):")
# Scores are collected while printing so each text is probed exactly once;
# the averages below reuse them instead of calling probe() a second time.
famous_scores = []
for text in famous:
    result = probe(text)
    if "error" in result:
        print(f" ⚠️ {result['error']}")
        print(" (This is expected if no local model is loaded)")
        break
    famous_scores.append(result['knowledge_score'])
    bar = _score_bar(result['knowledge_score'])
    print(f" [{bar}] {result['knowledge_score']:>2}/100 {result['label'].upper():<12} {text[:40]}...")

else:
    # for/else: this branch runs only when no famous text hit the error `break`,
    # i.e. when probing actually produced scores to compare.
    print("\n🔬 NOVEL TEXT (should score LOW — model hasn't seen this):")
    novel_scores = []
    for text in novel:
        result = probe(text)
        novel_scores.append(result['knowledge_score'])
        bar = _score_bar(result['knowledge_score'])
        print(f" [{bar}] {result['knowledge_score']:>2}/100 {result['label'].upper():<12} {text[:40]}...")

    # Verify the claim
    print(f"\n{'─'*65}")
    famous_avg = sum(famous_scores) / len(famous_scores)
    novel_avg = sum(novel_scores) / len(novel_scores)
    print(f" Famous avg: {famous_avg:.0f}/100 | Novel avg: {novel_avg:.0f}/100")
    if famous_avg > novel_avg:
        # The gap is a difference between two 0-100 scores, so report it in points.
        print(f" ✅ CONFIRMED: Model knows famous text {famous_avg - novel_avg:.0f} points better!")
    else:
        print(" ⚠️ Unexpected result — but that's interesting data too!")


# ── Test 2: RAG Chunk Scoring ─────────────────────────────────────────────────

print("\n\n" + "=" * 65)
print("TEST 2: RAG Chunk Scoring")
print("Do novel+relevant chunks score higher than known+irrelevant ones?")
print("=" * 65)

# NOTE(review): filter_chunks is imported but not called in this script.
from pymrsf.rag import score_chunks_batch, filter_chunks

# Simulated RAG chunks for the query "How does backpropagation work?"
query = "How does backpropagation work?"
rag_chunks = [
    "Backpropagation computes gradients using the chain rule. It propagates error backwards.",
    "Neural networks are inspired by the human brain. They consist of layers of neurons.",
    "The sky is blue because of Rayleigh scattering. This is well-known physics.",
    "A novel optimization technique uses second-order gradients for faster convergence.",
]

print(f"\nQuery: '{query}'")
print(f"Chunks to score: {len(rag_chunks)}")
print()

results = score_chunks_batch(rag_chunks, query, diversity_threshold=0.90)

for r in results:
    bar = _score_bar(r['rag_score'])
    status = "✅" if r['rag_score'] >= 50 else "❌"
    print(f" {status} [{bar}] RAG={r['rag_score']:>2} N={r['novelty_score']:>2} R={r['relevance_score']:>2} "
          f"Q={r['query_knowledge']:>2} {r['verdict'].upper():<10} {r['chunk'][:50]}...")

if results:
    # Presumably score_chunks_batch returns results ordered best-first, so the
    # first and last entries are the extremes — TODO confirm against pymrsf.rag.
    best_score = results[0]['rag_score']
    worst_score = results[-1]['rag_score']
    print(f"\n Best chunk: {results[0]['chunk'][:50]}... ({best_score}/100)")
    print(f" Worst chunk: {results[-1]['chunk'][:50]}... ({worst_score}/100)")
    if best_score > worst_score:
        print(" ✅ RAG scoring successfully ranked chunks by usefulness!")
    else:
        print(" ⚠️ Scores are similar — fine-tuning threshold may help")
else:
    # Guard against indexing an empty result list.
    print("\n ⚠️ No chunks were scored")


# ── Test 3: Diversity Dedup ───────────────────────────────────────────────────

print("\n\n" + "=" * 65)
print("TEST 3: Diversity Dedup")
print("Does the filter remove duplicate chunks?")
print("=" * 65)

# Create chunks where two are very similar
dup_chunks = [
    "Backpropagation computes gradients using the chain rule error propagation.",
    "Backpropagation uses chain rule to compute gradients by propagating error.",
    "The sky is blue because of Rayleigh scattering.",
    "The sky appears blue due to Rayleigh scattering of sunlight.",
]

# Use the shared `query` so the header cannot drift from what is actually scored.
print(f"\nQuery: '{query}'")
print(f"Input: {len(dup_chunks)} chunks (2 pairs of near-duplicates)")
print()

# First: score without dedup
print("--- WITHOUT dedup ---")
raw = score_chunks_batch(dup_chunks, query, diversity_threshold=1.0)
for r in raw:
    print(f" [{r['rag_score']:>2}/100] {r['chunk'][:55]}...")

# Then: with dedup (default threshold 0.85)
print("\n--- WITH dedup (threshold=0.85) ---")
filtered = score_chunks_batch(dup_chunks, query, diversity_threshold=0.85)
# A rag_score of 0 is treated as "dropped as duplicate" here.
# NOTE(review): a chunk that legitimately scores 0 would be counted too — confirm
# how pymrsf.rag marks deduplicated chunks.
dup_count = sum(1 for r in filtered if r['rag_score'] == 0)
for r in filtered:
    if r['rag_score'] > 0:
        print(f" ✅ [{r['rag_score']:>2}/100] {r['chunk'][:55]}...")
    else:
        print(f" ❌ [{r['rag_score']:>2}/100] {r['chunk'][:55]}... (DUPLICATE)")

if dup_count > 0:
    print(f"\n ✅ Dedup removed {dup_count} duplicate chunks!")
else:
    print("\n ℹ️ No duplicates detected (threshold may need adjustment)")


# ── Summary ───────────────────────────────────────────────────────────────────

print("\n\n" + "=" * 65)
print("SUMMARY")
print("=" * 65)
print("\n pymrsf gives you:")
print(" 1. Which chunks contain NEW information (not just relevant)")
print(" 2. Whether the model already KNOWS THE ANSWER to your query")
print(" 3. Automatic removal of DUPLICATE chunks")
print(" 4. A tunable RAG score to optimize context window usage")
print("\n Next step: Drop filter_chunks() into your RAG pipeline:")
print(" from pymrsf.rag import filter_chunks")
print(" good_chunks = filter_chunks(retriever.get(query), query)")
print(" answer = llm.complete(query, context=good_chunks)")
print("=" * 65)
|