context-drift-analyzer 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. context_drift_analyzer-0.4.0/.gitignore +24 -0
  2. context_drift_analyzer-0.4.0/LICENSE +21 -0
  3. context_drift_analyzer-0.4.0/PKG-INFO +580 -0
  4. context_drift_analyzer-0.4.0/README.md +540 -0
  5. context_drift_analyzer-0.4.0/pyproject.toml +75 -0
  6. context_drift_analyzer-0.4.0/src/context_drift_analyzer/__init__.py +71 -0
  7. context_drift_analyzer-0.4.0/src/context_drift_analyzer/cli/__init__.py +1 -0
  8. context_drift_analyzer-0.4.0/src/context_drift_analyzer/cli/main.py +179 -0
  9. context_drift_analyzer-0.4.0/src/context_drift_analyzer/context/__init__.py +1 -0
  10. context_drift_analyzer-0.4.0/src/context_drift_analyzer/context/explainer.py +98 -0
  11. context_drift_analyzer-0.4.0/src/context_drift_analyzer/context/manager.py +206 -0
  12. context_drift_analyzer-0.4.0/src/context_drift_analyzer/core/__init__.py +1 -0
  13. context_drift_analyzer-0.4.0/src/context_drift_analyzer/core/analyzer.py +139 -0
  14. context_drift_analyzer-0.4.0/src/context_drift_analyzer/core/scorer.py +85 -0
  15. context_drift_analyzer-0.4.0/src/context_drift_analyzer/core/session.py +100 -0
  16. context_drift_analyzer-0.4.0/src/context_drift_analyzer/persistence/__init__.py +1 -0
  17. context_drift_analyzer-0.4.0/src/context_drift_analyzer/persistence/session_memory.py +103 -0
  18. context_drift_analyzer-0.4.0/src/context_drift_analyzer/providers/__init__.py +17 -0
  19. context_drift_analyzer-0.4.0/src/context_drift_analyzer/providers/base.py +88 -0
  20. context_drift_analyzer-0.4.0/src/context_drift_analyzer/providers/generic.py +93 -0
  21. context_drift_analyzer-0.4.0/src/context_drift_analyzer/strategies/__init__.py +10 -0
  22. context_drift_analyzer-0.4.0/src/context_drift_analyzer/strategies/base.py +42 -0
  23. context_drift_analyzer-0.4.0/src/context_drift_analyzer/strategies/callable_embedding.py +64 -0
  24. context_drift_analyzer-0.4.0/src/context_drift_analyzer/strategies/composite.py +57 -0
  25. context_drift_analyzer-0.4.0/src/context_drift_analyzer/strategies/embedding_base.py +123 -0
  26. context_drift_analyzer-0.4.0/src/context_drift_analyzer/strategies/keyword.py +53 -0
  27. context_drift_analyzer-0.4.0/src/context_drift_analyzer/strategies/openai_embedding.py +50 -0
  28. context_drift_analyzer-0.4.0/src/context_drift_analyzer/strategies/sentence_transformer.py +71 -0
  29. context_drift_analyzer-0.4.0/src/context_drift_analyzer/strategies/token_overlap.py +60 -0
  30. context_drift_analyzer-0.4.0/src/context_drift_analyzer/tracker.py +385 -0
  31. context_drift_analyzer-0.4.0/src/context_drift_analyzer/utils/__init__.py +1 -0
  32. context_drift_analyzer-0.4.0/src/context_drift_analyzer/utils/markdown.py +58 -0
  33. context_drift_analyzer-0.4.0/src/context_drift_analyzer/utils/text.py +92 -0
  34. context_drift_analyzer-0.4.0/src/context_drift_analyzer/wrap.py +283 -0
@@ -0,0 +1,24 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ dist/
6
+ build/
7
+ *.egg-info/
8
+ *.egg
9
+ .eggs/
10
+ .pytest_cache/
11
+ .coverage
12
+ htmlcov/
13
+ .tox/
14
+ .venv/
15
+ venv/
16
+ env/
17
+ .env
18
+ *.log
19
+ .mypy_cache/
20
+ .ruff_cache/
21
+ .idea/
22
+ .vscode/
23
+ *.swp
24
+ *.swo
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Suman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,580 @@
1
+ Metadata-Version: 2.4
2
+ Name: context-drift-analyzer
3
+ Version: 0.4.0
4
+ Summary: Detect, explain, and solve context drift in LLM conversations across sessions
5
+ Project-URL: Homepage, https://github.com/Suman-Git-DS/ContextDriftAnalyzer
6
+ Project-URL: Documentation, https://github.com/Suman-Git-DS/ContextDriftAnalyzer#readme
7
+ Project-URL: Repository, https://github.com/Suman-Git-DS/ContextDriftAnalyzer
8
+ Project-URL: Issues, https://github.com/Suman-Git-DS/ContextDriftAnalyzer/issues
9
+ Author-email: Suman <suman@example.com>
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: anthropic,banking-chatbot,chatbot,context-drift,drift-detection,embeddings,llm,monitoring,openai,sentence-transformers
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
+ Requires-Python: >=3.9
25
+ Provides-Extra: all
26
+ Requires-Dist: anthropic>=0.20.0; extra == 'all'
27
+ Requires-Dist: openai>=1.0.0; extra == 'all'
28
+ Requires-Dist: sentence-transformers>=2.0.0; extra == 'all'
29
+ Provides-Extra: anthropic
30
+ Requires-Dist: anthropic>=0.20.0; extra == 'anthropic'
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
33
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
34
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
35
+ Provides-Extra: openai
36
+ Requires-Dist: openai>=1.0.0; extra == 'openai'
37
+ Provides-Extra: semantic
38
+ Requires-Dist: sentence-transformers>=2.0.0; extra == 'semantic'
39
+ Description-Content-Type: text/markdown
40
+
41
+ <div align="center">
42
+ <h1 align="center">context-drift-analyzer</h1>
43
+ <p align="center">
44
+ Detect, explain, and <strong>solve</strong> context drift in LLM conversations across sessions.
45
+ </p>
46
+ </div>
47
+
48
+ <p align="center">
49
+ <a href="https://pypi.org/project/context-drift-analyzer/"><img alt="PyPI" src="https://img.shields.io/pypi/v/context-drift-analyzer"></a>
50
+ <a href="https://pypi.org/project/context-drift-analyzer/"><img alt="Python" src="https://img.shields.io/pypi/pyversions/context-drift-analyzer"></a>
51
+ <a href="https://github.com/Suman-Git-DS/ContextDriftAnalyzer/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/github/license/Suman-Git-DS/ContextDriftAnalyzer"></a>
52
+ </p>
53
+
54
+ ---
55
+
56
+ ## The Problem
57
+
58
+ LLM-powered chatbots lose focus over long conversations. A banking assistant that starts by explaining savings accounts ends up discussing travel tips after a few sessions — with no visibility into *when* this happens or *why*.
59
+
60
+ Most tools only **detect** drift. This package **detects, explains, and solves** it:
61
+
62
+ | Capability | What it does |
63
+ |-----------|-------------|
64
+ | **Detect** | Drift score (0-100, where higher means the original context is better preserved) via semantic embeddings |
65
+ | **Explain** | 1-2 line human-readable reason for drift |
66
+ | **Solve** | Context management: original context + session summaries, kept within token budget |
67
+ | **Persist** | `.session_memory` file tracks drift across restarts and deploys |
68
+
69
+ ## How It Works
70
+
71
+ Imagine a banking assistant chatbot:
72
+
73
+ ```
74
+ Session 1 (Turn 1-2): Score 92 [FRESH] "Context well-preserved. Responses align with banking instructions."
75
+ Session 2 (Turn 3-6): Score 76 [MILD] "Mild drift: core topics still present (savings, accounts, interest)."
76
+ Session 3 (Turn 7-12): Score 48 [SEVERE] "Significant drift: now focused on travel, restaurants, recipes."
77
+ Session 4 (Turn 13+): Score 22 [CRITICAL] "Critical drift: conversation departed from banking purpose."
78
+ ↑ recommend reset
79
+ ```
80
+
81
+ ## Installation
82
+
83
+ ```bash
84
+ # Core + Sentence Transformers (recommended — free, local, semantic)
85
+ pip install "context-drift-analyzer[semantic]"
86
+
87
+ # Core only (zero dependencies — keyword/TF strategies, or bring your own embedder)
88
+ pip install context-drift-analyzer
89
+
90
+ # Everything (semantic + OpenAI embeddings + Anthropic client support)
91
+ pip install "context-drift-analyzer[all]"
92
+ ```
93
+
94
+ ## Quick Start — Drop-in Client Wrapper
95
+
96
+ The easiest way to add drift tracking. Wrap your existing LLM client and use it exactly as before — drift scores are attached to every response automatically.
97
+
98
+ ### OpenAI
99
+
100
+ ```python
101
+ from openai import OpenAI
102
+ from context_drift_analyzer import wrap
103
+
104
+ client = OpenAI()
105
+ tracked = wrap(client, system_prompt="You are a banking assistant specializing in savings accounts, credit cards, and loans. Always provide accurate financial information.")
106
+
107
+ # Use exactly like the original client
108
+ response = tracked.chat.completions.create(
109
+ model="gpt-4o",
110
+ messages=[
111
+ {"role": "system", "content": "You are a banking assistant."},
112
+ {"role": "user", "content": "What savings accounts do you offer?"},
113
+ ],
114
+ )
115
+
116
+ # Drift score is attached to the response
117
+ print(response._drift.score) # 88.5
118
+ print(response._drift_explanation) # "Context well-preserved..."
119
+
120
+ # On-demand check
121
+ report = tracked.drift_check()
122
+ ```
123
+
124
+ ### Anthropic
125
+
126
+ ```python
127
+ from anthropic import Anthropic
128
+ from context_drift_analyzer import wrap
129
+
130
+ client = Anthropic()
131
+ tracked = wrap(client, system_prompt="You are a banking assistant specializing in savings accounts, credit cards, and loans.")
132
+
133
+ response = tracked.messages.create(
134
+ model="claude-haiku-4-5-20251001",
135
+ system="You are a banking assistant.",
136
+ messages=[{"role": "user", "content": "Tell me about your credit card options."}],
137
+ max_tokens=200,
138
+ )
139
+
140
+ print(response._drift.score) # Drift score attached!
141
+ report = tracked.drift_check()
142
+ ```
143
+
144
+ ## Quick Start — Direct Tracker
145
+
146
+ For more control, use `DriftTracker` directly in your pipeline:
147
+
148
+ ```python
149
+ from context_drift_analyzer import DriftTracker, FewShotExample
150
+
151
+ tracker = DriftTracker(
152
+ system_prompt="You are a banking assistant for Acme Bank. Help customers with savings accounts, credit cards, loans, and account inquiries. Always provide accurate financial information.",
153
+ few_shot_examples=[
154
+ FewShotExample(
155
+ user="What interest rate do your savings accounts offer?",
156
+ assistant="Our standard savings account offers 4.5% APY. Premium savings offers 5.1% APY for balances over $10,000."
157
+ ),
158
+ FewShotExample(
159
+ user="How do I apply for a credit card?",
160
+ assistant="You can apply online at acmebank.com/cards or visit any branch. You'll need your ID, proof of income, and SSN. Approval typically takes 1-2 business days."
161
+ ),
162
+ ],
163
+ mode="always", # "always" or "ondemand"
164
+ persist=True, # save to .session_memory file
165
+ max_summary_sessions=3, # keep last 3 session summaries
166
+ )
167
+
168
+ # After each LLM call in your pipeline:
169
+ result = tracker.record_turn(
170
+ user_message="What are the requirements for a home loan?",
171
+ assistant_response="For a home loan at Acme Bank, you'll need a credit score of 620+, proof of income, 2 years of tax returns, and a down payment of at least 3.5% for FHA loans or 20% for conventional loans."
172
+ )
173
+
174
+ print(f"Score: {result.drift.score:.1f}/100") # 87.3/100
175
+ print(f"Verdict: {result.drift.verdict.value}") # "mild"
176
+ print(f"Explanation: {result.explanation}") # "Mild drift: core topics present (loans, credit, banking)."
177
+ print(f"Effective: {result.drift.is_effective}") # True
178
+ print(f"Needs reset: {result.drift.needs_reset}") # False
179
+
180
+ # Get managed context (original + session summaries) for your LLM
181
+ system_message = tracker.get_managed_context()
182
+
183
+ # End session — summarizes and preserves for next time
184
+ tracker.end_session()
185
+ ```
186
+
187
+ ## On-Demand vs Always-On Mode
188
+
189
+ Choose when drift scoring happens:
190
+
191
+ ```python
192
+ # Always-on: scores every turn (default)
193
+ # Good for: monitoring dashboards, alerting
194
+ tracker = DriftTracker(system_prompt="You are a banking assistant.", mode="always")
195
+ result = tracker.record_turn(user_msg, assistant_msg)
196
+ print(result.drift.score) # computed automatically
197
+
198
+ # On-demand: scores only when you ask
199
+ # Good for: production pipelines where you check periodically
200
+ tracker = DriftTracker(system_prompt="You are a banking assistant.", mode="ondemand")
201
+ tracker.record_turn(user_msg, assistant_msg) # no scoring overhead
202
+ tracker.record_turn(user_msg2, assistant_msg2)
203
+
204
+ report = tracker.check() # explicitly request drift check
205
+ print(report.drift.score)
206
+ print(report.explanation)
207
+ ```
208
+
209
+ ## Context Management (The Solution)
210
+
211
+ Most drift tools stop at detection. This package actually **solves** the problem by managing the context window intelligently:
212
+
213
+ ```
214
+ ┌──────────────────────────────────────────────────────┐
215
+ │ Managed Context Window │
216
+ ├──────────────────────────────────────────────────────┤
217
+ │ [ALWAYS] Original System Prompt │
218
+ │ "You are a banking assistant for Acme Bank..." │
219
+ │ [ALWAYS] Few-Shot Examples │
220
+ │ "Q: What interest rate? A: 4.5% APY..." │
221
+ ├──────────────────────────────────────────────────────┤
222
+ │ [AUTO] Session 1 Summary: "Customer asked about │
223
+ │ savings accounts and CD rates." │
224
+ │ [AUTO] Session 2 Summary: "Discussed home loan │
225
+ │ requirements and mortgage pre-approval." │
226
+ │ [AUTO] Session 3 Summary: "Helped with credit card │
227
+ │ dispute and fraud alert process." │
228
+ ├──────────────────────────────────────────────────────┤
229
+ │ [LIVE] Current Conversation Turns │
230
+ └──────────────────────────────────────────────────────┘
231
+ ```
232
+
233
+ **How it works:**
234
+ 1. The original context (system prompt + few-shots) is **always preserved** — never truncated
235
+ 2. At the end of each session, the conversation is **summarized** into 2-3 compact sentences
236
+ 3. You configure how many past session summaries to keep (default: 3)
237
+ 4. The managed context = original + summaries — use this as your system message
238
+ 5. Old summaries are automatically dropped when `max_summary_sessions` is exceeded
239
+
240
+ ```python
241
+ tracker = DriftTracker(
242
+ system_prompt="You are a banking assistant for Acme Bank.",
243
+ max_summary_sessions=3,
244
+ summarize_fn=my_llm_summarizer, # optional: use an LLM to summarize
245
+ )
246
+
247
+ # After each session:
248
+ tracker.end_session()
249
+
250
+ # Use this as your system message — includes original context + session summaries
251
+ system_message = tracker.get_managed_context()
252
+ ```
253
+
254
+ ### Custom Summarization (LLM-Powered)
255
+
256
+ By default, summaries use simple extractive logic (first + last sentences). For production, provide an LLM-based summarizer:
257
+
258
+ ```python
259
+ from openai import OpenAI
260
+ client = OpenAI()
261
+
262
+ def llm_summarize(session_text: str) -> str:
263
+ response = client.chat.completions.create(
264
+ model="gpt-4o-mini",
265
+ messages=[
266
+ {"role": "system", "content": "Summarize this banking conversation in 2-3 sentences. Focus on products discussed and customer needs."},
267
+ {"role": "user", "content": session_text},
268
+ ],
269
+ max_tokens=100,
270
+ )
271
+ return response.choices[0].message.content
272
+
273
+ tracker = DriftTracker(
274
+ system_prompt="You are a banking assistant.",
275
+ summarize_fn=llm_summarize,
276
+ )
277
+ ```
278
+
279
+ ### Context Control
280
+
281
+ ```python
282
+ # Freeze context — prevent any modifications to session history
283
+ tracker.freeze_context()
284
+
285
+ # Unfreeze to allow changes again
286
+ tracker.unfreeze_context()
287
+
288
+ # Clear all session summaries (original context preserved)
289
+ tracker.clear_history()
290
+
291
+ # Full reset — clears everything including .session_memory file
292
+ tracker.reset()
293
+ ```
294
+
295
+ ## Drift Explanation
296
+
297
+ Every drift score comes with a human-readable explanation of **why** drift occurred:
298
+
299
+ ```python
300
+ # Banking assistant getting asked about cooking
301
+ result = tracker.record_turn(
302
+ "What's a good pasta recipe?",
303
+ "Try pasta carbonara with eggs, parmesan, pancetta, and black pepper..."
304
+ )
305
+ print(result.explanation)
306
+ # "Significant drift: conversation has moved away from original purpose.
307
+ # Now focused on: carbonara, pasta, recipe, cooking."
308
+ ```
309
+
310
+ Explanations are generated locally (no API calls) by default. You can plug in your own explainer:
311
+
312
+ ```python
313
+ def llm_explain(original_context: str, recent_text: str, score: float) -> str:
314
+ # Call an LLM to explain the drift
315
+ ...
316
+
317
+ tracker = DriftTracker(system_prompt="...", explain_fn=llm_explain)
318
+ ```
319
+
320
+ ## Persistence (.session_memory File)
321
+
322
+ Enable persistence to track drift **across restarts and deploys**:
323
+
324
+ ```python
325
+ tracker = DriftTracker(
326
+ system_prompt="You are a banking assistant.",
327
+ persist=True,
328
+ persist_path=".session_memory", # default
329
+ )
330
+ ```
331
+
332
+ The `.session_memory` file is a plain JSON file stored locally:
333
+
334
+ | Field | Description |
335
+ |-------|-------------|
336
+ | `original_context` | The full initial context (system prompt + few-shots) |
337
+ | `session_summaries` | List of past session summaries |
338
+ | `session_count` | Total number of sessions |
339
+ | `total_turns` | Cumulative turn count |
340
+ | `context_frozen` | Whether context is frozen |
341
+ | `drift_history` | List of `{turn, session, score, verdict, explanation}` entries |
342
+ | `last_response_text` | Most recent response text |
343
+
344
+ > **Note:** Add `.session_memory` to your `.gitignore`. Do not commit it — it may contain content from your conversations.
345
+
346
+ ## Embedding Strategies
347
+
348
+ Choose how drift is measured:
349
+
350
+ ### Sentence Transformers (Recommended — Free, Local)
351
+
352
+ ```python
353
+ from context_drift_analyzer.strategies.sentence_transformer import SentenceTransformerStrategy
354
+
355
+ tracker = DriftTracker(
356
+ system_prompt="You are a banking assistant.",
357
+ strategies=[SentenceTransformerStrategy(model_name="all-MiniLM-L6-v2")],
358
+ )
359
+ ```
360
+
361
+ Models: `all-MiniLM-L6-v2` (80MB, fast), `all-mpnet-base-v2` (420MB, best quality), `paraphrase-MiniLM-L3-v2` (60MB, fastest).
362
+
363
+ ### OpenAI Embeddings (Paid API)
364
+
365
+ ```python
366
+ from openai import OpenAI
367
+ from context_drift_analyzer.strategies.openai_embedding import OpenAIEmbeddingStrategy
368
+
369
+ client = OpenAI()
370
+ tracker = DriftTracker(
371
+ system_prompt="You are a banking assistant.",
372
+ strategies=[OpenAIEmbeddingStrategy(client=client, model="text-embedding-3-small")],
373
+ )
374
+ ```
375
+
376
+ ### Bring Your Own Embedder
377
+
378
+ ```python
379
+ from context_drift_analyzer.strategies.callable_embedding import CallableEmbeddingStrategy
380
+
381
+ def my_embedder(text: str) -> list[float]:
382
+ # Cohere, Voyage, Google, custom model, etc.
383
+ ...
384
+
385
+ tracker = DriftTracker(
386
+ system_prompt="You are a banking assistant.",
387
+ strategies=[CallableEmbeddingStrategy(embed_fn=my_embedder, strategy_name="cohere")],
388
+ )
389
+ ```
390
+
391
+ ### Keyword + Token Overlap (Zero Dependencies)
392
+
393
+ The default strategies when no embedding backend is installed:
394
+
395
+ ```python
396
+ # No extra install needed — uses keyword hit-rate + TF cosine similarity
397
+ tracker = DriftTracker(system_prompt="You are a banking assistant.")
398
+ ```
399
+
400
+ ### Composite (Mix Multiple Strategies)
401
+
402
+ ```python
403
+ from context_drift_analyzer.strategies.composite import CompositeStrategy
404
+ from context_drift_analyzer.strategies.sentence_transformer import SentenceTransformerStrategy
405
+ from context_drift_analyzer.strategies.keyword import KeywordStrategy
406
+
407
+ tracker = DriftTracker(
408
+ system_prompt="You are a banking assistant.",
409
+ strategies=[
410
+ CompositeStrategy(
411
+ strategies=[SentenceTransformerStrategy(), KeywordStrategy()],
412
+ weights=[0.8, 0.2], # 80% semantic, 20% keyword
413
+ )
414
+ ],
415
+ )
416
+ ```
417
+
418
+ ## CLI
419
+
420
+ ```bash
421
+ # Show session memory status
422
+ context-drift-analyzer status
423
+ context-drift-analyzer status --file /path/to/.session_memory
424
+
425
+ # Show drift history
426
+ context-drift-analyzer history
427
+ context-drift-analyzer history --last 10
428
+
429
+ # Delete session memory
430
+ context-drift-analyzer reset
431
+
432
+ # Freeze/unfreeze context
433
+ context-drift-analyzer freeze
434
+ context-drift-analyzer unfreeze
435
+ ```
436
+
437
+ ## Drift Score Reference
438
+
439
+ | Score | Verdict | Meaning | Action |
440
+ |-------|---------|---------|--------|
441
+ | 90-100 | `FRESH` | Context well-preserved | None needed |
442
+ | 75-89 | `MILD` | Minor drift | Monitor |
443
+ | 55-74 | `MODERATE` | Noticeable drift | Consider intervention |
444
+ | 35-54 | `SEVERE` | Significant drift | Reset recommended |
445
+ | 0-34 | `CRITICAL` | Context largely lost | Reset required |
446
+
447
+ ## Under the Hood
448
+
449
+ Here is exactly what happens when you call `tracker.record_turn()`:
450
+
451
+ ```
452
+ 1. USER MESSAGE recorded in Session
453
+
454
+ 2. ASSISTANT RESPONSE stripped of markdown formatting
455
+ (code blocks, headers, bold, links removed to avoid false-positive drift)
456
+
457
+ 3. Cleaned response recorded in Session
458
+
459
+ 4. STRATEGY SCORING (if mode="always"):
460
+ a. The initial context (system prompt + few-shots) is embedded → reference vector
461
+ (cached after first call — never re-computed)
462
+ b. Recent assistant responses (last N turns) are embedded → current vector
463
+ c. Cosine similarity(reference, current) → raw score (0-1)
464
+ d. Calibrated scaling [0, 0.55] → [0, 100] for meaningful scores
465
+ e. Exponential decay applied: raw_score × decay_rate^(turns/2)
466
+ f. Clamped to 0-100 → final drift score
467
+
468
+ 5. EXPLANATION generated:
469
+ - Score-based analysis with topic comparison
470
+ - Shared and divergent topics identified
471
+ - 1-2 sentence explanation produced (locally, no API calls)
472
+
473
+ 6. PERSISTENCE (if enabled):
474
+ - Drift entry appended to .session_memory drift_history
475
+ - Session metadata updated
476
+
477
+ 7. TURN RESULT returned with:
478
+ - drift score + verdict
479
+ - explanation
480
+ - managed context string
481
+ ```
482
+
483
+ **When you call `tracker.end_session()`:**
484
+
485
+ ```
486
+ 1. Final drift score computed
487
+ 2. Session text SUMMARIZED (extractive or LLM-based)
488
+ 3. Summary added to ContextManager (capped at max_summary_sessions)
489
+ 4. Session turns CLEARED
490
+ 5. Session counter incremented
491
+ 6. State persisted to .session_memory
492
+ 7. Next session starts fresh with original context + summaries intact
493
+ ```
494
+
495
+ ## Cost and Latency
496
+
497
+ | Strategy | Cost | Latency per Turn | Install Size | Quality |
498
+ |----------|------|-------------------|-------------|---------|
499
+ | Keyword + Token Overlap (default) | **Free** | **<1ms** | **0 MB** | Basic (lexical) |
500
+ | Sentence Transformers (`all-MiniLM-L6-v2`) | **Free** | ~20-50ms (CPU) | ~80 MB | Good (semantic) |
501
+ | Sentence Transformers (`all-mpnet-base-v2`) | **Free** | ~50-100ms (CPU) | ~420 MB | Best (semantic) |
502
+ | OpenAI `text-embedding-3-small` | ~$0.02/1M tokens | ~100-200ms (API) | ~1 MB | Excellent |
503
+ | OpenAI `text-embedding-3-large` | ~$0.13/1M tokens | ~100-200ms (API) | ~1 MB | Best (API) |
504
+ | Custom callable | Varies | Varies | Varies | You decide |
505
+
506
+ ## Configuration Reference
507
+
508
+ ```python
509
+ DriftTracker(
510
+ system_prompt="...", # Required: your system instructions
511
+ few_shot_examples=[...], # Optional: FewShotExample pairs
512
+ mode="always", # "always" or "ondemand"
513
+ strategies=[...], # Optional: custom strategies
514
+ decay_rate=0.95, # 0-1, lower = faster decay
515
+ window_size=5, # recent turns to evaluate (0 = all)
516
+ persist=False, # save to .session_memory
517
+ persist_path=".session_memory", # file path
518
+ max_summary_sessions=3, # past session summaries to keep
519
+ summarize_fn=None, # custom summarizer (str) -> str
520
+ explain_fn=None, # custom explainer (str, str, float) -> str
521
+ strip_md=True, # strip markdown before embedding
522
+ frozen=False, # freeze context (no modifications)
523
+ )
524
+ ```
525
+
526
+ ## Project Structure
527
+
528
+ ```
529
+ src/context_drift_analyzer/
530
+ tracker.py # DriftTracker — main entry point
531
+ wrap.py # Drop-in client wrapper (OpenAI, Anthropic)
532
+ core/
533
+ analyzer.py # Drift analysis engine
534
+ scorer.py # DriftScore, DriftVerdict
535
+ session.py # Session, Turn, FewShotExample
536
+ context/
537
+ manager.py # Context window management + session summaries
538
+ explainer.py # Drift explanation generator
539
+ persistence/
540
+ session_memory.py # .session_memory file read/write
541
+ strategies/
542
+ embedding_base.py # Base class for embedding strategies
543
+ sentence_transformer.py # HuggingFace sentence-transformers
544
+ openai_embedding.py # OpenAI embedding API
545
+ callable_embedding.py # Bring-your-own embedder
546
+ keyword.py # Keyword hit-rate (lexical)
547
+ token_overlap.py # TF cosine similarity (lexical)
548
+ composite.py # Weighted multi-strategy combiner
549
+ cli/
550
+ main.py # CLI tool (status/history/reset/freeze)
551
+ utils/
552
+ text.py # Tokenization, TF vectors
553
+ markdown.py # Markdown stripping
554
+ tests/ # 188 tests
555
+ examples/ # Ready-to-run examples (banking chatbot)
556
+ ```
557
+
558
+ ## Running Tests
559
+
560
+ ```bash
561
+ git clone https://github.com/Suman-Git-DS/ContextDriftAnalyzer.git
562
+ cd ContextDriftAnalyzer
563
+ pip install -e ".[dev]"
564
+ pytest tests/ -v
565
+ ```
566
+
567
+ ## Contributing
568
+
569
+ See [CONTRIBUTING.md](https://github.com/Suman-Git-DS/ContextDriftAnalyzer/blob/main/CONTRIBUTING.md) for guidelines.
570
+
571
+ ## License
572
+
573
+ MIT License — see [LICENSE](https://github.com/Suman-Git-DS/ContextDriftAnalyzer/blob/main/LICENSE) for details.
574
+
575
+ ## Links
576
+
577
+ - [PyPI Package](https://pypi.org/project/context-drift-analyzer/)
578
+ - [GitHub Repository](https://github.com/Suman-Git-DS/ContextDriftAnalyzer)
579
+ - [Issue Tracker](https://github.com/Suman-Git-DS/ContextDriftAnalyzer/issues)
580
+ - [Changelog](https://github.com/Suman-Git-DS/ContextDriftAnalyzer/blob/main/CHANGELOG.md)