rnsr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. rnsr/__init__.py +118 -0
  2. rnsr/__main__.py +242 -0
  3. rnsr/agent/__init__.py +218 -0
  4. rnsr/agent/cross_doc_navigator.py +767 -0
  5. rnsr/agent/graph.py +1557 -0
  6. rnsr/agent/llm_cache.py +575 -0
  7. rnsr/agent/navigator_api.py +497 -0
  8. rnsr/agent/provenance.py +772 -0
  9. rnsr/agent/query_clarifier.py +617 -0
  10. rnsr/agent/reasoning_memory.py +736 -0
  11. rnsr/agent/repl_env.py +709 -0
  12. rnsr/agent/rlm_navigator.py +2108 -0
  13. rnsr/agent/self_reflection.py +602 -0
  14. rnsr/agent/variable_store.py +308 -0
  15. rnsr/benchmarks/__init__.py +118 -0
  16. rnsr/benchmarks/comprehensive_benchmark.py +733 -0
  17. rnsr/benchmarks/evaluation_suite.py +1210 -0
  18. rnsr/benchmarks/finance_bench.py +147 -0
  19. rnsr/benchmarks/pdf_merger.py +178 -0
  20. rnsr/benchmarks/performance.py +321 -0
  21. rnsr/benchmarks/quality.py +321 -0
  22. rnsr/benchmarks/runner.py +298 -0
  23. rnsr/benchmarks/standard_benchmarks.py +995 -0
  24. rnsr/client.py +560 -0
  25. rnsr/document_store.py +394 -0
  26. rnsr/exceptions.py +74 -0
  27. rnsr/extraction/__init__.py +172 -0
  28. rnsr/extraction/candidate_extractor.py +357 -0
  29. rnsr/extraction/entity_extractor.py +581 -0
  30. rnsr/extraction/entity_linker.py +825 -0
  31. rnsr/extraction/grounded_extractor.py +722 -0
  32. rnsr/extraction/learned_types.py +599 -0
  33. rnsr/extraction/models.py +232 -0
  34. rnsr/extraction/relationship_extractor.py +600 -0
  35. rnsr/extraction/relationship_patterns.py +511 -0
  36. rnsr/extraction/relationship_validator.py +392 -0
  37. rnsr/extraction/rlm_extractor.py +589 -0
  38. rnsr/extraction/rlm_unified_extractor.py +990 -0
  39. rnsr/extraction/tot_validator.py +610 -0
  40. rnsr/extraction/unified_extractor.py +342 -0
  41. rnsr/indexing/__init__.py +60 -0
  42. rnsr/indexing/knowledge_graph.py +1128 -0
  43. rnsr/indexing/kv_store.py +313 -0
  44. rnsr/indexing/persistence.py +323 -0
  45. rnsr/indexing/semantic_retriever.py +237 -0
  46. rnsr/indexing/semantic_search.py +320 -0
  47. rnsr/indexing/skeleton_index.py +395 -0
  48. rnsr/ingestion/__init__.py +161 -0
  49. rnsr/ingestion/chart_parser.py +569 -0
  50. rnsr/ingestion/document_boundary.py +662 -0
  51. rnsr/ingestion/font_histogram.py +334 -0
  52. rnsr/ingestion/header_classifier.py +595 -0
  53. rnsr/ingestion/hierarchical_cluster.py +515 -0
  54. rnsr/ingestion/layout_detector.py +356 -0
  55. rnsr/ingestion/layout_model.py +379 -0
  56. rnsr/ingestion/ocr_fallback.py +177 -0
  57. rnsr/ingestion/pipeline.py +936 -0
  58. rnsr/ingestion/semantic_fallback.py +417 -0
  59. rnsr/ingestion/table_parser.py +799 -0
  60. rnsr/ingestion/text_builder.py +460 -0
  61. rnsr/ingestion/tree_builder.py +402 -0
  62. rnsr/ingestion/vision_retrieval.py +965 -0
  63. rnsr/ingestion/xy_cut.py +555 -0
  64. rnsr/llm.py +733 -0
  65. rnsr/models.py +167 -0
  66. rnsr/py.typed +2 -0
  67. rnsr-0.1.0.dist-info/METADATA +592 -0
  68. rnsr-0.1.0.dist-info/RECORD +72 -0
  69. rnsr-0.1.0.dist-info/WHEEL +5 -0
  70. rnsr-0.1.0.dist-info/entry_points.txt +2 -0
  71. rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
  72. rnsr-0.1.0.dist-info/top_level.txt +1 -0
rnsr/agent/self_reflection.py
@@ -0,0 +1,602 @@
+ """
+ RNSR Self-Reflection Loop
+
+ Implements iterative self-correction where the system:
+ 1. Generates an initial answer
+ 2. Critiques its own answer
+ 3. If issues found, re-navigates with critique as context
+ 4. Repeats until confident or max iterations
+
+ Based on self-reflection patterns from:
+ - Reflexion (Shinn et al.)
+ - Self-Refine (Madaan et al.)
+ - Constitutional AI principles
+
+ Key insight: LLMs can often identify problems in their own outputs
+ that they couldn't avoid in initial generation.
+ """
+
+ from __future__ import annotations
+
+ import json
+ import re
+ import time
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from enum import Enum
+ from typing import Any, Callable
+
+ import structlog
+
+ logger = structlog.get_logger(__name__)
+
+
+ # =============================================================================
+ # Self-Reflection Prompts
+ # =============================================================================
+
+ CRITIQUE_PROMPT = """You are a critical reviewer. Analyze this answer for potential issues.
+
+ QUESTION: {question}
+
+ ANSWER: {answer}
+
+ EVIDENCE USED:
+ {evidence}
+
+ Critically evaluate:
+ 1. ACCURACY: Does the evidence actually support this answer?
+ 2. COMPLETENESS: Is anything important missing?
+ 3. CONTRADICTIONS: Does any evidence contradict the answer?
+ 4. SPECIFICITY: Is the answer too vague or too specific?
+ 5. ASSUMPTIONS: Are there unstated assumptions?
+
+ If you find issues, explain them clearly.
+ If the answer is good, say "NO ISSUES FOUND".
+
+ Respond in JSON:
+ {{
+ "has_issues": true/false,
+ "issues": [
+ {{"type": "accuracy|completeness|contradiction|specificity|assumption", "description": "...", "severity": "high|medium|low"}}
+ ],
+ "confidence_in_critique": 0.0-1.0,
+ "suggested_improvements": ["..."],
+ "should_retry": true/false
+ }}"""
+
+
+ REFINEMENT_PROMPT = """You are improving an answer based on feedback.
+
+ ORIGINAL QUESTION: {question}
+
+ PREVIOUS ANSWER: {previous_answer}
+
+ CRITIQUE/ISSUES FOUND:
+ {critique}
+
+ EVIDENCE AVAILABLE:
+ {evidence}
+
+ Generate an IMPROVED answer that addresses the identified issues.
+ Be specific and directly address each criticism.
+
+ Respond with ONLY the improved answer, no meta-commentary."""
+
+
+ VERIFICATION_PROMPT = """Compare these two answers and determine which is better.
+
+ QUESTION: {question}
+
+ ANSWER A (Original):
+ {answer_a}
+
+ ANSWER B (Refined):
+ {answer_b}
+
+ Which answer is:
+ 1. More accurate?
+ 2. More complete?
+ 3. Better supported by evidence?
+
+ Respond in JSON:
+ {{
+ "better_answer": "A" or "B",
+ "confidence": 0.0-1.0,
+ "reasoning": "..."
+ }}"""
+
+
+ # =============================================================================
+ # Data Models
+ # =============================================================================
+
+ class IssueType(str, Enum):
+     """Types of issues that can be identified."""
+
+     ACCURACY = "accuracy"
+     COMPLETENESS = "completeness"
+     CONTRADICTION = "contradiction"
+     SPECIFICITY = "specificity"
+     ASSUMPTION = "assumption"
+     HALLUCINATION = "hallucination"
+
+
+ class IssueSeverity(str, Enum):
+     """Severity of identified issues."""
+
+     HIGH = "high"
+     MEDIUM = "medium"
+     LOW = "low"
+
+
+ @dataclass
+ class Issue:
+     """An issue identified during self-critique."""
+
+     type: IssueType
+     description: str
+     severity: IssueSeverity
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary."""
+         return {
+             "type": self.type.value,
+             "description": self.description,
+             "severity": self.severity.value,
+         }
+
+
+ @dataclass
+ class CritiqueResult:
+     """Result of self-critique."""
+
+     has_issues: bool = False
+     issues: list[Issue] = field(default_factory=list)
+     confidence: float = 0.5
+     suggested_improvements: list[str] = field(default_factory=list)
+     should_retry: bool = False
+     raw_response: str = ""
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary."""
+         return {
+             "has_issues": self.has_issues,
+             "issues": [i.to_dict() for i in self.issues],
+             "confidence": self.confidence,
+             "suggested_improvements": self.suggested_improvements,
+             "should_retry": self.should_retry,
+         }
+
+
+ @dataclass
+ class ReflectionIteration:
+     """One iteration of the reflection loop."""
+
+     iteration: int
+     answer: str
+     critique: CritiqueResult | None = None
+     improved_answer: str | None = None
+     improvement_accepted: bool = False
+     duration_ms: float = 0.0
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary."""
+         return {
+             "iteration": self.iteration,
+             "answer": self.answer,
+             "critique": self.critique.to_dict() if self.critique else None,
+             "improved_answer": self.improved_answer,
+             "improvement_accepted": self.improvement_accepted,
+             "duration_ms": self.duration_ms,
+         }
+
+
+ @dataclass
+ class ReflectionResult:
+     """Complete result of self-reflection process."""
+
+     original_answer: str = ""
+     final_answer: str = ""
+     question: str = ""
+
+     # Iterations
+     iterations: list[ReflectionIteration] = field(default_factory=list)
+     total_iterations: int = 0
+
+     # Outcome
+     improved: bool = False
+     final_confidence: float = 0.0
+     all_issues: list[Issue] = field(default_factory=list)
+
+     # Timing
+     total_duration_ms: float = 0.0
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary."""
+         return {
+             "original_answer": self.original_answer,
+             "final_answer": self.final_answer,
+             "question": self.question,
+             "iterations": [i.to_dict() for i in self.iterations],
+             "total_iterations": self.total_iterations,
+             "improved": self.improved,
+             "final_confidence": self.final_confidence,
+             "all_issues": [i.to_dict() for i in self.all_issues],
+             "total_duration_ms": self.total_duration_ms,
+         }
+
+
+ # =============================================================================
+ # Self-Reflection Engine
+ # =============================================================================
+
+
+ class SelfReflectionEngine:
+     """
+     Implements iterative self-correction for answers.
+
+     Flow:
+     1. Take initial answer
+     2. Generate critique (what could be wrong?)
+     3. If issues found, generate improved answer
+     4. Verify improvement is actually better
+     5. Repeat until confident or max iterations
+     """
+
+     def __init__(
+         self,
+         llm_fn: Callable[[str], str] | None = None,
+         max_iterations: int = 3,
+         min_confidence_threshold: float = 0.8,
+         accept_improvement_threshold: float = 0.6,
+         enable_verification: bool = True,
+     ):
+         """
+         Initialize the self-reflection engine.
+
+         Args:
+             llm_fn: LLM function for critique and refinement.
+             max_iterations: Maximum reflection iterations.
+             min_confidence_threshold: Stop if critique confidence exceeds this.
+             accept_improvement_threshold: Accept improvement if confidence exceeds this.
+             enable_verification: Verify improvements are actually better.
+         """
+         self.llm_fn = llm_fn
+         self.max_iterations = max_iterations
+         self.min_confidence_threshold = min_confidence_threshold
+         self.accept_improvement_threshold = accept_improvement_threshold
+         self.enable_verification = enable_verification
+
+         # Learning: track which issues are commonly found
+         self._issue_stats: dict[str, int] = {}
+
+     def set_llm_function(self, llm_fn: Callable[[str], str]) -> None:
+         """Set the LLM function."""
+         self.llm_fn = llm_fn
+
+     def reflect(
+         self,
+         answer: str,
+         question: str,
+         evidence: str = "",
+         navigate_fn: Callable[[str], str] | None = None,
+     ) -> ReflectionResult:
+         """
+         Perform self-reflection on an answer.
+
+         Args:
+             answer: The initial answer to reflect on.
+             question: The original question.
+             evidence: Evidence that was used to generate the answer.
+             navigate_fn: Optional function to re-navigate with new context.
+
+         Returns:
+             ReflectionResult with final answer and iteration history.
+         """
+         if self.llm_fn is None:
+             logger.warning("no_llm_configured_for_reflection")
+             return ReflectionResult(
+                 original_answer=answer,
+                 final_answer=answer,
+                 question=question,
+             )
+
+         start_time = time.time()
+
+         result = ReflectionResult(
+             original_answer=answer,
+             final_answer=answer,
+             question=question,
+         )
+
+         current_answer = answer
+
+         for iteration in range(self.max_iterations):
+             iter_start = time.time()
+
+             logger.info(
+                 "reflection_iteration_started",
+                 iteration=iteration + 1,
+                 max=self.max_iterations,
+             )
+
+             # Step 1: Critique the current answer
+             critique = self._critique(current_answer, question, evidence)
+
+             iter_result = ReflectionIteration(
+                 iteration=iteration + 1,
+                 answer=current_answer,
+                 critique=critique,
+             )
+
+             # Collect issues for statistics
+             for issue in critique.issues:
+                 self._issue_stats[issue.type.value] = \
+                     self._issue_stats.get(issue.type.value, 0) + 1
+                 result.all_issues.append(issue)
+
+             # Check if we should stop
+             if not critique.has_issues or not critique.should_retry:
+                 logger.info(
+                     "reflection_no_issues",
+                     iteration=iteration + 1,
+                     confidence=critique.confidence,
+                 )
+                 iter_result.duration_ms = (time.time() - iter_start) * 1000
+                 result.iterations.append(iter_result)
+                 break
+
+             # Step 2: Generate improved answer
+             improved_answer = self._refine(
+                 current_answer, question, critique, evidence
+             )
+
+             iter_result.improved_answer = improved_answer
+
+             # Step 3: Verify improvement (optional)
+             if self.enable_verification and improved_answer:
+                 is_better = self._verify_improvement(
+                     question, current_answer, improved_answer
+                 )
+                 iter_result.improvement_accepted = is_better
+
+                 if is_better:
+                     current_answer = improved_answer
+                     result.improved = True
+                     logger.info(
+                         "improvement_accepted",
+                         iteration=iteration + 1,
+                     )
+                 else:
+                     logger.info(
+                         "improvement_rejected",
+                         iteration=iteration + 1,
+                     )
+             elif improved_answer:
+                 # Accept without verification
+                 current_answer = improved_answer
+                 iter_result.improvement_accepted = True
+                 result.improved = True
+
+             iter_result.duration_ms = (time.time() - iter_start) * 1000
+             result.iterations.append(iter_result)
+
+             # Check confidence threshold
+             if critique.confidence >= self.min_confidence_threshold:
+                 logger.info(
+                     "confidence_threshold_reached",
+                     confidence=critique.confidence,
+                     threshold=self.min_confidence_threshold,
+                 )
+                 break
+
+         result.final_answer = current_answer
+         result.total_iterations = len(result.iterations)
+         result.total_duration_ms = (time.time() - start_time) * 1000
+
+         # Calculate final confidence
+         if result.iterations and result.iterations[-1].critique:
+             last_critique = result.iterations[-1].critique
+             if last_critique.has_issues:
+                 # Penalize 0.1 per open issue, floored at 0.0 so confidence cannot go negative.
+                 result.final_confidence = max(0.0, 1.0 - len(last_critique.issues) * 0.1)
+             else:
+                 result.final_confidence = last_critique.confidence
+         else:
+             result.final_confidence = 0.7  # Default
+
+         logger.info(
+             "reflection_complete",
+             iterations=result.total_iterations,
+             improved=result.improved,
+             final_confidence=result.final_confidence,
+             duration_ms=result.total_duration_ms,
+         )
+
+         return result
+
+     def _critique(
+         self,
+         answer: str,
+         question: str,
+         evidence: str,
+     ) -> CritiqueResult:
+         """Generate a critique of the answer."""
+         prompt = CRITIQUE_PROMPT.format(
+             question=question,
+             answer=answer,
+             evidence=evidence[:2000] if evidence else "No specific evidence provided.",
+         )
+
+         try:
+             response = self.llm_fn(prompt)
+             return self._parse_critique(response)
+
+         except Exception as e:
+             logger.warning("critique_failed", error=str(e))
+             return CritiqueResult(
+                 has_issues=False,
+                 confidence=0.5,
+                 raw_response=str(e),
+             )
+
+     def _parse_critique(self, response: str) -> CritiqueResult:
+         """Parse critique response into structured format."""
+         result = CritiqueResult(raw_response=response)
+
+         # Check for "NO ISSUES FOUND"
+         if "NO ISSUES FOUND" in response.upper():
+             result.has_issues = False
+             result.confidence = 0.9
+             return result
+
+         # Parse JSON
+         try:
+             json_match = re.search(r'\{[\s\S]*\}', response)
+             if not json_match:
+                 return result
+
+             data = json.loads(json_match.group())
+
+             result.has_issues = data.get("has_issues", False)
+             result.confidence = data.get("confidence_in_critique", 0.5)
+             result.should_retry = data.get("should_retry", False)
+             result.suggested_improvements = data.get("suggested_improvements", [])
+
+             for issue_data in data.get("issues", []):
+                 try:
+                     issue = Issue(
+                         type=IssueType(issue_data.get("type", "accuracy")),
+                         description=issue_data.get("description", ""),
+                         severity=IssueSeverity(issue_data.get("severity", "medium")),
+                     )
+                     result.issues.append(issue)
+                 except ValueError:
+                     pass
+
+         except json.JSONDecodeError:
+             # If JSON parsing fails, look for issue indicators
+             if any(word in response.lower() for word in ["issue", "problem", "incorrect", "missing"]):
+                 result.has_issues = True
+                 result.should_retry = True
+
+         return result
+
+     def _refine(
+         self,
+         answer: str,
+         question: str,
+         critique: CritiqueResult,
+         evidence: str,
+     ) -> str:
+         """Generate an improved answer based on critique."""
+         # Format critique for prompt
+         critique_text = []
+         for issue in critique.issues:
+             critique_text.append(f"- [{issue.severity.value.upper()}] {issue.type.value}: {issue.description}")
+
+         if critique.suggested_improvements:
+             critique_text.append("\nSuggested improvements:")
+             for suggestion in critique.suggested_improvements:
+                 critique_text.append(f"- {suggestion}")
+
+         prompt = REFINEMENT_PROMPT.format(
+             question=question,
+             previous_answer=answer,
+             critique="\n".join(critique_text) if critique_text else "No specific issues identified.",
+             evidence=evidence[:2000] if evidence else "Use your knowledge to improve the answer.",
+         )
+
+         try:
+             response = self.llm_fn(prompt)
+             return response.strip()
+
+         except Exception as e:
+             logger.warning("refinement_failed", error=str(e))
+             return ""
+
+     def _verify_improvement(
+         self,
+         question: str,
+         original: str,
+         improved: str,
+     ) -> bool:
+         """Verify that the improved answer is actually better."""
+         if not improved:
+             return False
+
+         prompt = VERIFICATION_PROMPT.format(
+             question=question,
+             answer_a=original,
+             answer_b=improved,
+         )
+
+         try:
+             response = self.llm_fn(prompt)
+
+             # Parse response
+             json_match = re.search(r'\{[\s\S]*\}', response)
+             if not json_match:
+                 # Default to accepting improvement
+                 return True
+
+             data = json.loads(json_match.group())
+             better = data.get("better_answer", "B")
+             confidence = data.get("confidence", 0.5)
+
+             # Accept B (improved) if confident enough
+             return better == "B" and confidence >= self.accept_improvement_threshold
+
+         except Exception as e:
+             logger.warning("verification_failed", error=str(e))
+             # Default to accepting improvement
+             return True
+
+     def get_issue_stats(self) -> dict[str, int]:
+         """Get statistics on issues found across all reflections."""
+         return dict(self._issue_stats)
+
+
+ # =============================================================================
+ # Convenience Functions
+ # =============================================================================
+
+
+ def reflect_on_answer(
+     answer: str,
+     question: str,
+     evidence: str = "",
+     llm_fn: Callable[[str], str] | None = None,
+     max_iterations: int = 2,
+ ) -> ReflectionResult:
+     """
+     Perform self-reflection on an answer.
+
+     Simple interface for one-off reflection.
+
+     Args:
+         answer: The answer to reflect on.
+         question: The original question.
+         evidence: Evidence used.
+         llm_fn: LLM function (uses default if not provided).
+         max_iterations: Maximum iterations.
+
+     Returns:
+         ReflectionResult with final answer.
+     """
+     if llm_fn is None:
+         try:
+             from rnsr.llm import get_llm
+             llm = get_llm()
+             llm_fn = lambda p: str(llm.complete(p))
+         except Exception as e:
+             logger.warning("no_llm_available", error=str(e))
+             return ReflectionResult(
+                 original_answer=answer,
+                 final_answer=answer,
+                 question=question,
+             )
+
+     engine = SelfReflectionEngine(
+         llm_fn=llm_fn,
+         max_iterations=max_iterations,
+     )
+
+     return engine.reflect(answer, question, evidence)
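Below is a minimal usage sketch for the SelfReflectionEngine defined in the diff above, assuming a stub LLM callable. The fake_llm function, its canned JSON responses, and the revenue example strings are hypothetical and invented for illustration; a real deployment would pass an actual LLM callable, which reflect_on_answer() obtains via rnsr.llm.get_llm(). The stub dispatches on the opening words of the CRITIQUE_PROMPT, REFINEMENT_PROMPT, and VERIFICATION_PROMPT templates shown in this file.

import json

from rnsr.agent.self_reflection import SelfReflectionEngine


def fake_llm(prompt: str) -> str:
    """Stand-in LLM: dispatch on the opening words of each prompt template (hypothetical example)."""
    if prompt.startswith("You are a critical reviewer"):
        # Critique pass: report one completeness issue so the loop refines once.
        return json.dumps({
            "has_issues": True,
            "issues": [{"type": "completeness", "description": "Fiscal year not stated", "severity": "medium"}],
            "confidence_in_critique": 0.9,
            "suggested_improvements": ["State the fiscal year explicitly"],
            "should_retry": True,
        })
    if prompt.startswith("You are improving an answer"):
        # Refinement pass: return only the improved answer.
        return "Revenue was $10.2 billion in fiscal year 2023."
    # Verification pass: prefer the refined answer (B) with high confidence.
    return json.dumps({"better_answer": "B", "confidence": 0.9, "reasoning": "More specific."})


engine = SelfReflectionEngine(llm_fn=fake_llm, max_iterations=2)
result = engine.reflect(
    answer="Revenue was $10.2 billion.",
    question="What was revenue in FY2023?",
    evidence="FY2023 revenue: $10.2 billion.",
)
print(result.final_answer)      # refined answer, accepted after verification
print(result.total_iterations)  # 1: critique confidence (0.9) >= min_confidence_threshold (0.8)
print(result.improved)          # True

With enable_verification left at its default, the refined answer is adopted only when the verification pass prefers answer B with confidence at or above accept_improvement_threshold; the loop then stops early because the critique confidence meets min_confidence_threshold.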