rnsr-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. rnsr/__init__.py +118 -0
  2. rnsr/__main__.py +242 -0
  3. rnsr/agent/__init__.py +218 -0
  4. rnsr/agent/cross_doc_navigator.py +767 -0
  5. rnsr/agent/graph.py +1557 -0
  6. rnsr/agent/llm_cache.py +575 -0
  7. rnsr/agent/navigator_api.py +497 -0
  8. rnsr/agent/provenance.py +772 -0
  9. rnsr/agent/query_clarifier.py +617 -0
  10. rnsr/agent/reasoning_memory.py +736 -0
  11. rnsr/agent/repl_env.py +709 -0
  12. rnsr/agent/rlm_navigator.py +2108 -0
  13. rnsr/agent/self_reflection.py +602 -0
  14. rnsr/agent/variable_store.py +308 -0
  15. rnsr/benchmarks/__init__.py +118 -0
  16. rnsr/benchmarks/comprehensive_benchmark.py +733 -0
  17. rnsr/benchmarks/evaluation_suite.py +1210 -0
  18. rnsr/benchmarks/finance_bench.py +147 -0
  19. rnsr/benchmarks/pdf_merger.py +178 -0
  20. rnsr/benchmarks/performance.py +321 -0
  21. rnsr/benchmarks/quality.py +321 -0
  22. rnsr/benchmarks/runner.py +298 -0
  23. rnsr/benchmarks/standard_benchmarks.py +995 -0
  24. rnsr/client.py +560 -0
  25. rnsr/document_store.py +394 -0
  26. rnsr/exceptions.py +74 -0
  27. rnsr/extraction/__init__.py +172 -0
  28. rnsr/extraction/candidate_extractor.py +357 -0
  29. rnsr/extraction/entity_extractor.py +581 -0
  30. rnsr/extraction/entity_linker.py +825 -0
  31. rnsr/extraction/grounded_extractor.py +722 -0
  32. rnsr/extraction/learned_types.py +599 -0
  33. rnsr/extraction/models.py +232 -0
  34. rnsr/extraction/relationship_extractor.py +600 -0
  35. rnsr/extraction/relationship_patterns.py +511 -0
  36. rnsr/extraction/relationship_validator.py +392 -0
  37. rnsr/extraction/rlm_extractor.py +589 -0
  38. rnsr/extraction/rlm_unified_extractor.py +990 -0
  39. rnsr/extraction/tot_validator.py +610 -0
  40. rnsr/extraction/unified_extractor.py +342 -0
  41. rnsr/indexing/__init__.py +60 -0
  42. rnsr/indexing/knowledge_graph.py +1128 -0
  43. rnsr/indexing/kv_store.py +313 -0
  44. rnsr/indexing/persistence.py +323 -0
  45. rnsr/indexing/semantic_retriever.py +237 -0
  46. rnsr/indexing/semantic_search.py +320 -0
  47. rnsr/indexing/skeleton_index.py +395 -0
  48. rnsr/ingestion/__init__.py +161 -0
  49. rnsr/ingestion/chart_parser.py +569 -0
  50. rnsr/ingestion/document_boundary.py +662 -0
  51. rnsr/ingestion/font_histogram.py +334 -0
  52. rnsr/ingestion/header_classifier.py +595 -0
  53. rnsr/ingestion/hierarchical_cluster.py +515 -0
  54. rnsr/ingestion/layout_detector.py +356 -0
  55. rnsr/ingestion/layout_model.py +379 -0
  56. rnsr/ingestion/ocr_fallback.py +177 -0
  57. rnsr/ingestion/pipeline.py +936 -0
  58. rnsr/ingestion/semantic_fallback.py +417 -0
  59. rnsr/ingestion/table_parser.py +799 -0
  60. rnsr/ingestion/text_builder.py +460 -0
  61. rnsr/ingestion/tree_builder.py +402 -0
  62. rnsr/ingestion/vision_retrieval.py +965 -0
  63. rnsr/ingestion/xy_cut.py +555 -0
  64. rnsr/llm.py +733 -0
  65. rnsr/models.py +167 -0
  66. rnsr/py.typed +2 -0
  67. rnsr-0.1.0.dist-info/METADATA +592 -0
  68. rnsr-0.1.0.dist-info/RECORD +72 -0
  69. rnsr-0.1.0.dist-info/WHEEL +5 -0
  70. rnsr-0.1.0.dist-info/entry_points.txt +2 -0
  71. rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
  72. rnsr-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,592 @@
1
+ Metadata-Version: 2.4
2
+ Name: rnsr
3
+ Version: 0.1.0
4
+ Summary: Recursive Neural-Symbolic Retriever - Hierarchical document retrieval with font-based structure analysis
5
+ Author: RNSR Contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/theeufj/RNSR
8
+ Project-URL: Documentation, https://github.com/theeufj/RNSR#readme
9
+ Project-URL: Repository, https://github.com/theeufj/RNSR.git
10
+ Project-URL: Issues, https://github.com/theeufj/RNSR/issues
11
+ Keywords: rag,retrieval,document-processing,llm,hierarchical-indexing,pdf-parsing,neural-symbolic
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Programming Language :: Python :: 3.14
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Classifier: Topic :: Text Processing :: Indexing
24
+ Requires-Python: >=3.10
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: pymupdf>=1.23.0
28
+ Requires-Dist: pdfplumber>=0.10.0
29
+ Requires-Dist: numpy>=1.24.0
30
+ Requires-Dist: scipy>=1.10.0
31
+ Requires-Dist: structlog>=23.0.0
32
+ Requires-Dist: python-dotenv>=1.0.0
33
+ Requires-Dist: llama-index>=0.10.0
34
+ Requires-Dist: langgraph>=0.2.0
35
+ Requires-Dist: langchain-core>=0.2.0
36
+ Requires-Dist: transformers>=4.35.0
37
+ Requires-Dist: torch>=2.0.0
38
+ Requires-Dist: torchvision>=0.15.0
39
+ Requires-Dist: pillow>=10.0.0
40
+ Requires-Dist: pdf2image>=1.16.0
41
+ Requires-Dist: scikit-learn>=1.3.0
42
+ Requires-Dist: pymupdf>=1.23.0
43
+ Provides-Extra: openai
44
+ Requires-Dist: openai>=1.0.0; extra == "openai"
45
+ Requires-Dist: llama-index-llms-openai>=0.1.0; extra == "openai"
46
+ Requires-Dist: llama-index-embeddings-openai>=0.1.0; extra == "openai"
47
+ Provides-Extra: anthropic
48
+ Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
49
+ Requires-Dist: llama-index-llms-anthropic>=0.1.0; extra == "anthropic"
50
+ Provides-Extra: gemini
51
+ Requires-Dist: google-genai>=1.0.0; extra == "gemini"
52
+ Provides-Extra: all
53
+ Requires-Dist: rnsr[anthropic,gemini,openai]; extra == "all"
54
+ Provides-Extra: benchmarks
55
+ Requires-Dist: datasets>=2.0.0; extra == "benchmarks"
56
+ Requires-Dist: ragas>=0.1.0; extra == "benchmarks"
57
+ Provides-Extra: demo
58
+ Requires-Dist: gradio>=4.0.0; extra == "demo"
59
+ Provides-Extra: dev
60
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
61
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
62
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
63
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
64
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
65
+ Dynamic: license-file
66
+
67
+ # RNSR - Recursive Neural-Symbolic Retriever
68
+
69
+ A state-of-the-art document retrieval system that preserves hierarchical structure for superior RAG performance. Combines PageIndex, Recursive Language Models (RLM), Knowledge Graphs, and Tree of Thoughts navigation.
70
+
71
+ ## Overview
72
+
73
+ RNSR combines neural and symbolic approaches to achieve accurate document understanding:
74
+
75
+ - **Font Histogram Algorithm** - Automatically detects document hierarchy from font sizes (no training required)
76
+ - **Skeleton Index Pattern** - Lightweight summaries with KV store for efficient retrieval
77
+ - **Tree-of-Thoughts Navigation** - LLM reasons about document structure to find answers
78
+ - **RLM Unified Extraction** - LLM writes extraction code, grounded in actual text
79
+ - **Knowledge Graph** - Entity and relationship storage for cross-document linking
80
+ - **Self-Reflection Loop** - Iterative answer improvement through self-critique
81
+ - **Adaptive Learning** - System learns from your document workload over time
82
+
83
+ ## Key Features
84
+
85
+ | Feature | Description |
86
+ |---------|-------------|
87
+ | **Hierarchical Extraction** | Preserves document structure (sections, subsections, paragraphs) |
88
+ | **RLM Unified Extractor** | LLM writes extraction code + ToT validation (grounded, no hallucination) |
89
+ | **Provenance System** | Every answer traces back to exact document citations |
90
+ | **LLM Response Cache** | Semantic-aware caching for 10x cost/speed improvement |
91
+ | **Self-Reflection** | Iterative self-correction improves answer quality |
92
+ | **Reasoning Memory** | Learns successful query patterns for faster future queries |
93
+ | **Query Clarification** | Detects ambiguous queries and asks clarifying questions |
94
+ | **Table/Chart Parsing** | SQL-like queries over tables, chart trend analysis |
95
+ | **Adaptive Learning** | 6 registries that learn from usage and persist to disk |
96
+ | **Multi-Document Detection** | Automatically splits bundled PDFs |
97
+ | **Vision Mode** | OCR-free analysis for scanned documents and charts |
98
+
99
+ ## Installation
100
+
101
+ ```bash
102
+ # Clone the repository
103
+ git clone https://github.com/theeufj/RNSR.git
104
+ cd RNSR
105
+
106
+ # Create virtual environment
107
+ python -m venv .venv
108
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
109
+
110
+ # Install with all LLM providers
111
+ pip install -e ".[all]"
112
+
113
+ # Or install with specific provider
114
+ pip install -e ".[openai]" # OpenAI only
115
+ pip install -e ".[anthropic]" # Anthropic only
116
+ pip install -e ".[gemini]" # Google Gemini only
117
+ ```
118
+
119
+ ## Quick Start
120
+
121
+ ### 1. Set up API keys
122
+
123
+ Create a `.env` file:
124
+
125
+ ```bash
126
+ cp .env.example .env
127
+ # Edit .env with your API keys
128
+ ```
129
+
130
+ ```env
131
+ # Choose your preferred LLM provider
132
+ OPENAI_API_KEY=sk-...
133
+ # or
134
+ ANTHROPIC_API_KEY=sk-ant-...
135
+ # or
136
+ GOOGLE_API_KEY=AI...
137
+
138
+ # Optional: Override default models
139
+ LLM_PROVIDER=anthropic
140
+ SUMMARY_MODEL=claude-sonnet-4-5
141
+ ```
142
+
143
+ ### 2. Use the Python API
144
+
145
+ ```python
146
+ from rnsr import RNSRClient
147
+
148
+ # Simple one-line Q&A
149
+ client = RNSRClient()
150
+ answer = client.ask("contract.pdf", "What are the payment terms?")
151
+ print(answer)
152
+
153
+ # Advanced navigation with verification and self-reflection
154
+ result = client.ask_advanced(
155
+ "complex_report.pdf",
156
+ "Compare liability clauses in sections 5 and 8",
157
+ enable_verification=True,
158
+ enable_self_reflection=True,
159
+ max_recursion_depth=3,
160
+ )
161
+ ```
162
+
163
+ ### 3. Run the Demo UI
164
+
165
+ ```bash
166
+ python demo.py
167
+ # Open http://localhost:7860 in your browser
168
+ ```
169
+
170
+ ## New Features
171
+
172
+ ### Provenance System
173
+
174
+ Every answer includes traceable citations:
175
+
176
+ ```python
177
+ from rnsr.agent import ProvenanceTracker, format_citations_for_display
178
+
179
+ tracker = ProvenanceTracker(kv_store=kv_store, skeleton=skeleton)
180
+ record = tracker.create_provenance_record(
181
+ answer="The payment terms are net 30.",
182
+ question="What are the payment terms?",
183
+ variables=navigation_variables,
184
+ )
185
+
186
+ print(f"Confidence: {record.aggregate_confidence:.0%}")
187
+ print(format_citations_for_display(record.citations))
188
+ # Output:
189
+ # **Sources:**
190
+ # 1. [contract.pdf] Section: Payment Terms, Page 5: "Payment shall be due within 30 days..."
191
+ ```
192
+
193
+ ### LLM Response Caching
194
+
195
+ Automatic caching reduces costs and latency:
196
+
197
+ ```python
198
+ from rnsr.agent import wrap_llm_with_cache, get_global_cache
199
+
200
+ # Wrap any LLM function with caching
201
+ cached_llm = wrap_llm_with_cache(llm.complete, ttl_seconds=3600)
202
+
203
+ # Use cached LLM - repeated prompts hit cache
204
+ response = cached_llm("What is 2+2?") # Calls LLM
205
+ response = cached_llm("What is 2+2?") # Returns cached (instant)
206
+
207
+ # Check cache stats
208
+ print(get_global_cache().get_stats())
209
+ # {'entries': 150, 'hits': 89, 'hit_rate': 0.59}
210
+ ```
211
+
212
+ ### Self-Reflection Loop
213
+
214
+ Answers are automatically critiqued and improved:
215
+
216
+ ```python
217
+ from rnsr.agent import SelfReflectionEngine, reflect_on_answer
218
+
219
+ # Quick one-liner
220
+ result = reflect_on_answer(
221
+ answer="The contract expires in 2024.",
222
+ question="When does the contract expire?",
223
+ evidence="Contract dated 2023, 2-year term...",
224
+ )
225
+
226
+ print(f"Improved: {result.improved}")
227
+ print(f"Final answer: {result.final_answer}")
228
+ print(f"Iterations: {result.total_iterations}")
229
+ ```
230
+
231
+ ### Reasoning Chain Memory
232
+
233
+ The system learns from successful queries:
234
+
235
+ ```python
236
+ from rnsr.agent import get_reasoning_memory, find_similar_chains
237
+
238
+ # Find similar past queries
239
+ matches = find_similar_chains("What is the liability cap?")
240
+ for match in matches:
241
+ print(f"Similar query: {match.chain.query}")
242
+ print(f"Similarity: {match.similarity:.0%}")
243
+ print(f"Past answer: {match.chain.answer}")
244
+ ```
245
+
246
+ ### Table Parsing
247
+
248
+ Extract and query tables from documents:
249
+
250
+ ```python
251
+ from rnsr.ingestion import TableParser, TableQueryEngine
252
+
253
+ parser = TableParser()
254
+ tables = parser.parse_from_text(document_text)
255
+
256
+ # SQL-like queries
257
+ engine = TableQueryEngine(tables[0])
258
+ results = engine.select(
259
+ columns=["Name", "Amount"],
260
+ where={"Status": "Active"},
261
+ order_by="Amount",
262
+ )
263
+
264
+ # Aggregations
265
+ total = engine.aggregate("Amount", "sum")
266
+ ```
267
+
268
+ ### Query Clarification
269
+
270
+ Handle ambiguous queries gracefully:
271
+
272
+ ```python
273
+ from rnsr.agent import QueryClarifier, needs_clarification
274
+
275
+ # Check if query needs clarification
276
+ is_ambiguous, analysis = needs_clarification(
277
+ "What does it say about the clause?"
278
+ )
279
+
280
+ if is_ambiguous:
281
+ print(f"Ambiguity: {analysis.ambiguity_type}")
282
+ print(f"Clarifying question: {analysis.suggested_clarification}")
283
+ # "What does 'it' refer to in your question?"
284
+ ```
285
+
286
+ ## Adaptive Learning
287
+
288
+ RNSR learns from your document workload. All learned data persists in `~/.rnsr/`:
289
+
290
+ ```
291
+ ~/.rnsr/
292
+ ├── learned_entity_types.json # New entity types discovered
293
+ ├── learned_relationship_types.json # New relationship types
294
+ ├── learned_normalization.json # Title/suffix patterns
295
+ ├── learned_stop_words.json # Domain-specific stop words
296
+ ├── learned_header_thresholds.json # Document-type font thresholds
297
+ ├── learned_query_patterns.json # Successful query patterns
298
+ ├── reasoning_chains.json # Successful reasoning chains
299
+ └── llm_cache.db # LLM response cache
300
+ ```
301
+
302
+ The more you use RNSR, the better it gets at understanding your domain.
303
+
304
+ ## How It Works
305
+
306
+ ### Document Ingestion Pipeline
307
+
308
+ ```
309
+ PDF → Font Analysis → Header Classification → Tree Building → Skeleton Index
310
+ ↓ ↓ ↓ ↓
311
+ Detect font sizes Classify H1/H2/H3 Build hierarchy Create summaries
312
+
313
+ Multi-doc detection
314
+ (page number resets)
315
+ ```
316
+
317
+ ### Query Processing
318
+
319
+ ```
320
+ Question → Clarify → Pre-Filter → Tree Navigation → Answer → Self-Reflect → Verify
321
+ ↓ ↓ ↓ ↓ ↓ ↓
322
+ Ask if ambig Keyword scan ToT reasoning Synthesize Critique Fact-check
323
+ ↓ ↓
324
+ Sub-LLM recursion Improve answer
325
+ (complex queries) (if issues)
326
+ ```
327
+
328
+ ### Entity Extraction (RLM Unified)
329
+
330
+ ```
331
+ Document → LLM writes code → Execute on DOC_VAR → ToT validation → Cross-validate
332
+ ↓ ↓ ↓ ↓
333
+ Generates regex/Python Grounded results Probability scores Entity↔Relationship
334
+
335
+ All tied to exact text spans
336
+ ```
337
+
338
+ ## Architecture
339
+
340
+ ```
341
+ rnsr/
342
+ ├── agent/ # Query processing
343
+ │ ├── rlm_navigator.py # Main navigation agent
344
+ │ ├── provenance.py # Citation tracking (NEW)
345
+ │ ├── llm_cache.py # Response caching (NEW)
346
+ │ ├── self_reflection.py # Answer improvement (NEW)
347
+ │ ├── reasoning_memory.py # Chain memory (NEW)
348
+ │ ├── query_clarifier.py # Ambiguity handling (NEW)
349
+ │ ├── graph.py # LangGraph workflow
350
+ │ └── variable_store.py # Context management
351
+ ├── extraction/ # Entity/relationship extraction
352
+ │ ├── rlm_unified_extractor.py # Best extractor (NEW)
353
+ │ ├── learned_types.py # Adaptive type learning
354
+ │ ├── entity_linker.py # Cross-document linking
355
+ │ └── models.py # Entity/Relationship models
356
+ ├── indexing/ # Index construction
357
+ │ ├── skeleton_index.py # Summary generation
358
+ │ ├── knowledge_graph.py # Entity/relationship storage
359
+ │ ├── kv_store.py # SQLite/in-memory storage
360
+ │ └── semantic_search.py # Optional vector search
361
+ ├── ingestion/ # Document processing
362
+ │ ├── pipeline.py # Main ingestion orchestrator
363
+ │ ├── font_histogram.py # Font-based structure detection
364
+ │ ├── header_classifier.py # H1/H2/H3 classification
365
+ │ ├── table_parser.py # Table extraction (NEW)
366
+ │ ├── chart_parser.py # Chart interpretation (NEW)
367
+ │ └── tree_builder.py # Hierarchical tree construction
368
+ ├── llm.py # Multi-provider LLM abstraction
369
+ ├── client.py # High-level API
370
+ └── models.py # Data structures
371
+ ```
372
+
373
+ ## API Reference
374
+
375
+ ### High-Level API
376
+
377
+ ```python
378
+ from rnsr import RNSRClient
379
+
380
+ client = RNSRClient(
381
+ llm_provider="anthropic", # or "openai", "gemini"
382
+ llm_model="claude-sonnet-4-5"
383
+ )
384
+
385
+ # Simple query
386
+ answer = client.ask("document.pdf", "What is the main topic?")
387
+
388
+ # Vision mode (for scanned docs)
389
+ answer = client.ask_vision("scanned.pdf", "What does the chart show?")
390
+ ```
391
+
392
+ ### Low-Level API
393
+
394
+ ```python
395
+ from rnsr import (
396
+ ingest_document,
397
+ build_skeleton_index,
398
+ run_rlm_navigator,
399
+ SQLiteKVStore
400
+ )
401
+ from rnsr.extraction import RLMUnifiedExtractor
402
+ from rnsr.agent import ProvenanceTracker, SelfReflectionEngine
403
+
404
+ # Step 1: Ingest document
405
+ result = ingest_document("document.pdf")
406
+ print(f"Extracted {result.tree.total_nodes} nodes")
407
+
408
+ # Step 2: Build index
409
+ kv_store = SQLiteKVStore("./data/index.db")
410
+ skeleton = build_skeleton_index(result.tree, kv_store)
411
+
412
+ # Step 3: Extract entities (grounded, no hallucination)
413
+ extractor = RLMUnifiedExtractor()
414
+ extraction = extractor.extract(
415
+ node_id="section_1",
416
+ doc_id="document",
417
+ header="Introduction",
418
+ content="..."
419
+ )
420
+
421
+ # Step 4: Query with provenance
422
+ answer = run_rlm_navigator(
423
+ question="What are the key findings?",
424
+ skeleton=skeleton,
425
+ kv_store=kv_store
426
+ )
427
+
428
+ # Step 5: Get citations
429
+ tracker = ProvenanceTracker(kv_store=kv_store)
430
+ record = tracker.create_provenance_record(answer, question, variables)
431
+ ```
432
+
433
+ ## Configuration
434
+
435
+ ### Environment Variables
436
+
437
+ | Variable | Description | Default |
438
+ |----------|-------------|---------|
439
+ | `LLM_PROVIDER` | Primary LLM provider | `auto` (detect from keys) |
440
+ | `SUMMARY_MODEL` | Model for summarization | Provider default |
441
+ | `AGENT_MODEL` | Model for navigation | Provider default |
442
+ | `EMBEDDING_MODEL` | Embedding model | `text-embedding-3-small` |
443
+ | `KV_STORE_PATH` | SQLite database path | `./data/kv_store.db` |
444
+ | `LOG_LEVEL` | Logging verbosity | `INFO` |
445
+ | `RNSR_LLM_CACHE_PATH` | Custom cache location | `~/.rnsr/llm_cache.db` |
446
+ | `RNSR_REASONING_MEMORY_PATH` | Custom memory location | `~/.rnsr/reasoning_chains.json` |
447
+
448
+ ### Supported Models
449
+
450
+ | Provider | Models |
451
+ |----------|--------|
452
+ | **OpenAI** | `gpt-5.2`, `gpt-5-mini`, `gpt-5-nano`, `gpt-4.1`, `gpt-4o-mini` |
453
+ | **Anthropic** | `claude-opus-4-5`, `claude-sonnet-4-5`, `claude-haiku-4-5` |
454
+ | **Gemini** | `gemini-3-pro-preview`, `gemini-3-flash-preview`, `gemini-2.5-pro`, `gemini-2.5-flash` |
455
+
456
+ ## Benchmarks
457
+
458
+ RNSR is designed for complex document understanding tasks:
459
+
460
+ - **Multi-document PDFs** - Automatically detects and separates bundled documents
461
+ - **Hierarchical queries** - "Compare section 3.2 with section 5.1"
462
+ - **Cross-reference questions** - "What does the appendix say about the claim in section 2?"
463
+ - **Entity extraction** - Grounded extraction with ToT validation (no hallucination)
464
+ - **Table queries** - "What is the total for Q4 2024?"
465
+
466
+ ## Sample Documents
467
+
468
+ RNSR includes sample documents for testing and demonstration:
469
+
470
+ ### Synthetic Documents (`samples/`)
471
+
472
+ | File | Type | Features Demonstrated |
473
+ |------|------|----------------------|
474
+ | `sample_contract.md` | Legal Contract | Entities (people, orgs), relationships, payment tables, legal terms |
475
+ | `sample_financial_report.md` | Financial Report | Financial tables, metrics, executive names, quarterly data |
476
+ | `sample_research_paper.md` | Academic Paper | Citations, hierarchical sections, technical content, tables |
477
+
478
+ ### Real Test Documents (`rnsr/test-documents/`)
479
+
480
+ Legal documents from the Djokovic visa case (public court records) for testing with actual PDFs:
481
+ - Affidavits and court applications
482
+ - Legal submissions and orders
483
+ - Interview transcripts
484
+
485
+ ### Using Sample Documents
486
+
487
+ ```python
488
+ from pathlib import Path
489
+ from rnsr.ingestion import TableParser
490
+ from rnsr.extraction import CandidateExtractor
491
+
492
+ # Parse a sample document
493
+ sample = Path("samples/sample_contract.md").read_text()
494
+
495
+ # Extract tables
496
+ parser = TableParser()
497
+ tables = parser.parse_from_text(sample)
498
+ print(f"Found {len(tables)} tables")
499
+
500
+ # Extract entities
501
+ extractor = CandidateExtractor()
502
+ candidates = extractor.extract_candidates(sample)
503
+ print(f"Found {len(candidates)} entity candidates")
504
+ ```
505
+
506
+ ## Testing
507
+
508
+ ### Test Suite Overview
509
+
510
+ RNSR has comprehensive test coverage with **281+ tests**:
511
+
512
+ ```bash
513
+ # Run all tests
514
+ pytest tests/ -v
515
+
516
+ # Run specific feature tests
517
+ pytest tests/test_provenance.py tests/test_llm_cache.py -v
518
+
519
+ # Run end-to-end workflow tests
520
+ pytest tests/test_e2e_workflow.py -v
521
+
522
+ # Run with coverage
523
+ pytest tests/ --cov=rnsr --cov-report=html
524
+ ```
525
+
526
+ ### Test Categories
527
+
528
+ | Test File | Tests | Coverage |
529
+ |-----------|-------|----------|
530
+ | `test_e2e_workflow.py` | 18 | Full pipeline: ingestion → extraction → KG → query → provenance |
531
+ | `test_provenance.py` | 17 | Citations, contradictions, provenance records |
532
+ | `test_llm_cache.py` | 17 | Cache get/set, TTL, persistence |
533
+ | `test_self_reflection.py` | 13 | Critique, refinement, iteration limits |
534
+ | `test_reasoning_memory.py` | 15 | Chain storage, similarity matching |
535
+ | `test_query_clarifier.py` | 19 | Ambiguity detection, clarification |
536
+ | `test_table_parser.py` | 26 | Markdown/ASCII tables, SQL-like queries |
537
+ | `test_chart_parser.py` | 16 | Chart detection, trend analysis |
538
+ | `test_rlm_unified.py` | 13 | REPL execution, code cleaning |
539
+ | `test_learned_types.py` | 13 | Adaptive learning registries |
540
+
541
+ ### End-to-End Workflow Tests
542
+
543
+ The `test_e2e_workflow.py` demonstrates the complete pipeline:
544
+
545
+ ```python
546
+ # Tests cover:
547
+ # 1. Document Ingestion - Parse structure and tables
548
+ # 2. Entity Extraction - Pattern-based grounded extraction
549
+ # 3. Knowledge Graph - Store entities and relationships
550
+ # 4. Query Processing - Ambiguity detection, table queries
551
+ # 5. Provenance - Citations and evidence tracking
552
+ # 6. Self-Reflection - Answer improvement loop
553
+ # 7. Reasoning Memory - Learn from successful queries
554
+ # 8. LLM Cache - Response caching
555
+ # 9. Adaptive Learning - Type discovery
556
+ # 10. Full Workflow - Contract and financial analysis
557
+ ```
558
+
559
+ ## Development
560
+
561
+ ```bash
562
+ # Install dev dependencies
563
+ pip install -e ".[dev]"
564
+
565
+ # Run linting
566
+ ruff check .
567
+
568
+ # Type checking
569
+ mypy rnsr/
570
+ ```
571
+
572
+ ## Requirements
573
+
574
+ - Python 3.10+
575
+ - At least one LLM API key (OpenAI, Anthropic, or Gemini)
576
+
577
+ ## License
578
+
579
+ MIT License - see [LICENSE](LICENSE) for details.
580
+
581
+ ## Contributing
582
+
583
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
584
+
585
+ ## Research
586
+
587
+ RNSR is inspired by:
588
+ - [Hybrid Document Retrieval System Design](Research/Hybrid%20Document%20Retrieval%20System%20Design.pdf) - Core architecture and design principles
589
+ - [PageIndex (VectifyAI)](https://github.com/VectifyAI/PageIndex) - Vectorless reasoning-based tree search
590
+ - [Recursive Language Models](https://arxiv.org/html/2512.24601v1) - REPL environment with recursive sub-LLM calls
591
+ - Tree of Thoughts - LLM-based decision making with probabilities
592
+ - Self-Refine / Reflexion - Iterative self-correction patterns
@@ -0,0 +1,72 @@
1
+ rnsr/__init__.py,sha256=KOphXkwumziihpMKN0R1hipflpKoKD2BJ2vZHhVgBAw,3588
2
+ rnsr/__main__.py,sha256=bupD9Fx6wJuDvmUMWQ4JWkOsk0nrhevmXuhqvolsaz4,6901
3
+ rnsr/client.py,sha256=23JKz_5xUnp-QDq8FysqMMQxTtDoJ28Zo8QQU5tby6g,19041
4
+ rnsr/document_store.py,sha256=NsYhkMAp8uB1DjkoZ_hKZiGQ9ut9ws2GCwA_3njtQh0,11168
5
+ rnsr/exceptions.py,sha256=ukfZG6lv6L1G1o0_2nu2hMdcHDoaHEtx7rA-qo1coi0,1209
6
+ rnsr/llm.py,sha256=Tyd5jF1NLKWOKoLPkMaCVlbtkTdQVU1Bq1lHIMCE2EY,26394
7
+ rnsr/models.py,sha256=XTxyb5o8LFpyummhLfsPhuLzHnHcFj6_3TXcaeVuvZA,4288
8
+ rnsr/py.typed,sha256=ixa8YukDZ3kLo0WsFJRGohLMyHzbMur1ALmmASML2cs,64
9
+ rnsr/agent/__init__.py,sha256=UjSESsMqu84EgEWV2zMzRM0CmVAtZmGafFhVf8gA5sI,5622
10
+ rnsr/agent/cross_doc_navigator.py,sha256=QD8m0EFy3P7Klp68hq57VK7QviIV5TFan_kZvu0ljnM,24118
11
+ rnsr/agent/graph.py,sha256=khxfdwHU9s2Fi4tWkShA_6_pJ4UxklgFtgP8DHUonPI,52516
12
+ rnsr/agent/llm_cache.py,sha256=mz4_IblfaTwhQQz58snn5Imc9HkfzW3DpMdmrRghsKY,17467
13
+ rnsr/agent/navigator_api.py,sha256=cvYpqPjDUeSsP-UflPN_E6a6X44qpkAwM-N5jXDg-k0,15747
14
+ rnsr/agent/provenance.py,sha256=pFYAVec-kbluhm2ibVAeQZuADQkexdHDWnLP7-Ycauk,25084
15
+ rnsr/agent/query_clarifier.py,sha256=t3BvmtxQp37U5Xx4qsxxCE2X-Bzegxm7BgQsujn5YWg,21071
16
+ rnsr/agent/reasoning_memory.py,sha256=d_JWt1rbWgXAPfcNh3_gi_75_JYFhCxIiurvqBij7pY,23984
17
+ rnsr/agent/repl_env.py,sha256=2lurSa0C9VGGwm0SJxMzZjA1S-sjKmGlPhcDvuWrM8s,25346
18
+ rnsr/agent/rlm_navigator.py,sha256=TSa3x85MywwERuaLfNfNN9GESQlnAX7b2ycpLwFuXPM,72070
19
+ rnsr/agent/self_reflection.py,sha256=XnQ6D9dVCNlr1zWGSxwWRgAkDWOYvSbtjeYbUcCq4jQ,19231
20
+ rnsr/agent/variable_store.py,sha256=NfKx2JzzRtP7WjPG0Zzr_3IFjac9BZl6jBb4KNNJ0-k,9187
21
+ rnsr/benchmarks/__init__.py,sha256=A2WNIIzsAdRFg7WYtcG3f6kemQFiE3m4_zpq4ldS7cI,3085
22
+ rnsr/benchmarks/comprehensive_benchmark.py,sha256=HqZNQ9DZiZ0LkgeEVCv3uhiATbSAYm73PookujXqe_c,24085
23
+ rnsr/benchmarks/evaluation_suite.py,sha256=gXmkXyAXQMtxLXGGveBdZYfuY10-9ma6u0yboIhNpU4,44904
24
+ rnsr/benchmarks/finance_bench.py,sha256=bSKsBwMtuabcWyKQEYJFYhnnJ2jRw0khzE9NBsTiJRY,5330
25
+ rnsr/benchmarks/pdf_merger.py,sha256=UNQHefghWTh7eSkWE2qD_1Rhi97gHcfWs0jXY8F9MDM,6323
26
+ rnsr/benchmarks/performance.py,sha256=bZnR-4xZtII8legY1c9b9ingtML9aSiHkHvQbc4oRbE,9728
27
+ rnsr/benchmarks/quality.py,sha256=Mog6nThF-5N9A0kFkFnXlfl1eOcLoKMUbHI_EK-adko,10054
28
+ rnsr/benchmarks/runner.py,sha256=hzbJZBAk1MgGDAPBC0bE3Gd_KKCobhSpdWiZOhh0cfg,9740
29
+ rnsr/benchmarks/standard_benchmarks.py,sha256=1xSLZH6jPYNwhZpdVTHp4vckoAm-P9UmUru9gpb47P0,34793
30
+ rnsr/extraction/__init__.py,sha256=4EBU5M2cIf3gqR26n7EuALO96VcZ8tfJUJhxGbmUr9A,4528
31
+ rnsr/extraction/candidate_extractor.py,sha256=9pmuxG3PhoRvDrRvyr0gF9ZntUMkS7iTDapc28Bhy7I,13457
32
+ rnsr/extraction/entity_extractor.py,sha256=DRH-tdr0jc7ujPGa4VuvG5xVDfkSavf4OhU7LVQhmnc,19496
33
+ rnsr/extraction/entity_linker.py,sha256=peqzkqvKkrWo8ZJ3MQF1qBVK9BoXY9tNmtPRIIZE9YA,28279
34
+ rnsr/extraction/grounded_extractor.py,sha256=aBiB7ckkFWnQiUXk9IrRYzgDN2PXujzdR6dqAGsYrJo,24512
35
+ rnsr/extraction/learned_types.py,sha256=XdAmwQkaQ2hMlakYYvM1sw_9TK1RLAbKoePhbKuDJBc,19117
36
+ rnsr/extraction/models.py,sha256=GwoGkczyp6hwnYrzUGR9mevATlEkxPRzOMEeLrwYo98,8921
37
+ rnsr/extraction/relationship_extractor.py,sha256=9a5U3iZECC-b5mwazFP2N3_fiwEz99heONq0PeIA-Ek,20080
38
+ rnsr/extraction/relationship_patterns.py,sha256=ByFAOHXqINqVsD-LlisYPS92lZNUKG60rWkAP6WFPyQ,18910
39
+ rnsr/extraction/relationship_validator.py,sha256=56aPDpKIx_EXRbPvdjBUc7fqBZU4mV8RehPNuh19pbg,13483
40
+ rnsr/extraction/rlm_extractor.py,sha256=UsSyMPPqcyPJ02jzkttjn4CzEK7u8TDYF0ciAFpnRLo,20366
41
+ rnsr/extraction/rlm_unified_extractor.py,sha256=JPHyOW04AamYs6RT4h1IEbFg3OBHSW8h7bAa2gjeXp8,36762
42
+ rnsr/extraction/tot_validator.py,sha256=mmM6TtEoyTV-8pCUucBy8TcJ3hPOpABa9yeFgLRDGfI,21586
43
+ rnsr/extraction/unified_extractor.py,sha256=AuRhPJ0Zna1_7vTpZ2fnl0-s_7xjQfGHQ6oNKGJzDH8,12540
44
+ rnsr/indexing/__init__.py,sha256=on-bohmE6qoLIxMQNBHQCXSRKr98aAC0RzVOkrVlIVA,1417
45
+ rnsr/indexing/knowledge_graph.py,sha256=xuceER-kevyZJHGNlCcIHxnoFifZWxmTvjnqPgxFQa8,38592
46
+ rnsr/indexing/kv_store.py,sha256=iXNwGGIKev-AhMDx-dinYqqhUE3UT20TGM2uTOSHQRM,9438
47
+ rnsr/indexing/persistence.py,sha256=GhE8VWCsB0SKAiN_SQiw5fLsYAXm849SqRdhUb9BMDE,9427
48
+ rnsr/indexing/semantic_retriever.py,sha256=ypNl0_gi6ySPBDhgYwML0_OkS5bgbOg5H1yV0xy8tQU,7365
49
+ rnsr/indexing/semantic_search.py,sha256=Qv_25cr3D-uX7Duk2clt_SJR6wATU4UdQogzeTeVYfw,10395
50
+ rnsr/indexing/skeleton_index.py,sha256=jNC6U0JdT-7zoQ-Z85mpB0xk_mgmTlEUfDUVAb6cFvk,11717
51
+ rnsr/ingestion/__init__.py,sha256=eWEWlWp7qmD_RkAjjNwUnWUHfSOnyNnPQdprochhgZM,4583
52
+ rnsr/ingestion/chart_parser.py,sha256=O5y7FfgeCoHL8W3DY0AQSPPlOAMV4JhvkbpxJnPlmlw,18672
53
+ rnsr/ingestion/document_boundary.py,sha256=aGsFxa7haXSCFuefxbTWiWVtXmHKmsTjsZI1y0GhAgc,25447
54
+ rnsr/ingestion/font_histogram.py,sha256=B8PqlJjjEASyYkd97JPPFPtNN6zO7uVBi4p6XqVFaTc,11370
55
+ rnsr/ingestion/header_classifier.py,sha256=9AT7BHNB4yXHyPpxROb0oydmcHUYJyMtshB6UTqnT_o,19535
56
+ rnsr/ingestion/hierarchical_cluster.py,sha256=-A77Kfvzwl6y6XfKjt-kF_Xk7tOy5aPc-K0QBIP2bOQ,17343
57
+ rnsr/ingestion/layout_detector.py,sha256=kHarT41pbETINewhI_MU-5bmB5OmU2LiJFeHHOzbDZo,10693
58
+ rnsr/ingestion/layout_model.py,sha256=41XtQsjxIny0Z6WfeuclXNACh6ayVYjlXILmQzjHljw,11629
59
+ rnsr/ingestion/ocr_fallback.py,sha256=WaUiY6YnoNYtD-smIogSltRKYwQ4MNDNId2mOT5fsD8,4615
60
+ rnsr/ingestion/pipeline.py,sha256=ciGeq51mzWPNViNUxdEccJ9gsKVp5nCa68xF9451zh8,30449
61
+ rnsr/ingestion/semantic_fallback.py,sha256=LhnY4qfqRDhNgutBVJqW_6I_uKyX0IvxM6tT6YhYwSo,13207
62
+ rnsr/ingestion/table_parser.py,sha256=-QzXsFLyZm4nsic6zLKBQWFaB8xlSYXg9QtpneojJ0o,24699
63
+ rnsr/ingestion/text_builder.py,sha256=NGc80EfehQCNQdf0do4U1lnhKpiObu6VsV0u9EepvSI,14700
64
+ rnsr/ingestion/tree_builder.py,sha256=2XrpJDhk1rPQNTR4FQKELKPOXFa5TDDmDGCXYebYmXY,12561
65
+ rnsr/ingestion/vision_retrieval.py,sha256=an5IOIQlWavZ1rgNHz8pdP_QVTLaBsBfWnxqj7gSoxI,31000
66
+ rnsr/ingestion/xy_cut.py,sha256=2KvC9RFtWZ0c2JhPkKtT5saOOuLaWAGgCMh40Fx1_7Q,18025
67
+ rnsr-0.1.0.dist-info/licenses/LICENSE,sha256=VplyxAvQdVnyS4R-X2Iakr_ffeFSgRAiIfEU7kZgxf8,1074
68
+ rnsr-0.1.0.dist-info/METADATA,sha256=P2lAd_Y1WkACXjbOIib1zTlHF5n-0eA4BqOrut35OeQ,19867
69
+ rnsr-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
70
+ rnsr-0.1.0.dist-info/entry_points.txt,sha256=WFGf4rTMJbDEOt_PvjaHHtHRmJHNk0qfaJpghduMaM8,44
71
+ rnsr-0.1.0.dist-info/top_level.txt,sha256=hylIhN9Hbr5V92x_rNExhkodqj8VvlbhZ8Evog443dY,5
72
+ rnsr-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ rnsr = rnsr.__main__:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 RNSR Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.