rnsr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. rnsr/__init__.py +118 -0
  2. rnsr/__main__.py +242 -0
  3. rnsr/agent/__init__.py +218 -0
  4. rnsr/agent/cross_doc_navigator.py +767 -0
  5. rnsr/agent/graph.py +1557 -0
  6. rnsr/agent/llm_cache.py +575 -0
  7. rnsr/agent/navigator_api.py +497 -0
  8. rnsr/agent/provenance.py +772 -0
  9. rnsr/agent/query_clarifier.py +617 -0
  10. rnsr/agent/reasoning_memory.py +736 -0
  11. rnsr/agent/repl_env.py +709 -0
  12. rnsr/agent/rlm_navigator.py +2108 -0
  13. rnsr/agent/self_reflection.py +602 -0
  14. rnsr/agent/variable_store.py +308 -0
  15. rnsr/benchmarks/__init__.py +118 -0
  16. rnsr/benchmarks/comprehensive_benchmark.py +733 -0
  17. rnsr/benchmarks/evaluation_suite.py +1210 -0
  18. rnsr/benchmarks/finance_bench.py +147 -0
  19. rnsr/benchmarks/pdf_merger.py +178 -0
  20. rnsr/benchmarks/performance.py +321 -0
  21. rnsr/benchmarks/quality.py +321 -0
  22. rnsr/benchmarks/runner.py +298 -0
  23. rnsr/benchmarks/standard_benchmarks.py +995 -0
  24. rnsr/client.py +560 -0
  25. rnsr/document_store.py +394 -0
  26. rnsr/exceptions.py +74 -0
  27. rnsr/extraction/__init__.py +172 -0
  28. rnsr/extraction/candidate_extractor.py +357 -0
  29. rnsr/extraction/entity_extractor.py +581 -0
  30. rnsr/extraction/entity_linker.py +825 -0
  31. rnsr/extraction/grounded_extractor.py +722 -0
  32. rnsr/extraction/learned_types.py +599 -0
  33. rnsr/extraction/models.py +232 -0
  34. rnsr/extraction/relationship_extractor.py +600 -0
  35. rnsr/extraction/relationship_patterns.py +511 -0
  36. rnsr/extraction/relationship_validator.py +392 -0
  37. rnsr/extraction/rlm_extractor.py +589 -0
  38. rnsr/extraction/rlm_unified_extractor.py +990 -0
  39. rnsr/extraction/tot_validator.py +610 -0
  40. rnsr/extraction/unified_extractor.py +342 -0
  41. rnsr/indexing/__init__.py +60 -0
  42. rnsr/indexing/knowledge_graph.py +1128 -0
  43. rnsr/indexing/kv_store.py +313 -0
  44. rnsr/indexing/persistence.py +323 -0
  45. rnsr/indexing/semantic_retriever.py +237 -0
  46. rnsr/indexing/semantic_search.py +320 -0
  47. rnsr/indexing/skeleton_index.py +395 -0
  48. rnsr/ingestion/__init__.py +161 -0
  49. rnsr/ingestion/chart_parser.py +569 -0
  50. rnsr/ingestion/document_boundary.py +662 -0
  51. rnsr/ingestion/font_histogram.py +334 -0
  52. rnsr/ingestion/header_classifier.py +595 -0
  53. rnsr/ingestion/hierarchical_cluster.py +515 -0
  54. rnsr/ingestion/layout_detector.py +356 -0
  55. rnsr/ingestion/layout_model.py +379 -0
  56. rnsr/ingestion/ocr_fallback.py +177 -0
  57. rnsr/ingestion/pipeline.py +936 -0
  58. rnsr/ingestion/semantic_fallback.py +417 -0
  59. rnsr/ingestion/table_parser.py +799 -0
  60. rnsr/ingestion/text_builder.py +460 -0
  61. rnsr/ingestion/tree_builder.py +402 -0
  62. rnsr/ingestion/vision_retrieval.py +965 -0
  63. rnsr/ingestion/xy_cut.py +555 -0
  64. rnsr/llm.py +733 -0
  65. rnsr/models.py +167 -0
  66. rnsr/py.typed +2 -0
  67. rnsr-0.1.0.dist-info/METADATA +592 -0
  68. rnsr-0.1.0.dist-info/RECORD +72 -0
  69. rnsr-0.1.0.dist-info/WHEEL +5 -0
  70. rnsr-0.1.0.dist-info/entry_points.txt +2 -0
  71. rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
  72. rnsr-0.1.0.dist-info/top_level.txt +1 -0
rnsr/__init__.py ADDED
@@ -0,0 +1,118 @@
1
+ """
2
+ RNSR - Recursive Neural-Symbolic Retriever
3
+
4
+ State-of-the-art document retrieval system combining:
5
+ - PageIndex: Vectorless, reasoning-based tree search
6
+ - RLMs: REPL environment with recursive sub-LLM calls
7
+ - Vision: OCR-free image-based document analysis
8
+
9
+ This is the hybrid recursive visual-symbolic retriever that achieves
10
+ superior performance on complex document understanding tasks.
11
+
12
+ Key Features:
13
+ - Font Histogram Algorithm (NOT vision models for structure)
14
+ - Recursive XY-Cut (Visual-geometric segmentation)
15
+ - Hierarchical Clustering (Multi-resolution topics)
16
+ - Skeleton Index pattern (summaries + KV store)
17
+ - Pointer-based Variable Stitching (prevents context pollution)
18
+ - Pre-LLM Filtering (keyword/regex before expensive ToT)
19
+ - Deep Recursive Sub-LLM Calls (configurable depth)
20
+ - Answer Verification (sub-LLM validation)
21
+ - Vision-based Retrieval (OCR-free page image analysis)
22
+ - Hybrid Text+Vision Mode (best of both worlds)
23
+ - Multi-provider LLM support (OpenAI, Anthropic, Gemini)
24
+
25
+ Usage:
26
+ from rnsr import RNSRClient
27
+
28
+ # Simple one-line Q&A
29
+ client = RNSRClient()
30
+ answer = client.ask("contract.pdf", "What are the payment terms?")
31
+
32
+ # Advanced RLM navigation with full features
33
+ result = client.ask_advanced(
34
+ "complex_report.pdf",
35
+ "Compare liability clauses in sections 5 and 8",
36
+ enable_verification=True,
37
+ max_recursion_depth=3,
38
+ )
39
+
40
+ # Vision-based analysis (for scanned docs, charts)
41
+ result = client.ask_vision(
42
+ "scanned_document.pdf",
43
+ "What does the revenue chart show?",
44
+ )
45
+
46
+ # Low-level API
47
+ from rnsr import ingest_document, build_skeleton_index, run_rlm_navigator
48
+
49
+ result = ingest_document("contract.pdf")
50
+ skeleton, kv_store = build_skeleton_index(result.tree)
51
+ answer = run_rlm_navigator("What are the terms?", skeleton, kv_store)
52
+
53
+ LLM Provider Configuration:
54
+ Set one of these environment variables:
55
+ - GOOGLE_API_KEY (Gemini)
56
+ - OPENAI_API_KEY (OpenAI)
57
+ - ANTHROPIC_API_KEY (Anthropic)
58
+ """
59
+
60
# The distribution metadata is the single source of truth for the version.
# The previous hard-coded "0.2.0" disagreed with the published wheel
# (rnsr-0.1.0.dist-info), so `rnsr.__version__` and `pip show rnsr` reported
# different versions.
from importlib.metadata import PackageNotFoundError as _PackageNotFoundError
from importlib.metadata import version as _distribution_version

try:
    __version__ = _distribution_version("rnsr")
except _PackageNotFoundError:
    # Package is importable but not installed (e.g. a plain source checkout):
    # fall back to the version of the release this file shipped in.
    __version__ = "0.1.0"
61
+
62
+ # Re-export main entry points
63
+ from rnsr.ingestion import ingest_document, IngestionResult
64
+ from rnsr.ingestion.pipeline import ingest_document_enhanced
65
+ from rnsr.indexing import build_skeleton_index, SQLiteKVStore, InMemoryKVStore
66
+ from rnsr.indexing import save_index, load_index, get_index_info, list_indexes
67
+ from rnsr.agent import (
68
+ run_navigator,
69
+ VariableStore,
70
+ # RLM Navigator (State-of-the-Art)
71
+ RLMNavigator,
72
+ RLMConfig,
73
+ run_rlm_navigator,
74
+ create_rlm_navigator,
75
+ PreFilterEngine,
76
+ RecursiveSubLLMEngine,
77
+ AnswerVerificationEngine,
78
+ )
79
+ from rnsr.document_store import DocumentStore
80
+ from rnsr.client import RNSRClient
81
+ from rnsr.llm import get_llm, get_embed_model, LLMProvider
82
+
83
+ __all__ = [
84
+ # Version
85
+ "__version__",
86
+ # High-Level Client (Simplest API)
87
+ "RNSRClient",
88
+ # Ingestion
89
+ "ingest_document",
90
+ "ingest_document_enhanced",
91
+ "IngestionResult",
92
+ # Indexing
93
+ "build_skeleton_index",
94
+ "SQLiteKVStore",
95
+ "InMemoryKVStore",
96
+ # Persistence
97
+ "save_index",
98
+ "load_index",
99
+ "get_index_info",
100
+ "list_indexes",
101
+ # Document Store
102
+ "DocumentStore",
103
+ # Standard Navigator
104
+ "run_navigator",
105
+ "VariableStore",
106
+ # RLM Navigator (State-of-the-Art)
107
+ "RLMNavigator",
108
+ "RLMConfig",
109
+ "run_rlm_navigator",
110
+ "create_rlm_navigator",
111
+ "PreFilterEngine",
112
+ "RecursiveSubLLMEngine",
113
+ "AnswerVerificationEngine",
114
+ # LLM
115
+ "get_llm",
116
+ "get_embed_model",
117
+ "LLMProvider",
118
+ ]
rnsr/__main__.py ADDED
@@ -0,0 +1,242 @@
1
+ """
2
+ RNSR CLI - Command Line Interface
3
+
4
+ Usage:
5
+ python -m rnsr ingest document.pdf
6
+ python -m rnsr query document.pdf "What are the payment terms?"
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import json
13
+ import sys
14
+ from pathlib import Path
15
+
16
+ import structlog
17
+
18
+ structlog.configure(
19
+ processors=[
20
+ structlog.stdlib.add_log_level,
21
+ structlog.dev.ConsoleRenderer(),
22
+ ]
23
+ )
24
+
25
+ logger = structlog.get_logger(__name__)
26
+
27
+
28
def cmd_ingest(args):
    """Ingest a PDF document and print a short summary of the result.

    Args:
        args: Parsed CLI namespace. Reads ``args.file`` (path of the document
            to ingest) and ``args.output`` (optional path of a JSON file that
            receives the dumped document tree).

    Returns:
        Whatever ``rnsr.ingestion.ingest_document`` returned for the file.

    Raises:
        SystemExit: With status 1 when ``args.file`` is not an existing
            regular file.
    """
    pdf_path = Path(args.file)
    # Validate first: the check is cheap, whereas importing the ingestion
    # package is not. ``is_file`` (rather than ``exists``) also rejects
    # directories, which would otherwise fail later inside the pipeline.
    if not pdf_path.is_file():
        print(f"Error: File not found: {pdf_path}", file=sys.stderr)
        sys.exit(1)

    # Imported lazily so that other sub-commands do not pay for it.
    from rnsr.ingestion import ingest_document

    print(f"Ingesting: {pdf_path}")
    result = ingest_document(pdf_path)

    print("\n✓ Ingestion complete!")
    print(f" Tier used: {result.tier_used} ({result.method})")
    print(f" Total nodes: {result.tree.total_nodes}")

    if result.warnings:
        print("\nWarnings:")
        for warning in result.warnings:
            print(f" - {warning}")

    if args.output:
        output_path = Path(args.output)
        # Explicit encoding keeps the written file independent of the
        # platform's default locale encoding.
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(result.tree.model_dump(), f, indent=2)
        print(f"\nTree saved to: {output_path}")

    return result
56
+
57
+
58
def cmd_index(args):
    """Ingest a document and build a SQLite-backed skeleton index for it.

    Args:
        args: Parsed CLI namespace. Reads ``args.file`` (path of the document)
            and ``args.db`` (optional SQLite database path; defaults to
            ``<file stem>_index.db`` in the current working directory).

    Returns:
        A ``(skeleton, kv_store)`` tuple: the skeleton returned by
        ``build_skeleton_index`` and the ``SQLiteKVStore`` that was filled.

    Raises:
        SystemExit: With status 1 when ``args.file`` is not an existing
            regular file.
    """
    pdf_path = Path(args.file)
    # Cheap validation before the expensive package imports; ``is_file``
    # also rejects directories, which ``exists`` would let through.
    if not pdf_path.is_file():
        print(f"Error: File not found: {pdf_path}", file=sys.stderr)
        sys.exit(1)

    from rnsr.indexing import SQLiteKVStore, build_skeleton_index
    from rnsr.ingestion import ingest_document

    # Ingest first
    print(f"Ingesting: {pdf_path}")
    result = ingest_document(pdf_path)

    # Build index
    db_path = args.db or f"{pdf_path.stem}_index.db"
    kv_store = SQLiteKVStore(db_path)
    # The store is passed in explicitly, so the second element of the
    # returned pair is not needed here.
    skeleton, _ = build_skeleton_index(result.tree, kv_store)

    print("\n✓ Index built!")
    print(f" Skeleton nodes: {len(skeleton)}")
    print(f" KV entries: {kv_store.count()}")
    print(f" Database: {db_path}")

    return skeleton, kv_store
83
+
84
+
85
def cmd_query(args):
    """Ingest a document, index it and answer one question about it.

    Args:
        args: Parsed CLI namespace. Reads ``args.file`` (path of the
            document), ``args.query`` (the question), ``args.max_iter``
            (forwarded to ``run_navigator`` as ``max_iterations``) and
            ``args.trace`` (print the navigation trace when truthy).

    Raises:
        SystemExit: With status 1 when ``args.file`` is not an existing
            regular file.
    """
    pdf_path = Path(args.file)
    # Cheap validation before the expensive package imports; ``is_file``
    # also rejects directories, which ``exists`` would let through.
    if not pdf_path.is_file():
        print(f"Error: File not found: {pdf_path}", file=sys.stderr)
        sys.exit(1)

    from rnsr.agent import run_navigator
    from rnsr.indexing import build_skeleton_index
    from rnsr.ingestion import ingest_document

    # Ingest
    print(f"Ingesting: {pdf_path}")
    result = ingest_document(pdf_path)

    # Build index (no store is supplied, so build_skeleton_index provides one)
    skeleton, kv_store = build_skeleton_index(result.tree)

    # Run query
    print(f"\nQuery: {args.query}")
    print("-" * 40)

    answer = run_navigator(
        question=args.query,
        skeleton=skeleton,
        kv_store=kv_store,
        max_iterations=args.max_iter,
    )

    print("\nAnswer:")
    print(answer["answer"])
    print(f"\nConfidence: {answer['confidence']:.2f}")
    print(f"Nodes visited: {len(answer['nodes_visited'])}")
    print(f"Variables used: {len(answer['variables_used'])}")

    if args.trace:
        print("\nTrace:")
        for entry in answer["trace"]:
            print(f" [{entry['node_type']}] {entry['action']}")
124
+
125
+
126
def cmd_benchmark(args):
    """Run the benchmark suite and write a JSON report.

    Args:
        args: Parsed CLI namespace. Reads ``args.config`` (path of a benchmark
            config JSON file), ``args.files`` (PDF paths, used when no config
            is given), ``args.iterations``, ``args.quality``, ``args.all`` and
            ``args.output`` (report directory, default
            ``benchmark_results``).

    Raises:
        SystemExit: With status 1 when neither ``--config`` nor ``--files``
            was supplied.
    """
    # Check files are provided. This is a usage error, so report it on stderr
    # and exit non-zero like the other sub-commands do for bad input.
    if not args.config and not args.files:
        print("❌ Error: Provide --files or --config for benchmarking", file=sys.stderr)
        sys.exit(1)

    # Absolute import, consistent with the sibling commands in this module.
    from rnsr.benchmarks import BenchmarkConfig, BenchmarkRunner

    # Load config if provided
    if args.config:
        config = BenchmarkConfig.from_json(args.config)
    else:
        # NOTE(review): ``args.performance`` is parsed by the CLI but never
        # read here, and ``--all`` only switches quality on — confirm whether
        # BenchmarkConfig has a matching performance option.
        config = BenchmarkConfig(
            pdf_paths=[Path(f) for f in (args.files or [])],
            iterations=args.iterations,
            compute_quality=args.quality or args.all,
        )

    print("=" * 60)
    print("RNSR Benchmark Suite")
    print("=" * 60)
    print(f"Files: {len(config.pdf_paths)}")
    print(f"Iterations: {config.iterations}")

    # Run benchmarks
    runner = BenchmarkRunner(config)
    report = runner.run()

    # Print summary
    report.print_summary()

    # Save results. Create the directory up front so a first run does not
    # depend on the report writer doing it.
    output_path = Path(args.output or "benchmark_results")
    output_path.mkdir(parents=True, exist_ok=True)
    # ':' is not a legal file-name character on every platform.
    report_file = output_path / f"benchmark_report_{report.timestamp.replace(':', '-')}.json"
    report.to_json(report_file)

    print(f"\n📄 Report saved to: {report_file}")
165
+
166
+
167
def main(argv=None):
    """Parse command-line arguments and run the selected sub-command.

    Args:
        argv: Optional list of argument strings, without the program name.
            When ``None`` (the default) argparse reads ``sys.argv[1:]``, so
            existing ``main()`` callers and entry points are unaffected.
            Passing a list allows the CLI to be driven programmatically.

    Raises:
        SystemExit: Raised by argparse on invalid arguments or ``--help``,
            and by sub-commands that reject their input.
    """
    parser = argparse.ArgumentParser(
        description="RNSR - Recursive Neural-Symbolic Retriever"
    )
    subparsers = parser.add_subparsers(dest="command", help="Commands")

    # Ingest command
    ingest_parser = subparsers.add_parser("ingest", help="Ingest a PDF document")
    ingest_parser.add_argument("file", help="Path to PDF file")
    ingest_parser.add_argument("-o", "--output", help="Output JSON file for tree")

    # Index command
    index_parser = subparsers.add_parser("index", help="Build skeleton index")
    index_parser.add_argument("file", help="Path to PDF file")
    index_parser.add_argument("--db", help="SQLite database path")

    # Query command
    query_parser = subparsers.add_parser("query", help="Query a document")
    query_parser.add_argument("file", help="Path to PDF file")
    query_parser.add_argument("query", help="Question to ask")
    query_parser.add_argument("--max-iter", type=int, default=20, help="Max iterations")
    query_parser.add_argument("--trace", action="store_true", help="Show trace")

    # Benchmark command
    bench_parser = subparsers.add_parser("benchmark", help="Run benchmarks")
    bench_parser.add_argument(
        "--config", "-c",
        help="Path to benchmark config JSON file"
    )
    bench_parser.add_argument(
        "--files", "-f",
        nargs="+",
        help="PDF files to benchmark"
    )
    bench_parser.add_argument(
        "--iterations", "-n",
        type=int,
        default=3,
        help="Number of iterations per benchmark (default: 3)"
    )
    bench_parser.add_argument(
        "--output", "-o",
        help="Output directory for results"
    )
    bench_parser.add_argument(
        "--performance", "-p",
        action="store_true",
        help="Run performance benchmarks"
    )
    bench_parser.add_argument(
        "--quality", "-q",
        action="store_true",
        help="Run quality benchmarks"
    )
    bench_parser.add_argument(
        "--all", "-a",
        action="store_true",
        help="Run all benchmarks"
    )

    args = parser.parse_args(argv)

    # The sub-command is optional, so ``command`` is None when none was given
    # (argparse itself rejects unknown names before we get here).
    if args.command is None:
        parser.print_help()
        return

    handlers = {
        "ingest": cmd_ingest,
        "index": cmd_index,
        "query": cmd_query,
        "benchmark": cmd_benchmark,
    }
    handlers[args.command](args)
239
+
240
+
241
+ if __name__ == "__main__":
242
+ main()
rnsr/agent/__init__.py ADDED
@@ -0,0 +1,218 @@
1
+ """
2
+ Agent Module - Recursive Navigator with Full RLM Support
3
+
4
+ Implements the state-of-the-art hybrid retrieval system combining:
5
+ - PageIndex: Vectorless, reasoning-based tree search
6
+ - RLMs: REPL environment with recursive sub-LLM calls
7
+ - RNSR: Latent hierarchy reconstruction + variable stitching
8
+
9
+ Key Features:
10
+ 1. RLM Navigator - Full recursive language model with pre-filtering
11
+ 2. REPLEnvironment - Python REPL with DOC_VAR and code execution
12
+ 3. Variable Store - Pointer-based stitching to prevent context pollution
13
+ 4. Tree of Thoughts (ToT) - LLM-based navigation decisions
14
+ 5. Pre-filtering - Keyword/regex filtering before LLM calls
15
+ 6. Deep Recursion - Multi-level recursive sub-LLM calls
16
+ 7. Answer Verification - Sub-LLM validation of answers
17
+ 8. Async Processing - Parallel sub-LLM execution
18
+
19
+ Enhanced Features (New):
20
+ 9. Provenance System - Traceable citations for every answer
21
+ 10. LLM Cache - Semantic-aware caching for performance
22
+ 11. Self-Reflection - Iterative self-correction loop
23
+ 12. Reasoning Memory - Learn from successful query chains
24
+ 13. Query Clarification - Handle ambiguous queries
25
+
26
+ Inspired by:
27
+ - PageIndex (VectifyAI): https://github.com/VectifyAI/PageIndex
28
+ - Recursive Language Models: https://arxiv.org/html/2512.24601v1
29
+ """
30
+
31
+ from rnsr.agent.graph import (
32
+ AgentState,
33
+ build_navigator_graph,
34
+ create_initial_state,
35
+ create_navigator_tools,
36
+ run_navigator,
37
+ # Tree of Thoughts (Section 7.2)
38
+ evaluate_children_with_tot,
39
+ backtrack_to_parent,
40
+ TOT_SYSTEM_PROMPT,
41
+ # RLM Recursive Execution (Section 2.2)
42
+ execute_sub_task_with_llm,
43
+ batch_execute_sub_tasks,
44
+ process_pending_questions,
45
+ DECOMPOSITION_PROMPT,
46
+ )
47
+ from rnsr.agent.variable_store import VariableStore, generate_pointer_name
48
+ from rnsr.agent.navigator_api import (
49
+ NavigatorAPI,
50
+ create_navigator,
51
+ execute_rap_query,
52
+ )
53
+ from rnsr.agent.repl_env import (
54
+ REPLEnvironment,
55
+ create_repl_environment,
56
+ RLM_SYSTEM_PROMPT,
57
+ batch_process_async,
58
+ )
59
+ from rnsr.agent.rlm_navigator import (
60
+ RLMNavigator,
61
+ RLMConfig,
62
+ RLMAgentState,
63
+ PreFilterEngine,
64
+ RecursiveSubLLMEngine,
65
+ AnswerVerificationEngine,
66
+ EntityAwareDecomposer,
67
+ create_rlm_navigator,
68
+ run_rlm_navigator,
69
+ # Adaptive Learning
70
+ LearnedStopWords,
71
+ LearnedQueryPatterns,
72
+ get_learned_stop_words,
73
+ get_learned_query_patterns,
74
+ )
75
+ from rnsr.agent.cross_doc_navigator import (
76
+ CrossDocNavigator,
77
+ CrossDocQuery,
78
+ CrossDocAnswer,
79
+ DocumentResult,
80
+ create_cross_doc_navigator,
81
+ )
82
+
83
+ # New Enhancement Modules
84
+ from rnsr.agent.provenance import (
85
+ ProvenanceTracker,
86
+ ProvenanceRecord,
87
+ Citation,
88
+ Contradiction,
89
+ CitationStrength,
90
+ create_citation,
91
+ format_citations_for_display,
92
+ )
93
+ from rnsr.agent.llm_cache import (
94
+ LLMCache,
95
+ CachedLLM,
96
+ get_global_cache,
97
+ wrap_llm_with_cache,
98
+ )
99
+ from rnsr.agent.self_reflection import (
100
+ SelfReflectionEngine,
101
+ ReflectionResult,
102
+ CritiqueResult,
103
+ reflect_on_answer,
104
+ )
105
+ from rnsr.agent.reasoning_memory import (
106
+ ReasoningChainMemory,
107
+ ReasoningChain,
108
+ ReasoningStep,
109
+ ChainMatch,
110
+ get_reasoning_memory,
111
+ store_reasoning_chain,
112
+ find_similar_chains,
113
+ )
114
+ from rnsr.agent.query_clarifier import (
115
+ QueryClarifier,
116
+ AmbiguityAnalysis,
117
+ ClarificationRequest,
118
+ ClarificationResult,
119
+ needs_clarification,
120
+ clarify_query,
121
+ )
122
+
123
+ __all__ = [
124
+ # RLM Navigator (State-of-the-Art)
125
+ "RLMNavigator",
126
+ "RLMConfig",
127
+ "RLMAgentState",
128
+ "PreFilterEngine",
129
+ "RecursiveSubLLMEngine",
130
+ "AnswerVerificationEngine",
131
+ "EntityAwareDecomposer",
132
+ "create_rlm_navigator",
133
+ "run_rlm_navigator",
134
+
135
+ # Cross-Document Navigator
136
+ "CrossDocNavigator",
137
+ "CrossDocQuery",
138
+ "CrossDocAnswer",
139
+ "DocumentResult",
140
+ "create_cross_doc_navigator",
141
+
142
+ # REPL Environment (Section 2.1 - Prompt-as-Environment)
143
+ "REPLEnvironment",
144
+ "create_repl_environment",
145
+ "RLM_SYSTEM_PROMPT",
146
+ "batch_process_async",
147
+
148
+ # Navigator API (Section 5.1 Phase III)
149
+ "NavigatorAPI",
150
+ "create_navigator",
151
+ "execute_rap_query",
152
+
153
+ # Variable Store
154
+ "VariableStore",
155
+ "generate_pointer_name",
156
+
157
+ # Agent Graph
158
+ "AgentState",
159
+ "build_navigator_graph",
160
+ "create_initial_state",
161
+ "create_navigator_tools",
162
+ "run_navigator",
163
+
164
+ # Tree of Thoughts (Section 7.2)
165
+ "evaluate_children_with_tot",
166
+ "backtrack_to_parent",
167
+ "TOT_SYSTEM_PROMPT",
168
+
169
+ # RLM Recursive Execution (Section 2.2)
170
+ "execute_sub_task_with_llm",
171
+ "batch_execute_sub_tasks",
172
+ "process_pending_questions",
173
+ "DECOMPOSITION_PROMPT",
174
+
175
+ # Adaptive Learning
176
+ "LearnedStopWords",
177
+ "LearnedQueryPatterns",
178
+ "get_learned_stop_words",
179
+ "get_learned_query_patterns",
180
+
181
+ # Provenance System (NEW)
182
+ "ProvenanceTracker",
183
+ "ProvenanceRecord",
184
+ "Citation",
185
+ "Contradiction",
186
+ "CitationStrength",
187
+ "create_citation",
188
+ "format_citations_for_display",
189
+
190
+ # LLM Cache (NEW)
191
+ "LLMCache",
192
+ "CachedLLM",
193
+ "get_global_cache",
194
+ "wrap_llm_with_cache",
195
+
196
+ # Self-Reflection (NEW)
197
+ "SelfReflectionEngine",
198
+ "ReflectionResult",
199
+ "CritiqueResult",
200
+ "reflect_on_answer",
201
+
202
+ # Reasoning Memory (NEW)
203
+ "ReasoningChainMemory",
204
+ "ReasoningChain",
205
+ "ReasoningStep",
206
+ "ChainMatch",
207
+ "get_reasoning_memory",
208
+ "store_reasoning_chain",
209
+ "find_similar_chains",
210
+
211
+ # Query Clarification (NEW)
212
+ "QueryClarifier",
213
+ "AmbiguityAnalysis",
214
+ "ClarificationRequest",
215
+ "ClarificationResult",
216
+ "needs_clarification",
217
+ "clarify_query",
218
+ ]