rnsr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. rnsr/__init__.py +118 -0
  2. rnsr/__main__.py +242 -0
  3. rnsr/agent/__init__.py +218 -0
  4. rnsr/agent/cross_doc_navigator.py +767 -0
  5. rnsr/agent/graph.py +1557 -0
  6. rnsr/agent/llm_cache.py +575 -0
  7. rnsr/agent/navigator_api.py +497 -0
  8. rnsr/agent/provenance.py +772 -0
  9. rnsr/agent/query_clarifier.py +617 -0
  10. rnsr/agent/reasoning_memory.py +736 -0
  11. rnsr/agent/repl_env.py +709 -0
  12. rnsr/agent/rlm_navigator.py +2108 -0
  13. rnsr/agent/self_reflection.py +602 -0
  14. rnsr/agent/variable_store.py +308 -0
  15. rnsr/benchmarks/__init__.py +118 -0
  16. rnsr/benchmarks/comprehensive_benchmark.py +733 -0
  17. rnsr/benchmarks/evaluation_suite.py +1210 -0
  18. rnsr/benchmarks/finance_bench.py +147 -0
  19. rnsr/benchmarks/pdf_merger.py +178 -0
  20. rnsr/benchmarks/performance.py +321 -0
  21. rnsr/benchmarks/quality.py +321 -0
  22. rnsr/benchmarks/runner.py +298 -0
  23. rnsr/benchmarks/standard_benchmarks.py +995 -0
  24. rnsr/client.py +560 -0
  25. rnsr/document_store.py +394 -0
  26. rnsr/exceptions.py +74 -0
  27. rnsr/extraction/__init__.py +172 -0
  28. rnsr/extraction/candidate_extractor.py +357 -0
  29. rnsr/extraction/entity_extractor.py +581 -0
  30. rnsr/extraction/entity_linker.py +825 -0
  31. rnsr/extraction/grounded_extractor.py +722 -0
  32. rnsr/extraction/learned_types.py +599 -0
  33. rnsr/extraction/models.py +232 -0
  34. rnsr/extraction/relationship_extractor.py +600 -0
  35. rnsr/extraction/relationship_patterns.py +511 -0
  36. rnsr/extraction/relationship_validator.py +392 -0
  37. rnsr/extraction/rlm_extractor.py +589 -0
  38. rnsr/extraction/rlm_unified_extractor.py +990 -0
  39. rnsr/extraction/tot_validator.py +610 -0
  40. rnsr/extraction/unified_extractor.py +342 -0
  41. rnsr/indexing/__init__.py +60 -0
  42. rnsr/indexing/knowledge_graph.py +1128 -0
  43. rnsr/indexing/kv_store.py +313 -0
  44. rnsr/indexing/persistence.py +323 -0
  45. rnsr/indexing/semantic_retriever.py +237 -0
  46. rnsr/indexing/semantic_search.py +320 -0
  47. rnsr/indexing/skeleton_index.py +395 -0
  48. rnsr/ingestion/__init__.py +161 -0
  49. rnsr/ingestion/chart_parser.py +569 -0
  50. rnsr/ingestion/document_boundary.py +662 -0
  51. rnsr/ingestion/font_histogram.py +334 -0
  52. rnsr/ingestion/header_classifier.py +595 -0
  53. rnsr/ingestion/hierarchical_cluster.py +515 -0
  54. rnsr/ingestion/layout_detector.py +356 -0
  55. rnsr/ingestion/layout_model.py +379 -0
  56. rnsr/ingestion/ocr_fallback.py +177 -0
  57. rnsr/ingestion/pipeline.py +936 -0
  58. rnsr/ingestion/semantic_fallback.py +417 -0
  59. rnsr/ingestion/table_parser.py +799 -0
  60. rnsr/ingestion/text_builder.py +460 -0
  61. rnsr/ingestion/tree_builder.py +402 -0
  62. rnsr/ingestion/vision_retrieval.py +965 -0
  63. rnsr/ingestion/xy_cut.py +555 -0
  64. rnsr/llm.py +733 -0
  65. rnsr/models.py +167 -0
  66. rnsr/py.typed +2 -0
  67. rnsr-0.1.0.dist-info/METADATA +592 -0
  68. rnsr-0.1.0.dist-info/RECORD +72 -0
  69. rnsr-0.1.0.dist-info/WHEEL +5 -0
  70. rnsr-0.1.0.dist-info/entry_points.txt +2 -0
  71. rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
  72. rnsr-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,232 @@
1
+ """
2
+ RNSR Extraction Data Models
3
+
4
+ Pydantic models for entity extraction, relationships, and ontological linking.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime
10
+ from enum import Enum
11
+ from typing import Any
12
+ from uuid import uuid4
13
+
14
+ from pydantic import BaseModel, Field
15
+
16
+
17
+ # =============================================================================
18
+ # Entity Types
19
+ # =============================================================================
20
+
21
+
22
class EntityType(str, Enum):
    """
    Closed vocabulary of entity categories produced by extraction.

    Every member is also a plain ``str`` (the class mixes in ``str``), so a
    member compares equal to its lowercase value, e.g.
    ``EntityType.PERSON == "person"``.

    Note: ``OTHER`` is the catch-all for categories the LLM reports that are
    not listed here. The code that assigns ``OTHER`` is expected to keep the
    original label in ``entity.metadata["original_type"]``; nothing in this
    enum enforces that.
    """

    # Names, roles, parties (plaintiff, defendant, witness)
    PERSON = "person"
    # Companies, agencies, courts
    ORGANIZATION = "organization"
    # Claims, breaches, obligations, remedies
    LEGAL_CONCEPT = "legal_concept"
    # Key dates, events, deadlines
    DATE = "date"
    # Significant occurrences
    EVENT = "event"
    # Places, addresses, jurisdictions
    LOCATION = "location"
    # Section references, document citations
    REFERENCE = "reference"
    # Dollar amounts, financial figures
    MONETARY = "monetary"
    # Referenced documents (exhibits, contracts)
    DOCUMENT = "document"

    # Catch-all for novel/custom entity types outside the list above
    OTHER = "other"
43
+
44
+
45
class RelationType(str, Enum):
    """
    Closed vocabulary of relationship kinds between entities and sections.

    Every member is also a plain ``str`` (the class mixes in ``str``), so a
    member compares equal to its lowercase value, e.g.
    ``RelationType.CAUSAL == "causal"``.

    Note: ``OTHER`` is the catch-all for relationship kinds the LLM reports
    that are not listed here. The code that assigns ``OTHER`` is expected to
    keep the original label in ``relationship.metadata["original_type"]``;
    nothing in this enum enforces that.
    """

    # --- Entity-to-Section relationships ---
    # Entity X is mentioned in Section Y
    MENTIONS = "mentions"
    # Entity X is defined/introduced in Section Y
    DEFINED_IN = "defined_in"

    # --- Entity-to-Entity relationships ---
    # Event X occurred before Event Y
    TEMPORAL_BEFORE = "temporal_before"
    # Event X occurred after Event Y
    TEMPORAL_AFTER = "temporal_after"
    # Action X caused/led to Outcome Y
    CAUSAL = "causal"
    # Person X is affiliated with Org Y
    AFFILIATED_WITH = "affiliated_with"
    # Entity X is party to Document/Event Y
    PARTY_TO = "party_to"

    # --- Section-to-Section relationships ---
    # Section X supports claim in Section Y
    SUPPORTS = "supports"
    # Section X contradicts Section Y
    CONTRADICTS = "contradicts"
    # Section X references Document/Section Y
    REFERENCES = "references"
    # Section X supersedes/overrides Section Y
    SUPERSEDES = "supersedes"
    # Section X amends Section Y
    AMENDS = "amends"

    # Catch-all for novel/custom relationship types outside the list above
    OTHER = "other"
74
+
75
+
76
+ # =============================================================================
77
+ # Mention Model
78
+ # =============================================================================
79
+
80
+
81
class Mention(BaseModel):
    """One located occurrence of an entity inside a document section."""

    # Auto-generated when omitted: "mention_" + the first 8 hex digits of a
    # random UUID4.
    id: str = Field(default_factory=lambda: "mention_" + uuid4().hex[:8])
    # Skeleton node that contains this mention
    node_id: str
    # Owning document (relevant for multi-document bundles)
    doc_id: str
    # Optional character offset where the mention starts
    span_start: int | None = None
    # Optional character offset where the mention ends
    span_end: int | None = None
    # Surrounding text snippet used for grounding
    context: str = ""
    # Page number, when available
    page_num: int | None = None
    # Extraction confidence; intended range is 0.0-1.0 (no validator is
    # declared on this field)
    confidence: float = 1.0

    class Config:
        # Instances remain mutable after construction.
        frozen = False
95
+
96
+
97
+ # =============================================================================
98
+ # Entity Model
99
+ # =============================================================================
100
+
101
+
102
class Entity(BaseModel):
    """
    A named concept extracted from a document.

    An entity (person, organization, date, ...) carries one canonical name,
    any number of aliases, and the list of mentions recording where it was
    seen, which lets it be tracked across sections and across documents.
    """

    # Auto-generated when omitted: "ent_" + the first 8 hex digits of a
    # random UUID4.
    id: str = Field(default_factory=lambda: "ent_" + uuid4().hex[:8])
    type: EntityType
    # Normalized/canonical name
    canonical_name: str
    # Alternative names/spellings
    aliases: list[str] = Field(default_factory=list)
    # Every place this entity appears
    mentions: list[Mention] = Field(default_factory=list)
    # Type-specific metadata
    metadata: dict[str, Any] = Field(default_factory=dict)

    # --- Tracking ---
    # Document in which the entity was first extracted
    source_doc_id: str | None = None
    # Construction time from datetime.utcnow (a naive datetime in UTC).
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; moving to
    # an aware timestamp would change the stored value, so it is kept as-is.
    created_at: datetime = Field(default_factory=datetime.utcnow)

    class Config:
        # Instances remain mutable; add_mention/add_alias rely on in-place
        # list mutation.
        frozen = False

    def add_mention(self, mention: Mention) -> None:
        """Append ``mention`` to this entity's list of mentions."""
        self.mentions.append(mention)

    def add_alias(self, alias: str) -> None:
        """
        Record ``alias`` as an alternative name.

        The alias is stripped of surrounding whitespace first. It is ignored
        when the stripped value is empty, is already in ``aliases``, or
        equals ``canonical_name`` (all comparisons are exact and
        case-sensitive).
        """
        candidate = alias.strip()
        if not candidate:
            return
        if candidate in self.aliases or candidate == self.canonical_name:
            return
        self.aliases.append(candidate)

    @property
    def all_names(self) -> list[str]:
        """Return a new list: the canonical name followed by every alias."""
        return [self.canonical_name, *self.aliases]

    @property
    def document_ids(self) -> set[str]:
        """Return the set of ``doc_id`` values across all mentions."""
        return {mention.doc_id for mention in self.mentions}

    @property
    def node_ids(self) -> set[str]:
        """Return the set of ``node_id`` values across all mentions."""
        return {mention.node_id for mention in self.mentions}
148
+
149
+
150
+ # =============================================================================
151
+ # Relationship Model
152
+ # =============================================================================
153
+
154
+
155
class Relationship(BaseModel):
    """
    A typed, directed connection from a source to a target.

    Each endpoint is identified by an ID and a kind string ("entity" or
    "node"), so one model covers entity-to-entity, entity-to-section and
    section-to-section links used for cross-document understanding and
    complex query resolution.
    """

    # Auto-generated when omitted: "rel_" + the first 8 hex digits of a
    # random UUID4.
    id: str = Field(default_factory=lambda: "rel_" + uuid4().hex[:8])
    type: RelationType
    # Entity ID or Node ID of the source endpoint
    source_id: str
    # Entity ID or Node ID of the target endpoint
    target_id: str
    # Kind of the source endpoint: "entity" or "node" (plain str, not
    # validated here)
    source_type: str = "entity"
    # Kind of the target endpoint: "entity" or "node" (plain str, not
    # validated here)
    target_type: str = "entity"

    # Source document
    doc_id: str | None = None
    # Extraction confidence; intended range is 0.0-1.0 (no validator is
    # declared on this field)
    confidence: float = 1.0
    # Supporting text that establishes the relationship
    evidence: str = ""

    # --- Metadata ---
    metadata: dict[str, Any] = Field(default_factory=dict)
    # Construction time from datetime.utcnow (a naive datetime in UTC).
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; moving to
    # an aware timestamp would change the stored value, so it is kept as-is.
    created_at: datetime = Field(default_factory=datetime.utcnow)

    class Config:
        # Instances remain mutable after construction.
        frozen = False
180
+
181
+
182
+ # =============================================================================
183
+ # Entity Link Model (for cross-document linking)
184
+ # =============================================================================
185
+
186
+
187
class EntityLink(BaseModel):
    """
    Assertion that two entity records denote the same real-world entity.

    Used for cross-document entity resolution.
    """

    # First entity ID
    entity_id_1: str
    # Second entity ID
    entity_id_2: str
    # Link confidence; intended range is 0.0-1.0 (no validator is declared on
    # this field)
    confidence: float = 1.0
    # How the link was established (exact, fuzzy, llm)
    link_method: str = "exact"
    # Why these entities are considered the same
    evidence: str = ""

    # Construction time from datetime.utcnow (a naive datetime in UTC).
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; moving to
    # an aware timestamp would change the stored value, so it is kept as-is.
    created_at: datetime = Field(default_factory=datetime.utcnow)

    class Config:
        # Instances remain mutable after construction.
        frozen = False

    @property
    def entity_ids(self) -> tuple[str, str]:
        """
        Return both entity IDs as a pair in ascending string order.

        Sorting makes the result independent of which ID was stored as
        ``entity_id_1`` and which as ``entity_id_2``.
        """
        lower, upper = sorted((self.entity_id_1, self.entity_id_2))
        return (lower, upper)
209
+
210
+
211
+ # =============================================================================
212
+ # Extraction Result Model
213
+ # =============================================================================
214
+
215
+
216
class ExtractionResult(BaseModel):
    """
    Outcome of running entity/relationship extraction on one document section.
    """

    # Node that was processed
    node_id: str
    # Document the node belongs to
    doc_id: str
    # Entities found in the section (empty list by default)
    entities: list[Entity] = Field(default_factory=list)
    # Relationships found in the section (empty list by default)
    relationships: list[Relationship] = Field(default_factory=list)

    # --- Processing metadata ---
    # Method used (llm, rule-based, hybrid)
    extraction_method: str = "llm"
    # Processing time in milliseconds, going by the field name
    processing_time_ms: float = 0.0
    # Warning messages collected for this result (empty list by default)
    warnings: list[str] = Field(default_factory=list)

    class Config:
        # Instances remain mutable after construction.
        frozen = False