rnsr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. rnsr/__init__.py +118 -0
  2. rnsr/__main__.py +242 -0
  3. rnsr/agent/__init__.py +218 -0
  4. rnsr/agent/cross_doc_navigator.py +767 -0
  5. rnsr/agent/graph.py +1557 -0
  6. rnsr/agent/llm_cache.py +575 -0
  7. rnsr/agent/navigator_api.py +497 -0
  8. rnsr/agent/provenance.py +772 -0
  9. rnsr/agent/query_clarifier.py +617 -0
  10. rnsr/agent/reasoning_memory.py +736 -0
  11. rnsr/agent/repl_env.py +709 -0
  12. rnsr/agent/rlm_navigator.py +2108 -0
  13. rnsr/agent/self_reflection.py +602 -0
  14. rnsr/agent/variable_store.py +308 -0
  15. rnsr/benchmarks/__init__.py +118 -0
  16. rnsr/benchmarks/comprehensive_benchmark.py +733 -0
  17. rnsr/benchmarks/evaluation_suite.py +1210 -0
  18. rnsr/benchmarks/finance_bench.py +147 -0
  19. rnsr/benchmarks/pdf_merger.py +178 -0
  20. rnsr/benchmarks/performance.py +321 -0
  21. rnsr/benchmarks/quality.py +321 -0
  22. rnsr/benchmarks/runner.py +298 -0
  23. rnsr/benchmarks/standard_benchmarks.py +995 -0
  24. rnsr/client.py +560 -0
  25. rnsr/document_store.py +394 -0
  26. rnsr/exceptions.py +74 -0
  27. rnsr/extraction/__init__.py +172 -0
  28. rnsr/extraction/candidate_extractor.py +357 -0
  29. rnsr/extraction/entity_extractor.py +581 -0
  30. rnsr/extraction/entity_linker.py +825 -0
  31. rnsr/extraction/grounded_extractor.py +722 -0
  32. rnsr/extraction/learned_types.py +599 -0
  33. rnsr/extraction/models.py +232 -0
  34. rnsr/extraction/relationship_extractor.py +600 -0
  35. rnsr/extraction/relationship_patterns.py +511 -0
  36. rnsr/extraction/relationship_validator.py +392 -0
  37. rnsr/extraction/rlm_extractor.py +589 -0
  38. rnsr/extraction/rlm_unified_extractor.py +990 -0
  39. rnsr/extraction/tot_validator.py +610 -0
  40. rnsr/extraction/unified_extractor.py +342 -0
  41. rnsr/indexing/__init__.py +60 -0
  42. rnsr/indexing/knowledge_graph.py +1128 -0
  43. rnsr/indexing/kv_store.py +313 -0
  44. rnsr/indexing/persistence.py +323 -0
  45. rnsr/indexing/semantic_retriever.py +237 -0
  46. rnsr/indexing/semantic_search.py +320 -0
  47. rnsr/indexing/skeleton_index.py +395 -0
  48. rnsr/ingestion/__init__.py +161 -0
  49. rnsr/ingestion/chart_parser.py +569 -0
  50. rnsr/ingestion/document_boundary.py +662 -0
  51. rnsr/ingestion/font_histogram.py +334 -0
  52. rnsr/ingestion/header_classifier.py +595 -0
  53. rnsr/ingestion/hierarchical_cluster.py +515 -0
  54. rnsr/ingestion/layout_detector.py +356 -0
  55. rnsr/ingestion/layout_model.py +379 -0
  56. rnsr/ingestion/ocr_fallback.py +177 -0
  57. rnsr/ingestion/pipeline.py +936 -0
  58. rnsr/ingestion/semantic_fallback.py +417 -0
  59. rnsr/ingestion/table_parser.py +799 -0
  60. rnsr/ingestion/text_builder.py +460 -0
  61. rnsr/ingestion/tree_builder.py +402 -0
  62. rnsr/ingestion/vision_retrieval.py +965 -0
  63. rnsr/ingestion/xy_cut.py +555 -0
  64. rnsr/llm.py +733 -0
  65. rnsr/models.py +167 -0
  66. rnsr/py.typed +2 -0
  67. rnsr-0.1.0.dist-info/METADATA +592 -0
  68. rnsr-0.1.0.dist-info/RECORD +72 -0
  69. rnsr-0.1.0.dist-info/WHEEL +5 -0
  70. rnsr-0.1.0.dist-info/entry_points.txt +2 -0
  71. rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
  72. rnsr-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,511 @@
1
+ """
2
+ RNSR Relationship Pattern Extractor
3
+
4
+ Pre-extracts relationship candidates using patterns, similar to entity extraction.
5
+ This provides GROUNDED relationship candidates that are validated by LLM/ToT.
6
+
7
+ Patterns detect:
8
+ 1. Entity proximity (co-occurrence signals relationships)
9
+ 2. Explicit relationship markers (verbs, prepositions)
10
+ 3. Reference patterns (citations, exhibits)
11
+ 4. Temporal markers (before, after, during)
12
+ 5. Causal markers (caused, led to, resulted in)
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import re
18
+ from dataclasses import dataclass, field
19
+ from typing import Any
20
+
21
+ import structlog
22
+
23
+ from rnsr.extraction.candidate_extractor import EntityCandidate
24
+ from rnsr.extraction.models import Entity, RelationType
25
+
26
# Module-level structured logger (structlog), named after this module.
logger = structlog.get_logger(__name__)
27
+
28
+
29
@dataclass
class RelationshipCandidate:
    """
    A candidate relationship extracted from text before LLM validation.

    Grounded in actual text: every candidate is tied to a specific
    character span (``span_start``/``span_end``) and the name of the
    pattern that produced it, so downstream validators can verify the
    evidence rather than trusting a free-floating claim.
    """

    source_text: str  # Source entity text as it appeared in the document
    target_text: str  # Target entity text as it appeared in the document
    relationship_type: str  # Suggested relationship type (e.g. "PARTY_TO")
    evidence: str  # The exact matched text that indicates the relationship
    span_start: int  # Start offset of relationship evidence in the source text
    span_end: int  # End offset of relationship evidence in the source text
    confidence: float = 0.5  # Pattern match confidence in [0, 1]
    pattern_name: str = ""  # Which pattern matched (e.g. "versus", "co_occurrence")
    # Resolved entity ids, when the raw strings could be matched to known
    # entities; None when unresolved.
    source_entity_id: str | None = None
    target_entity_id: str | None = None
    # Free-form extras (e.g. "full_context", "pattern_groups", "distance").
    metadata: dict[str, Any] = field(default_factory=dict)
48
+
49
+
50
# =============================================================================
# Relationship Pattern Definitions
# =============================================================================
#
# Each entry is (raw_regex, pattern_name). The regexes rely on capitalization
# ([A-Z][a-z]+ ...) to anchor on proper nouns, so they are compiled
# case-SENSITIVELY below.

# Affiliation patterns: "X of Y", "X at Y", "X, [title] of Y"
AFFILIATION_PATTERNS = [
    # "John Smith, CEO of Acme Corp"
    (r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+),?\s+(?:CEO|President|Director|Manager|Partner|Attorney|Counsel|Agent|Representative)\s+(?:of|at|for)\s+([A-Z][A-Za-z\s&]+(?:Inc\.|LLC|Corp\.?|Company)?)', "title_of"),

    # "employed by", "works for"
    (r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s+(?:is\s+)?(?:employed|hired|engaged)\s+by\s+([A-Z][A-Za-z\s&]+)', "employed_by"),

    # "X, an employee of Y"
    (r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+),?\s+(?:an?\s+)?(?:employee|officer|director|member)\s+of\s+([A-Z][A-Za-z\s&]+)', "member_of"),
]

# Party-to patterns: parties to agreements, cases
PARTY_TO_PATTERNS = [
    # "X entered into [agreement] with Y"
    (r'([A-Z][A-Za-z\s]+?)\s+(?:entered\s+into|executed|signed)\s+(?:the\s+)?(?:Agreement|Contract|Lease|License)\s+with\s+([A-Z][A-Za-z\s]+)', "entered_into"),

    # "between X and Y"
    (r'between\s+([A-Z][A-Za-z\s,]+?)\s+and\s+([A-Z][A-Za-z\s,]+?)(?:,|\.|;)', "between_parties"),

    # "X v. Y" (legal case)
    (r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+v\.\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)', "versus"),

    # "Plaintiff X" / "Defendant Y"
    (r'(?:Plaintiff|Petitioner|Appellant)\s+([A-Z][A-Za-z\s]+?)(?:,|and|;|\.|filed)', "plaintiff"),
    (r'(?:Defendant|Respondent|Appellee)\s+([A-Z][A-Za-z\s]+?)(?:,|and|;|\.)', "defendant"),
]

# Temporal patterns: before, after, during
TEMPORAL_PATTERNS = [
    # "X before Y"
    (r'([A-Z][A-Za-z\s]+?|(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4})\s+(?:before|prior\s+to|preceding)\s+([A-Z][A-Za-z\s]+?|(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4})', "temporal_before"),

    # "X after Y"
    (r'([A-Z][A-Za-z\s]+?|(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4})\s+(?:after|following|subsequent\s+to)\s+([A-Z][A-Za-z\s]+?)', "temporal_after"),

    # "from X to Y"
    (r'from\s+([A-Z][A-Za-z\s,\d]+?)\s+(?:to|until|through)\s+([A-Z][A-Za-z\s,\d]+?)(?:,|\.)', "temporal_range"),
]

# Causal patterns: caused, led to, resulted in
CAUSAL_PATTERNS = [
    # "X caused Y"
    (r'([A-Z][A-Za-z\s]+?)\s+(?:caused|led\s+to|resulted\s+in|gave\s+rise\s+to)\s+([A-Z][A-Za-z\s]+?)(?:,|\.)', "caused"),

    # "X as a result of Y"
    (r'([A-Z][A-Za-z\s]+?)\s+(?:as\s+a\s+result\s+of|due\s+to|because\s+of|arising\s+from)\s+([A-Z][A-Za-z\s]+?)(?:,|\.)', "result_of"),

    # "X breach ... damages"
    (r'([A-Z][A-Za-z\s]+?)\s+(?:breach(?:ed)?|violat(?:ed|ion))\s+.{0,100}(damages|injury|harm|loss)', "breach_damages"),
]

# Reference patterns: citations, exhibits
REFERENCE_PATTERNS = [
    # "See Exhibit A"
    (r'(?:See|see|per|Per|As\s+(?:shown|stated|set\s+forth)\s+in)\s+(Exhibit\s+[A-Z0-9]+)', "see_exhibit"),

    # "pursuant to Section 3.2"
    (r'(?:pursuant\s+to|under|per|in\s+accordance\s+with)\s+(Section\s+[\d\.]+|Article\s+[IVX\d]+)', "pursuant_to"),

    # Legal citations "123 F.3d 456"
    (r'([A-Z][a-z]+\s+v\.\s+[A-Z][a-z]+),?\s+(\d+\s+[A-Z]\.\s*\d*[a-z]*\s+\d+)', "case_citation"),
]

# Support/Contradict patterns
SUPPORT_CONTRADICT_PATTERNS = [
    # "consistent with", "in accordance with"
    (r'(?:consistent\s+with|in\s+accordance\s+with|supports|confirms)\s+([A-Z][A-Za-z\s]+?)(?:,|\.)', "supports"),

    # "contrary to", "inconsistent with"
    (r'(?:contrary\s+to|inconsistent\s+with|contradicts|conflicts\s+with)\s+([A-Z][A-Za-z\s]+?)(?:,|\.)', "contradicts"),
]

# Supersedes/Amends patterns
AMENDMENT_PATTERNS = [
    # "supersedes"
    (r'([A-Z][A-Za-z\s]+?)\s+(?:supersedes|replaces|terminates)\s+([A-Z][A-Za-z\s]+?)(?:,|\.)', "supersedes"),

    # "amends"
    (r'([A-Z][A-Za-z\s]+?)\s+(?:amends|modifies|supplements)\s+([A-Z][A-Za-z\s]+?)(?:,|\.)', "amends"),
]


# Compile all patterns with relationship type mapping.
#
# BUGFIX: patterns are now compiled WITHOUT re.IGNORECASE. The regexes use
# [A-Z][a-z]+ precisely to anchor on capitalized proper nouns; under
# IGNORECASE, [A-Z] matches any letter, so e.g. the "between_parties"
# pattern fired on ordinary lowercase prose ("between the cat and the dog"),
# producing large numbers of false positives. Patterns that genuinely need
# case variants already spell them out explicitly (e.g. "See|see",
# "per|Per"), which would have been redundant were IGNORECASE intended.
#
# NOTE: "temporal_range" maps to TEMPORAL_AFTER (existing behavior of the
# before/after split below); the downstream validator is expected to refine
# temporal directions anyway.
COMPILED_RELATIONSHIP_PATTERNS: dict[str, list[tuple[re.Pattern[str], str, str]]] = {
    "AFFILIATED_WITH": [
        (re.compile(p), n, "AFFILIATED_WITH")
        for p, n in AFFILIATION_PATTERNS
    ],
    "PARTY_TO": [
        (re.compile(p), n, "PARTY_TO")
        for p, n in PARTY_TO_PATTERNS
    ],
    "TEMPORAL": [
        (re.compile(p), n,
         "TEMPORAL_BEFORE" if "before" in n else "TEMPORAL_AFTER")
        for p, n in TEMPORAL_PATTERNS
    ],
    "CAUSAL": [
        (re.compile(p), n, "CAUSAL")
        for p, n in CAUSAL_PATTERNS
    ],
    "REFERENCES": [
        (re.compile(p), n, "REFERENCES")
        for p, n in REFERENCE_PATTERNS
    ],
    "SUPPORT_CONTRADICT": [
        (re.compile(p), n,
         "SUPPORTS" if "support" in n else "CONTRADICTS")
        for p, n in SUPPORT_CONTRADICT_PATTERNS
    ],
    "AMENDMENT": [
        (re.compile(p), n,
         "SUPERSEDES" if "supersedes" in n else "AMENDS")
        for p, n in AMENDMENT_PATTERNS
    ],
}
171
+
172
+
173
+ # =============================================================================
174
+ # Relationship Pattern Extractor
175
+ # =============================================================================
176
+
177
+
178
class RelationshipPatternExtractor:
    """
    Extracts relationship candidates from text using regex patterns.

    Provides GROUNDED candidates - every relationship is tied to
    actual text evidence, preventing hallucination.

    Candidates come from two sources:

    1. Explicit markers matched by ``COMPILED_RELATIONSHIP_PATTERNS``
       (affiliation, party-to, temporal, causal, references, ...).
    2. Co-occurrence of known entities within a character window.

    All candidates are meant to be validated downstream (LLM/ToT);
    confidence values here reflect only pattern strength.
    """

    # Pattern names explicit enough to carry a fixed high confidence.
    # Hoisted to a class constant so the dict is not rebuilt per match.
    _HIGH_CONFIDENCE_PATTERNS: dict[str, float] = {
        "versus": 0.95,  # X v. Y is very explicit
        "see_exhibit": 0.9,
        "case_citation": 0.9,
        "entered_into": 0.85,
        "caused": 0.8,
        "supersedes": 0.85,
    }

    def __init__(
        self,
        context_window: int = 150,
        min_confidence: float = 0.4,
    ) -> None:
        """
        Initialize the relationship pattern extractor.

        Args:
            context_window: Characters of context captured on each side of a
                match (stored as ``metadata["full_context"]``).
            min_confidence: Minimum confidence a candidate must reach to be
                returned by :meth:`extract_candidates`.
        """
        self.context_window = context_window
        self.min_confidence = min_confidence

    def extract_candidates(
        self,
        text: str,
        entities: list[Entity] | None = None,
        relationship_types: list[str] | None = None,
    ) -> list[RelationshipCandidate]:
        """
        Extract relationship candidates from text.

        Args:
            text: Text to extract from.
            entities: Optional known entities; enables entity-id resolution
                and co-occurrence candidates.
            relationship_types: Optional subset of pattern categories (keys
                of ``COMPILED_RELATIONSHIP_PATTERNS``) to run.

        Returns:
            List of RelationshipCandidate objects with
            ``confidence >= min_confidence``.
        """
        if not text:
            return []

        candidates: list[RelationshipCandidate] = []
        types_to_check = relationship_types or list(COMPILED_RELATIONSHIP_PATTERNS.keys())

        for rel_category in types_to_check:
            patterns = COMPILED_RELATIONSHIP_PATTERNS.get(rel_category, [])

            for pattern, pattern_name, rel_type in patterns:
                for match in pattern.finditer(text):
                    candidate = self._create_candidate_from_match(
                        match=match,
                        pattern_name=pattern_name,
                        relationship_type=rel_type,
                        text=text,
                        entities=entities,
                    )

                    if candidate and candidate.confidence >= self.min_confidence:
                        candidates.append(candidate)

        # Also extract co-occurrence relationships between known entities.
        # BUGFIX: these now pass through the same min_confidence floor as
        # pattern-based candidates; previously they bypassed the filter, so
        # a caller-supplied min_confidence > 0.4 was silently ignored for
        # co-occurrence results.
        if entities:
            candidates.extend(
                c
                for c in self._extract_cooccurrence_candidates(
                    text=text,
                    entities=entities,
                )
                if c.confidence >= self.min_confidence
            )

        logger.debug(
            "relationship_candidates_extracted",
            total=len(candidates),
            by_type={t: sum(1 for c in candidates if c.relationship_type == t)
                     for t in set(c.relationship_type for c in candidates)},
        )

        return candidates

    def _create_candidate_from_match(
        self,
        match: re.Match,
        pattern_name: str,
        relationship_type: str,
        text: str,
        entities: list[Entity] | None,
    ) -> RelationshipCandidate | None:
        """
        Build a RelationshipCandidate from a regex match.

        Returns None when the match yields no usable source/target text.
        """
        groups = match.groups()

        if len(groups) < 1:
            return None

        # Single-group patterns (e.g. "supports X") only name the target;
        # the source is the enclosing section, resolved later to a node_id.
        if len(groups) == 1:
            source_text = "this_section"  # Will be resolved to node_id
            target_text = groups[0].strip()
        else:
            source_text = groups[0].strip()
            target_text = groups[1].strip() if len(groups) > 1 else ""

        if not source_text or not target_text:
            return None

        # Confidence from pattern identity and match length.
        confidence = self._calculate_confidence(match, pattern_name)

        # Wider surrounding context is kept only in metadata; the
        # candidate's `evidence` field is the exact matched span.
        start = max(0, match.start() - self.context_window)
        end = min(len(text), match.end() + self.context_window)
        evidence = text[start:end]

        # Try to resolve the raw source/target strings to known entity ids.
        source_entity_id = None
        target_entity_id = None

        if entities:
            source_entity_id = self._match_to_entity(source_text, entities)
            target_entity_id = self._match_to_entity(target_text, entities)

        return RelationshipCandidate(
            source_text=source_text,
            target_text=target_text,
            relationship_type=relationship_type,
            evidence=match.group(),
            span_start=match.start(),
            span_end=match.end(),
            confidence=confidence,
            pattern_name=pattern_name,
            source_entity_id=source_entity_id,
            target_entity_id=target_entity_id,
            metadata={
                "full_context": evidence,
                "pattern_groups": groups,
            },
        )

    def _calculate_confidence(
        self,
        match: re.Match,
        pattern_name: str,
    ) -> float:
        """
        Calculate confidence for a pattern match.

        Known high-precision patterns get a fixed score; everything else
        starts at 0.6 with a small boost for longer (more specific)
        matches, capped at 0.95.
        """
        if pattern_name in self._HIGH_CONFIDENCE_PATTERNS:
            return self._HIGH_CONFIDENCE_PATTERNS[pattern_name]

        base_confidence = 0.6

        # Boost for longer, more specific matches.
        match_length = len(match.group())
        if match_length > 50:
            base_confidence += 0.15
        elif match_length > 25:
            base_confidence += 0.1

        return min(base_confidence, 0.95)

    def _match_to_entity(
        self,
        text: str,
        entities: list[Entity],
    ) -> str | None:
        """
        Try to match extracted text to a known entity.

        Checks, in order: exact canonical-name match, exact alias match,
        then a loose containment match (either string contains the other).
        Returns the first matching entity's id, or None.
        """
        text_lower = text.lower().strip()

        for entity in entities:
            # Check canonical name.
            if entity.canonical_name.lower() == text_lower:
                return entity.id

            # Check aliases.
            for alias in entity.aliases:
                if alias.lower() == text_lower:
                    return entity.id

            # Fuzzy match (one contains the other).
            if text_lower in entity.canonical_name.lower() or \
               entity.canonical_name.lower() in text_lower:
                return entity.id

        return None

    def _extract_cooccurrence_candidates(
        self,
        text: str,
        entities: list[Entity],
        window_size: int = 100,
    ) -> list[RelationshipCandidate]:
        """
        Extract relationship candidates based on entity co-occurrence.

        Entities mentioned within ``window_size`` characters of each other
        often stand in some relationship; these candidates carry lower
        confidence (0.4-0.6) and need downstream validation.
        """
        candidates: list[RelationshipCandidate] = []

        # Locate every mention (canonical name or alias) of every entity.
        # NOTE: overlapping canonical/alias mentions may each produce an
        # entry; duplicates are left for the validator to collapse.
        entity_positions: list[dict[str, Any]] = []
        for entity in entities:
            # Search for canonical name.
            for match in re.finditer(re.escape(entity.canonical_name), text, re.IGNORECASE):
                entity_positions.append({
                    "entity": entity,
                    "start": match.start(),
                    "end": match.end(),
                    "text": match.group(),
                })

            # Search for aliases.
            for alias in entity.aliases:
                for match in re.finditer(re.escape(alias), text, re.IGNORECASE):
                    entity_positions.append({
                        "entity": entity,
                        "start": match.start(),
                        "end": match.end(),
                        "text": match.group(),
                    })

        # Sort by position so the inner loop can stop early.
        entity_positions.sort(key=lambda x: x["start"])

        # Find co-occurring pairs within the window.
        for i, pos1 in enumerate(entity_positions):
            for pos2 in entity_positions[i + 1:]:
                # Skip if same entity.
                if pos1["entity"].id == pos2["entity"].id:
                    continue

                # Positions are sorted by start, so once pos2 is too far
                # from pos1, every later mention is too: stop scanning.
                distance = pos2["start"] - pos1["end"]
                if distance > window_size:
                    break

                if distance < 0:
                    continue  # Overlapping mentions, skip.

                # Evidence is the span covering both mentions.
                evidence_start = pos1["start"]
                evidence_end = pos2["end"]
                evidence = text[evidence_start:evidence_end]

                # Guess a relationship type from the entity types involved.
                rel_type = self._infer_cooccurrence_type(
                    pos1["entity"], pos2["entity"], evidence
                )

                # Closer mentions score higher: 0.4 at the window edge,
                # up to 0.6 when adjacent. Needs validation either way.
                confidence = 0.4 + (1 - distance / window_size) * 0.2

                candidates.append(RelationshipCandidate(
                    source_text=pos1["text"],
                    target_text=pos2["text"],
                    relationship_type=rel_type,
                    evidence=evidence,
                    span_start=evidence_start,
                    span_end=evidence_end,
                    confidence=confidence,
                    pattern_name="co_occurrence",
                    source_entity_id=pos1["entity"].id,
                    target_entity_id=pos2["entity"].id,
                    metadata={
                        "distance": distance,
                        "source_type": pos1["entity"].type.value,
                        "target_type": pos2["entity"].type.value,
                    },
                ))

        return candidates

    def _infer_cooccurrence_type(
        self,
        entity1: Entity,
        entity2: Entity,
        evidence: str,
    ) -> str:
        """
        Infer a relationship type for a pair of co-occurring entities.

        Heuristic mapping on entity types; the downstream validator is
        expected to refine or reject the suggestion.
        """
        # Deferred import — presumably avoids an import cycle; TODO confirm.
        from rnsr.extraction.models import EntityType

        type1 = entity1.type
        type2 = entity2.type
        evidence_lower = evidence.lower()

        # Person + Organization -> likely AFFILIATED_WITH.
        if (type1 == EntityType.PERSON and type2 == EntityType.ORGANIZATION) or \
           (type1 == EntityType.ORGANIZATION and type2 == EntityType.PERSON):
            return "AFFILIATED_WITH"

        # Date involved -> temporal relationship.
        if type1 == EntityType.DATE or type2 == EntityType.DATE:
            return "TEMPORAL_BEFORE"  # Will be refined by validator

        # Event + Event -> causal if causal vocabulary appears nearby.
        if type1 == EntityType.EVENT and type2 == EntityType.EVENT:
            if any(word in evidence_lower for word in ["caused", "led", "resulted"]):
                return "CAUSAL"
            return "TEMPORAL_BEFORE"

        # Reference entities -> REFERENCES.
        if type1 == EntityType.REFERENCE or type2 == EntityType.REFERENCE:
            return "REFERENCES"

        # Document + anything -> likely MENTIONS.
        if type1 == EntityType.DOCUMENT or type2 == EntityType.DOCUMENT:
            return "MENTIONS"

        # Default.
        return "MENTIONS"
494
+
495
+
496
def extract_relationship_candidates(
    text: str,
    entities: list[Entity] | None = None,
) -> list[RelationshipCandidate]:
    """
    Convenience wrapper around RelationshipPatternExtractor.

    Builds a default-configured extractor and runs a single extraction
    pass over *text*.

    Args:
        text: Text to extract from.
        entities: Optional known entities for matching.

    Returns:
        List of RelationshipCandidate objects.
    """
    return RelationshipPatternExtractor().extract_candidates(text, entities)