rnsr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. rnsr/__init__.py +118 -0
  2. rnsr/__main__.py +242 -0
  3. rnsr/agent/__init__.py +218 -0
  4. rnsr/agent/cross_doc_navigator.py +767 -0
  5. rnsr/agent/graph.py +1557 -0
  6. rnsr/agent/llm_cache.py +575 -0
  7. rnsr/agent/navigator_api.py +497 -0
  8. rnsr/agent/provenance.py +772 -0
  9. rnsr/agent/query_clarifier.py +617 -0
  10. rnsr/agent/reasoning_memory.py +736 -0
  11. rnsr/agent/repl_env.py +709 -0
  12. rnsr/agent/rlm_navigator.py +2108 -0
  13. rnsr/agent/self_reflection.py +602 -0
  14. rnsr/agent/variable_store.py +308 -0
  15. rnsr/benchmarks/__init__.py +118 -0
  16. rnsr/benchmarks/comprehensive_benchmark.py +733 -0
  17. rnsr/benchmarks/evaluation_suite.py +1210 -0
  18. rnsr/benchmarks/finance_bench.py +147 -0
  19. rnsr/benchmarks/pdf_merger.py +178 -0
  20. rnsr/benchmarks/performance.py +321 -0
  21. rnsr/benchmarks/quality.py +321 -0
  22. rnsr/benchmarks/runner.py +298 -0
  23. rnsr/benchmarks/standard_benchmarks.py +995 -0
  24. rnsr/client.py +560 -0
  25. rnsr/document_store.py +394 -0
  26. rnsr/exceptions.py +74 -0
  27. rnsr/extraction/__init__.py +172 -0
  28. rnsr/extraction/candidate_extractor.py +357 -0
  29. rnsr/extraction/entity_extractor.py +581 -0
  30. rnsr/extraction/entity_linker.py +825 -0
  31. rnsr/extraction/grounded_extractor.py +722 -0
  32. rnsr/extraction/learned_types.py +599 -0
  33. rnsr/extraction/models.py +232 -0
  34. rnsr/extraction/relationship_extractor.py +600 -0
  35. rnsr/extraction/relationship_patterns.py +511 -0
  36. rnsr/extraction/relationship_validator.py +392 -0
  37. rnsr/extraction/rlm_extractor.py +589 -0
  38. rnsr/extraction/rlm_unified_extractor.py +990 -0
  39. rnsr/extraction/tot_validator.py +610 -0
  40. rnsr/extraction/unified_extractor.py +342 -0
  41. rnsr/indexing/__init__.py +60 -0
  42. rnsr/indexing/knowledge_graph.py +1128 -0
  43. rnsr/indexing/kv_store.py +313 -0
  44. rnsr/indexing/persistence.py +323 -0
  45. rnsr/indexing/semantic_retriever.py +237 -0
  46. rnsr/indexing/semantic_search.py +320 -0
  47. rnsr/indexing/skeleton_index.py +395 -0
  48. rnsr/ingestion/__init__.py +161 -0
  49. rnsr/ingestion/chart_parser.py +569 -0
  50. rnsr/ingestion/document_boundary.py +662 -0
  51. rnsr/ingestion/font_histogram.py +334 -0
  52. rnsr/ingestion/header_classifier.py +595 -0
  53. rnsr/ingestion/hierarchical_cluster.py +515 -0
  54. rnsr/ingestion/layout_detector.py +356 -0
  55. rnsr/ingestion/layout_model.py +379 -0
  56. rnsr/ingestion/ocr_fallback.py +177 -0
  57. rnsr/ingestion/pipeline.py +936 -0
  58. rnsr/ingestion/semantic_fallback.py +417 -0
  59. rnsr/ingestion/table_parser.py +799 -0
  60. rnsr/ingestion/text_builder.py +460 -0
  61. rnsr/ingestion/tree_builder.py +402 -0
  62. rnsr/ingestion/vision_retrieval.py +965 -0
  63. rnsr/ingestion/xy_cut.py +555 -0
  64. rnsr/llm.py +733 -0
  65. rnsr/models.py +167 -0
  66. rnsr/py.typed +2 -0
  67. rnsr-0.1.0.dist-info/METADATA +592 -0
  68. rnsr-0.1.0.dist-info/RECORD +72 -0
  69. rnsr-0.1.0.dist-info/WHEEL +5 -0
  70. rnsr-0.1.0.dist-info/entry_points.txt +2 -0
  71. rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
  72. rnsr-0.1.0.dist-info/top_level.txt +1 -0
rnsr/extraction/rlm_extractor.py
@@ -0,0 +1,589 @@
"""
RNSR RLM Entity Extractor

Implements the TRUE RLM pattern for entity extraction:
1. LLM writes its own regex/Python code based on the document
2. Code executes on DOC_VAR (grounded in actual text)
3. LLM validates and classifies results

This is more powerful than pre-defined patterns because:
- LLM adapts to domain-specific patterns it discovers
- Can write complex extraction logic we didn't anticipate
- Still grounded because code executes on actual text
- Recursive - can use sub_llm for complex validation

From the RLM paper:
"The Neural Network generates code to interact with the document,
rather than having the document in its context window."
"""

from __future__ import annotations

import json
import re
import time
from dataclasses import dataclass, field
from typing import Any, TYPE_CHECKING

import structlog

from rnsr.extraction.models import Entity, EntityType, ExtractionResult, Mention
from rnsr.extraction.learned_types import get_learned_type_registry
from rnsr.llm import get_llm

if TYPE_CHECKING:
    from rnsr.agent.repl_env import REPLEnvironment
    from rnsr.models import DocumentTree

logger = structlog.get_logger(__name__)


# =============================================================================
# RLM Extraction Prompts
# =============================================================================

RLM_ENTITY_EXTRACTION_SYSTEM = """You are an RLM (Recursive Language Model) extracting entities from a document.

CRITICAL: You do NOT have the full document in context. It is stored in DOC_VAR.
You must write Python code to extract entities from DOC_VAR.

## Available Variables:
- DOC_VAR: The document text (string). Use slicing, regex, etc.
- SECTION_CONTENT: Current section content (string, smaller than DOC_VAR)

## Available Functions:
- search_text(pattern): Search DOC_VAR for regex pattern, returns list of (start, end, match)
- len(DOC_VAR): Get document length
- DOC_VAR[i:j]: Slice document
- re.findall(pattern, text): Standard regex
- re.finditer(pattern, text): Iterate matches with positions
- store_variable(name, content): Store findings for later

## Your Task:
Extract entities (people, organizations, dates, etc.) by writing Python code.

IMPORTANT: Your code should:
1. Write regex patterns tailored to THIS document
2. Execute patterns to find matches (grounded in text)
3. Return structured results with exact positions

## Output Format:
Write Python code that produces a list of entity dictionaries:
```python
entities = []

# Example: Find person names with titles
for match in re.finditer(r'(?:Mr\.|Mrs\.|Dr\.)\s+([A-Z][a-z]+\s+[A-Z][a-z]+)', SECTION_CONTENT):
    entities.append({
        "text": match.group(),
        "canonical_name": match.group(1),  # Without title
        "type": "PERSON",
        "start": match.start(),
        "end": match.end(),
        "confidence": 0.9
    })

# Example: Find dollar amounts
for match in re.finditer(r'\$[\d,]+(?:\.\d{2})?(?:\s*(?:million|billion))?', SECTION_CONTENT):
    entities.append({
        "text": match.group(),
        "type": "MONETARY",
        "start": match.start(),
        "end": match.end(),
        "confidence": 0.95
    })

# Store results
store_variable("ENTITIES", entities)
```

Write code appropriate for the document type and content shown."""


RLM_EXTRACTION_PROMPT = """Document section to extract entities from:

Section Header: {header}
Section Content (first 2000 chars):
---
{content_preview}
---

Total section length: {content_length} characters

Based on this content, write Python code to extract all significant entities.
Consider:
1. What types of entities appear in this document? (people, companies, dates, money, etc.)
2. What patterns would match them? (titles, suffixes, formats, etc.)
3. Are there domain-specific entities? (legal terms, technical concepts, etc.)

Write Python code that will execute on SECTION_CONTENT to extract entities.
End your code with: store_variable("ENTITIES", entities)"""


RLM_VALIDATION_PROMPT = """You extracted these entity candidates from the document.
Validate each one and determine if it's a real, significant entity.

Candidates:
{candidates_json}

For each candidate, provide:
1. valid: true if significant entity, false if noise
2. type: Entity type (PERSON, ORGANIZATION, DATE, MONETARY, LOCATION, etc.)
3. canonical_name: Cleaned/normalized name
4. confidence: 0.0-1.0

Return JSON array:
```json
[
  {{"id": 0, "valid": true, "type": "PERSON", "canonical_name": "John Smith", "confidence": 0.9}},
  {{"id": 1, "valid": false, "reason": "Generic term, not specific entity"}}
]
```"""


# =============================================================================
# Lightweight REPL for Extraction (if full REPL not available)
# =============================================================================

class LightweightREPL:
    """
    Lightweight REPL for entity extraction.

    Provides the core DOC_VAR + code execution pattern
    without the full REPL infrastructure.
    """

    def __init__(self, document_text: str, section_content: str = ""):
        """Initialize with document text."""
        self.document_text = document_text
        self.section_content = section_content or document_text
        self.variables: dict[str, Any] = {}

        self._namespace = self._build_namespace()

    def _build_namespace(self) -> dict[str, Any]:
        """Build Python namespace for code execution."""
        return {
            # Core variables
            "DOC_VAR": self.document_text,
            "SECTION_CONTENT": self.section_content,
            "VARIABLES": self.variables,

            # Built-ins
            "len": len,
            "str": str,
            "int": int,
            "float": float,
            "list": list,
            "dict": dict,
            "range": range,
            "enumerate": enumerate,
            "sorted": sorted,
            "min": min,
            "max": max,
            "re": re,

            # Functions
            "search_text": self._search_text,
            "store_variable": self._store_variable,
            "get_variable": self._get_variable,
        }

    def _search_text(self, pattern: str) -> list[tuple[int, int, str]]:
        """Search document for regex pattern."""
        results = []
        try:
            for match in re.finditer(pattern, self.section_content, re.IGNORECASE):
                results.append((match.start(), match.end(), match.group()))
        except re.error as e:
            logger.warning("regex_error", pattern=pattern, error=str(e))
        return results

    def _store_variable(self, name: str, content: Any) -> str:
        """Store a variable."""
        self.variables[name] = content
        return f"Stored ${name}"

    def _get_variable(self, name: str) -> Any:
        """Retrieve a variable."""
        return self.variables.get(name)

    def execute(self, code: str) -> dict[str, Any]:
        """Execute Python code."""
        result = {
            "success": False,
            "output": None,
            "error": None,
            "variables": list(self.variables.keys()),
        }

        # Clean code
        code = self._clean_code(code)

        try:
            # Compile and execute
            compiled = compile(code, "<rlm_extraction>", "exec")
            exec(compiled, self._namespace)

            result["success"] = True
            result["variables"] = list(self.variables.keys())
            result["output"] = self.variables.get("ENTITIES", [])

        except Exception as e:
            result["error"] = str(e)
            logger.warning("rlm_execution_error", error=str(e), code=code[:200])

        return result

    def _clean_code(self, code: str) -> str:
        """Remove markdown code blocks."""
        code = re.sub(r'^```python\s*', '', code, flags=re.MULTILINE)
        code = re.sub(r'^```\s*$', '', code, flags=re.MULTILINE)
        return code.strip()


# =============================================================================
# RLM Entity Extractor
# =============================================================================

@dataclass
class RLMExtractionResult:
    """Result of RLM-based extraction."""

    entities: list[Entity] = field(default_factory=list)
    code_generated: str = ""
    code_executed: bool = False
    execution_output: Any = None
    raw_candidates: list[dict] = field(default_factory=list)
    processing_time_ms: float = 0.0
    warnings: list[str] = field(default_factory=list)


class RLMEntityExtractor:
    """
    RLM-based entity extractor.

    The LLM writes its own extraction code based on the document,
    then the code executes on DOC_VAR (grounded).

    Flow:
    1. Show LLM a preview of the document
    2. LLM writes Python code to extract entities
    3. Code executes on actual document (grounded)
    4. LLM validates/classifies the extracted candidates
    """

    def __init__(
        self,
        llm: Any | None = None,
        repl_environment: "REPLEnvironment | None" = None,
        enable_type_learning: bool = True,
        max_code_attempts: int = 3,
        validate_with_llm: bool = True,
    ):
        """
        Initialize the RLM extractor.

        Args:
            llm: LLM instance.
            repl_environment: Optional full REPL environment.
            enable_type_learning: Learn new entity types.
            max_code_attempts: Max attempts if code fails.
            validate_with_llm: Validate candidates with LLM.
        """
        self.llm = llm
        self.repl_environment = repl_environment
        self.enable_type_learning = enable_type_learning
        self.max_code_attempts = max_code_attempts
        self.validate_with_llm = validate_with_llm

        self._llm_initialized = False
        self._type_registry = get_learned_type_registry() if enable_type_learning else None

    def _get_llm(self) -> Any:
        """Get or initialize LLM."""
        if self.llm is None and not self._llm_initialized:
            self.llm = get_llm()
            self._llm_initialized = True
        return self.llm

    def extract_from_node(
        self,
        node_id: str,
        doc_id: str,
        header: str,
        content: str,
        page_num: int | None = None,
        document_text: str | None = None,
    ) -> ExtractionResult:
        """
        Extract entities using RLM approach.

        The LLM writes code to extract entities, which is then
        executed on the actual document text.

        Args:
            node_id: Section node ID.
            doc_id: Document ID.
            header: Section header.
            content: Section content.
            page_num: Page number.
            document_text: Full document text for DOC_VAR.

        Returns:
            ExtractionResult with extracted entities.
        """
        start_time = time.time()

        result = ExtractionResult(
            node_id=node_id,
            doc_id=doc_id,
            extraction_method="rlm",
        )

        if len(content.strip()) < 50:
            return result

        llm = self._get_llm()
        if llm is None:
            result.warnings.append("No LLM available for RLM extraction")
            return result

        # STEP 1: LLM generates extraction code
        rlm_result = self._generate_and_execute_code(
            header=header,
            content=content,
            document_text=document_text or content,
        )

        if not rlm_result.code_executed:
            result.warnings.append(f"Code execution failed: {rlm_result.warnings}")
            return result

        # STEP 2: Validate candidates with LLM
        if self.validate_with_llm and rlm_result.raw_candidates:
            validated = self._validate_candidates(rlm_result.raw_candidates)
        else:
            validated = rlm_result.raw_candidates

        # STEP 3: Convert to Entity objects
        entities = self._candidates_to_entities(
            candidates=validated,
            node_id=node_id,
            doc_id=doc_id,
            content=content,
            page_num=page_num,
        )

        result.entities = entities
        result.processing_time_ms = (time.time() - start_time) * 1000

        logger.info(
            "rlm_extraction_complete",
            node_id=node_id,
            candidates=len(rlm_result.raw_candidates),
            validated=len(entities),
            time_ms=result.processing_time_ms,
        )

        return result

    def _generate_and_execute_code(
        self,
        header: str,
        content: str,
        document_text: str,
    ) -> RLMExtractionResult:
        """Generate extraction code and execute it."""
        result = RLMExtractionResult()

        llm = self._get_llm()

        # Create REPL environment
        repl = self.repl_environment or LightweightREPL(
            document_text=document_text,
            section_content=content,
        )

        # Generate code prompt
        prompt = RLM_ENTITY_EXTRACTION_SYSTEM + "\n\n" + RLM_EXTRACTION_PROMPT.format(
            header=header,
            content_preview=content[:2000],
            content_length=len(content),
        )

        for attempt in range(self.max_code_attempts):
            try:
                # LLM generates extraction code
                response = llm.complete(prompt)
                code = str(response) if not isinstance(response, str) else response
                result.code_generated = code

                # Execute the code
                exec_result = repl.execute(code)

                if exec_result["success"]:
                    result.code_executed = True
                    result.execution_output = exec_result["output"]
                    result.raw_candidates = exec_result.get("output", [])

                    # Get ENTITIES from variables if not in output
                    if not result.raw_candidates and hasattr(repl, 'variables'):
                        result.raw_candidates = repl.variables.get("ENTITIES", [])

                    break
                else:
                    result.warnings.append(f"Attempt {attempt + 1}: {exec_result['error']}")
                    # Add error to prompt for retry
                    prompt += f"\n\nPrevious code had error: {exec_result['error']}\nPlease fix and try again."

            except Exception as e:
                result.warnings.append(f"Attempt {attempt + 1}: {str(e)}")

        return result

    def _validate_candidates(
        self,
        candidates: list[dict],
    ) -> list[dict]:
        """Validate extracted candidates with LLM."""
        if not candidates:
            return []

        llm = self._get_llm()

        # Format candidates for validation
        candidates_json = json.dumps([
            {
                "id": i,
                "text": c.get("text", ""),
                "type": c.get("type", "UNKNOWN"),
                "context": c.get("context", "")[:100] if c.get("context") else "",
            }
            for i, c in enumerate(candidates[:30])  # Limit
        ], indent=2)

        prompt = RLM_VALIDATION_PROMPT.format(candidates_json=candidates_json)

        try:
            response = llm.complete(prompt)
            response_text = str(response) if not isinstance(response, str) else response

            # Parse validation response
            json_match = re.search(r'\[[\s\S]*\]', response_text)
            if not json_match:
                return candidates

            validations = json.loads(json_match.group())

            # Merge validations with candidates
            validated = []
            validation_by_id = {v.get("id"): v for v in validations}

            for i, candidate in enumerate(candidates):
                validation = validation_by_id.get(i, {})

                if validation.get("valid", True):
                    candidate["type"] = validation.get("type", candidate.get("type", "OTHER"))
                    candidate["canonical_name"] = validation.get("canonical_name", candidate.get("text", ""))
                    candidate["confidence"] = validation.get("confidence", candidate.get("confidence", 0.5))
                    validated.append(candidate)

            return validated

        except Exception as e:
            logger.warning("rlm_validation_failed", error=str(e))
            return candidates

    def _candidates_to_entities(
        self,
        candidates: list[dict],
        node_id: str,
        doc_id: str,
        content: str,
        page_num: int | None,
    ) -> list[Entity]:
        """Convert validated candidates to Entity objects."""
        entities = []

        for candidate in candidates:
            if not candidate.get("text"):
                continue

            # Map entity type
            entity_type = self._map_entity_type(candidate.get("type", "OTHER"))

            # Learn new types
            if entity_type == EntityType.OTHER and self._type_registry:
                self._type_registry.record_type(
                    type_name=candidate.get("type", "unknown").lower(),
                    context=candidate.get("context", content[:100]),
                    entity_name=candidate.get("text", ""),
                )

            # Create mention
            mention = Mention(
                node_id=node_id,
                doc_id=doc_id,
                span_start=candidate.get("start"),
                span_end=candidate.get("end"),
                context=candidate.get("context", content[
                    max(0, (candidate.get("start") or 0) - 50):
                    (candidate.get("end") or 0) + 50
                ]),
                page_num=page_num,
                confidence=candidate.get("confidence", 0.5),
            )

            # Build metadata
            metadata = {
                "rlm_extracted": True,
                "grounded": candidate.get("start") is not None,
            }

            if entity_type == EntityType.OTHER:
                metadata["original_type"] = candidate.get("type", "").lower()

            entity = Entity(
                type=entity_type,
                canonical_name=candidate.get("canonical_name", candidate.get("text", "")),
                aliases=[candidate.get("text")] if candidate.get("canonical_name") != candidate.get("text") else [],
                mentions=[mention],
                metadata=metadata,
                source_doc_id=doc_id,
            )
            entities.append(entity)

        return entities

    def _map_entity_type(self, type_str: str) -> EntityType:
        """Map type string to EntityType enum."""
        type_str = type_str.upper()

        mapping = {
            "PERSON": EntityType.PERSON,
            "PEOPLE": EntityType.PERSON,
            "NAME": EntityType.PERSON,
            "ORGANIZATION": EntityType.ORGANIZATION,
            "ORG": EntityType.ORGANIZATION,
            "COMPANY": EntityType.ORGANIZATION,
            "DATE": EntityType.DATE,
            "TIME": EntityType.DATE,
            "MONETARY": EntityType.MONETARY,
            "MONEY": EntityType.MONETARY,
            "AMOUNT": EntityType.MONETARY,
            "LOCATION": EntityType.LOCATION,
            "PLACE": EntityType.LOCATION,
            "ADDRESS": EntityType.LOCATION,
            "REFERENCE": EntityType.REFERENCE,
            "CITATION": EntityType.REFERENCE,
            "DOCUMENT": EntityType.DOCUMENT,
            "EVENT": EntityType.EVENT,
            "LEGAL_CONCEPT": EntityType.LEGAL_CONCEPT,
            "LEGAL": EntityType.LEGAL_CONCEPT,
        }

        try:
            return EntityType(type_str.lower())
        except ValueError:
            return mapping.get(type_str, EntityType.OTHER)
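
Usage sketch for the two entry points in this module, LightweightREPL and RLMEntityExtractor. It assumes the wheel installs as `rnsr` with the module path shown in the file list above and that `rnsr.llm.get_llm()` can reach a configured provider; the hand-written extraction snippet stands in for code the LLM would normally generate.

```python
from rnsr.extraction.rlm_extractor import LightweightREPL, RLMEntityExtractor

text = "Dr. Jane Doe invoiced Acme Corp. $1,200.00 on March 3, 2024."

# 1. The sandbox alone: execute extraction code against SECTION_CONTENT,
#    grounded in the actual text (this snippet plays the LLM's role).
repl = LightweightREPL(document_text=text)
code = """
entities = []
for m in re.finditer(r'\\$[\\d,]+(?:\\.\\d{2})?', SECTION_CONTENT):
    entities.append({"text": m.group(), "type": "MONETARY",
                     "start": m.start(), "end": m.end(), "confidence": 0.95})
store_variable("ENTITIES", entities)
"""
exec_result = repl.execute(code)
print(exec_result["success"], exec_result["output"])  # True, one MONETARY candidate

# 2. The full loop: the LLM writes the code, the REPL executes it, and the LLM
#    validates the candidates (requires a provider for get_llm()).
extractor = RLMEntityExtractor(max_code_attempts=3, validate_with_llm=True)
extraction = extractor.extract_from_node(
    node_id="sec-1",
    doc_id="doc-1",
    header="Invoice",
    content=text,
    document_text=text,
)
for entity in extraction.entities:
    print(entity.type, entity.canonical_name)
```

LightweightREPL is usable on its own because the grounding step is just `exec` over a namespace exposing DOC_VAR, SECTION_CONTENT, `re`, and `store_variable`; RLMEntityExtractor only wraps that sandbox with the code-generation and validation LLM calls.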