rnsr-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. rnsr/__init__.py +118 -0
  2. rnsr/__main__.py +242 -0
  3. rnsr/agent/__init__.py +218 -0
  4. rnsr/agent/cross_doc_navigator.py +767 -0
  5. rnsr/agent/graph.py +1557 -0
  6. rnsr/agent/llm_cache.py +575 -0
  7. rnsr/agent/navigator_api.py +497 -0
  8. rnsr/agent/provenance.py +772 -0
  9. rnsr/agent/query_clarifier.py +617 -0
  10. rnsr/agent/reasoning_memory.py +736 -0
  11. rnsr/agent/repl_env.py +709 -0
  12. rnsr/agent/rlm_navigator.py +2108 -0
  13. rnsr/agent/self_reflection.py +602 -0
  14. rnsr/agent/variable_store.py +308 -0
  15. rnsr/benchmarks/__init__.py +118 -0
  16. rnsr/benchmarks/comprehensive_benchmark.py +733 -0
  17. rnsr/benchmarks/evaluation_suite.py +1210 -0
  18. rnsr/benchmarks/finance_bench.py +147 -0
  19. rnsr/benchmarks/pdf_merger.py +178 -0
  20. rnsr/benchmarks/performance.py +321 -0
  21. rnsr/benchmarks/quality.py +321 -0
  22. rnsr/benchmarks/runner.py +298 -0
  23. rnsr/benchmarks/standard_benchmarks.py +995 -0
  24. rnsr/client.py +560 -0
  25. rnsr/document_store.py +394 -0
  26. rnsr/exceptions.py +74 -0
  27. rnsr/extraction/__init__.py +172 -0
  28. rnsr/extraction/candidate_extractor.py +357 -0
  29. rnsr/extraction/entity_extractor.py +581 -0
  30. rnsr/extraction/entity_linker.py +825 -0
  31. rnsr/extraction/grounded_extractor.py +722 -0
  32. rnsr/extraction/learned_types.py +599 -0
  33. rnsr/extraction/models.py +232 -0
  34. rnsr/extraction/relationship_extractor.py +600 -0
  35. rnsr/extraction/relationship_patterns.py +511 -0
  36. rnsr/extraction/relationship_validator.py +392 -0
  37. rnsr/extraction/rlm_extractor.py +589 -0
  38. rnsr/extraction/rlm_unified_extractor.py +990 -0
  39. rnsr/extraction/tot_validator.py +610 -0
  40. rnsr/extraction/unified_extractor.py +342 -0
  41. rnsr/indexing/__init__.py +60 -0
  42. rnsr/indexing/knowledge_graph.py +1128 -0
  43. rnsr/indexing/kv_store.py +313 -0
  44. rnsr/indexing/persistence.py +323 -0
  45. rnsr/indexing/semantic_retriever.py +237 -0
  46. rnsr/indexing/semantic_search.py +320 -0
  47. rnsr/indexing/skeleton_index.py +395 -0
  48. rnsr/ingestion/__init__.py +161 -0
  49. rnsr/ingestion/chart_parser.py +569 -0
  50. rnsr/ingestion/document_boundary.py +662 -0
  51. rnsr/ingestion/font_histogram.py +334 -0
  52. rnsr/ingestion/header_classifier.py +595 -0
  53. rnsr/ingestion/hierarchical_cluster.py +515 -0
  54. rnsr/ingestion/layout_detector.py +356 -0
  55. rnsr/ingestion/layout_model.py +379 -0
  56. rnsr/ingestion/ocr_fallback.py +177 -0
  57. rnsr/ingestion/pipeline.py +936 -0
  58. rnsr/ingestion/semantic_fallback.py +417 -0
  59. rnsr/ingestion/table_parser.py +799 -0
  60. rnsr/ingestion/text_builder.py +460 -0
  61. rnsr/ingestion/tree_builder.py +402 -0
  62. rnsr/ingestion/vision_retrieval.py +965 -0
  63. rnsr/ingestion/xy_cut.py +555 -0
  64. rnsr/llm.py +733 -0
  65. rnsr/models.py +167 -0
  66. rnsr/py.typed +2 -0
  67. rnsr-0.1.0.dist-info/METADATA +592 -0
  68. rnsr-0.1.0.dist-info/RECORD +72 -0
  69. rnsr-0.1.0.dist-info/WHEEL +5 -0
  70. rnsr-0.1.0.dist-info/entry_points.txt +2 -0
  71. rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
  72. rnsr-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,581 @@
"""
RNSR Entity Extractor

DEPRECATED: This extractor uses an LLM-first approach that can hallucinate.
Use RLMUnifiedExtractor instead for grounded, accurate extraction.

LLM-based entity extraction from document sections.
Extracts people, organizations, dates, legal concepts, and other entities.

Features adaptive learning: when the LLM discovers new entity types, they are
stored in a learned-types registry and used in future extraction prompts.
"""

from __future__ import annotations

import json
import re
import time
import warnings
from typing import Any

import structlog

from rnsr.extraction.learned_types import (
    get_learned_type_registry,
    record_learned_type,
)
from rnsr.extraction.models import (
    Entity,
    EntityType,
    ExtractionResult,
    Mention,
)
from rnsr.llm import get_llm

logger = structlog.get_logger(__name__)

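# Adaptive type learning at a glance (a minimal sketch; "patent_claim" is a
# hypothetical type name, and the registry methods shown are the ones this
# module calls below):
#
#   registry = get_learned_type_registry()
#   registry.record_type(
#       type_name="patent_claim",
#       context="Claim 1 of the '123 patent covers...",
#       entity_name="Claim 1",
#   )
#   # Types seen often enough are fed back into future extraction prompts:
#   registry.get_types_for_prompt(min_count=2, limit=15)
#   # -> ["patent_claim", ...]
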
# Deprecation warning
_DEPRECATION_WARNING = """
EntityExtractor is deprecated and may hallucinate entities.
Use RLMUnifiedExtractor instead for grounded, accurate extraction:

    from rnsr.extraction import RLMUnifiedExtractor
    extractor = RLMUnifiedExtractor()
    result = extractor.extract(node_id, doc_id, header, content)
"""


# Entity extraction prompt template
ENTITY_EXTRACTION_PROMPT = """You are an expert entity extractor for legal and business documents.

Analyze the following document section and extract all significant entities.

Document Section:
---
{content}
---

Section ID: {node_id}
Document ID: {doc_id}
Section Header: {header}

Extract entities of the following types:
- PERSON: Names of individuals, including their roles if mentioned (e.g., "plaintiff", "defendant", "witness", "CEO")
- ORGANIZATION: Companies, agencies, courts, government bodies
- LEGAL_CONCEPT: Legal claims, breaches, obligations, remedies, causes of action
- DATE: Specific dates, time periods, deadlines
- EVENT: Significant occurrences (hearings, signings, breaches, filings)
- LOCATION: Places, addresses, jurisdictions
- REFERENCE: Section references, exhibit numbers, document citations
- MONETARY: Dollar amounts, financial figures
- DOCUMENT: Referenced documents (contracts, exhibits, agreements)
{learned_types_section}
For each entity, provide:
1. type: One of the types above (or your own descriptive type if none fit)
2. canonical_name: The standardized/normalized name
3. aliases: Any alternative names or spellings found
4. context: The surrounding sentence or phrase where the entity appears
5. metadata: Any additional relevant information (roles, dates, amounts)

Return your response as a JSON array of entities:
```json
[
    {{
        "type": "PERSON",
        "canonical_name": "John Smith",
        "aliases": ["Mr. Smith", "J. Smith"],
        "context": "John Smith, the defendant, filed a motion...",
        "metadata": {{"role": "defendant"}}
    }},
    ...
]
```

If no entities are found, return an empty array: []

Important:
- Be thorough but precise - only extract clearly identifiable entities
- Normalize names (e.g., "Mr. John Smith" -> "John Smith")
- Include context that helps understand the entity's role
- For legal concepts, use standardized legal terminology
- If an entity doesn't fit the predefined types, use your own descriptive type name
"""

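# For reference: when learned types exist, _extract_with_llm() fills
# {learned_types_section} with a block like the following (type names are
# hypothetical):
#
#   Additionally, these domain-specific types have been learned from previous documents:
#   - PATENT_CLAIM, ROYALTY_TERM

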
class EntityExtractor:
    """
    DEPRECATED: Extracts entities from document sections using an LLM-first
    approach. This extractor can hallucinate entities; use RLMUnifiedExtractor
    instead.

    Supports batch processing, caching, and adaptive learning of entity types.
    When new entity types are discovered, they are stored and used in future
    prompts.
    """

    def __init__(
        self,
        llm: Any | None = None,
        min_content_length: int = 50,
        max_content_length: int = 8000,
        enable_type_learning: bool = True,
        learned_type_min_count: int = 2,
        suppress_deprecation_warning: bool = False,
    ):
        """
        Initialize the entity extractor.

        Args:
            llm: LLM instance to use. If None, uses get_llm().
            min_content_length: Minimum content length to process.
            max_content_length: Maximum content length per extraction call.
            enable_type_learning: Whether to learn new entity types.
            learned_type_min_count: Minimum occurrences before a learned type
                is included in extraction prompts.
            suppress_deprecation_warning: If True, skip the DeprecationWarning
                emitted on construction.
        """
        # Emit deprecation warning
        if not suppress_deprecation_warning:
            warnings.warn(
                _DEPRECATION_WARNING,
                DeprecationWarning,
                stacklevel=2,
            )
            logger.warning("deprecated_extractor_used", extractor="EntityExtractor")

        self.llm = llm or get_llm()
        self.min_content_length = min_content_length
        self.max_content_length = max_content_length
        self.enable_type_learning = enable_type_learning
        self.learned_type_min_count = learned_type_min_count

        # Cache for extracted entities (doc_id:node_id -> entities)
        self._cache: dict[str, list[Entity]] = {}

        # Get learned type registry
        self._type_registry = get_learned_type_registry() if enable_type_learning else None

    def extract_from_node(
        self,
        node_id: str,
        doc_id: str,
        header: str,
        content: str,
        page_num: int | None = None,
    ) -> ExtractionResult:
        """
        Extract entities from a single document node.

        Args:
            node_id: Skeleton node ID.
            doc_id: Document ID.
            header: Section header text.
            content: Full section content.
            page_num: Page number if available.

        Returns:
            ExtractionResult with extracted entities.
        """
        start_time = time.time()
        result = ExtractionResult(
            node_id=node_id,
            doc_id=doc_id,
            extraction_method="llm",
        )

        # Skip very short content
        if len(content.strip()) < self.min_content_length:
            logger.debug(
                "skipping_short_content",
                node_id=node_id,
                content_length=len(content),
            )
            return result

        # Check cache
        cache_key = f"{doc_id}:{node_id}"
        if cache_key in self._cache:
            result.entities = self._cache[cache_key]
            logger.debug("using_cached_entities", node_id=node_id)
            return result

        # Truncate content if too long (capture the original length before
        # truncating so the warning reports it correctly)
        if len(content) > self.max_content_length:
            original_length = len(content)
            content = content[: self.max_content_length] + "..."
            result.warnings.append(
                f"Content truncated from {original_length} to "
                f"{self.max_content_length} chars"
            )

        try:
            entities = self._extract_with_llm(
                node_id=node_id,
                doc_id=doc_id,
                header=header,
                content=content,
                page_num=page_num,
            )
            result.entities = entities

            # Cache results
            self._cache[cache_key] = entities

        except Exception as e:
            logger.error(
                "entity_extraction_failed",
                node_id=node_id,
                error=str(e),
            )
            result.warnings.append(f"Extraction failed: {e}")

        result.processing_time_ms = (time.time() - start_time) * 1000

        logger.info(
            "entities_extracted",
            node_id=node_id,
            entity_count=len(result.entities),
            processing_time_ms=result.processing_time_ms,
        )

        return result

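    # Illustrative single-node call (argument values are hypothetical):
    #
    #   result = extractor.extract_from_node(
    #       node_id="sec-4.2",
    #       doc_id="contract-001",
    #       header="Indemnification",
    #       content=section_text,
    #   )
    #   result.entities            # list[Entity]
    #   result.warnings            # truncation / failure notes
    #   result.processing_time_ms  # wall-clock time for this node
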
    def _extract_with_llm(
        self,
        node_id: str,
        doc_id: str,
        header: str,
        content: str,
        page_num: int | None = None,
    ) -> list[Entity]:
        """
        Use LLM to extract entities from content.

        Args:
            node_id: Skeleton node ID.
            doc_id: Document ID.
            header: Section header.
            content: Section content.
            page_num: Page number.

        Returns:
            List of extracted Entity objects.
        """
        # Build learned types section for prompt
        learned_types_section = ""
        if self._type_registry:
            learned_types = self._type_registry.get_types_for_prompt(
                min_count=self.learned_type_min_count,
                limit=15,
            )
            if learned_types:
                types_list = ", ".join(learned_types).upper()
                learned_types_section = (
                    "\nAdditionally, these domain-specific types have been "
                    f"learned from previous documents:\n- {types_list}\n"
                )

        prompt = ENTITY_EXTRACTION_PROMPT.format(
            content=content,
            node_id=node_id,
            doc_id=doc_id,
            header=header,
            learned_types_section=learned_types_section,
        )

        # Call LLM
        response = self.llm.complete(prompt)
        response_text = response if isinstance(response, str) else str(response)

        # Parse JSON from response
        entities = self._parse_llm_response(
            response_text=response_text,
            node_id=node_id,
            doc_id=doc_id,
            page_num=page_num,
        )

        return entities

    def _parse_llm_response(
        self,
        response_text: str,
        node_id: str,
        doc_id: str,
        page_num: int | None = None,
    ) -> list[Entity]:
        """
        Parse LLM response into Entity objects.

        Args:
            response_text: Raw LLM response.
            node_id: Source node ID.
            doc_id: Source document ID.
            page_num: Page number.

        Returns:
            List of Entity objects.
        """
        # Extract JSON from response (may be wrapped in a markdown code block)
        json_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", response_text)
        if json_match:
            json_str = json_match.group(1)
        else:
            # Try to find a JSON array directly
            json_match = re.search(r"\[[\s\S]*\]", response_text)
            if json_match:
                json_str = json_match.group(0)
            else:
                logger.warning(
                    "no_json_found_in_response",
                    response_preview=response_text[:200],
                )
                return []

        try:
            raw_entities = json.loads(json_str)
        except json.JSONDecodeError as e:
            logger.warning(
                "json_parse_error",
                error=str(e),
                json_preview=json_str[:200],
            )
            return []

        if not isinstance(raw_entities, list):
            logger.warning("expected_list_of_entities", got=type(raw_entities).__name__)
            return []

        entities = []
        for raw in raw_entities:
            try:
                entity = self._create_entity_from_raw(
                    raw=raw,
                    node_id=node_id,
                    doc_id=doc_id,
                    page_num=page_num,
                )
                if entity:
                    entities.append(entity)
            except Exception as e:
                logger.debug(
                    "failed_to_create_entity",
                    raw=raw,
                    error=str(e),
                )

        return entities

    def _create_entity_from_raw(
        self,
        raw: dict[str, Any],
        node_id: str,
        doc_id: str,
        page_num: int | None = None,
    ) -> Entity | None:
        """
        Create an Entity object from raw LLM output.

        Args:
            raw: Raw entity dict from LLM.
            node_id: Source node ID.
            doc_id: Source document ID.
            page_num: Page number.

        Returns:
            Entity object or None if invalid.
        """
        # Parse entity type
        type_str = raw.get("type", "").upper()
        original_type = type_str  # Preserve for metadata

        try:
            entity_type = EntityType(type_str.lower())
        except ValueError:
            # Try mapping common variations
            type_mapping = {
                "PERSON": EntityType.PERSON,
                "PEOPLE": EntityType.PERSON,
                "INDIVIDUAL": EntityType.PERSON,
                "NAME": EntityType.PERSON,
                "ORGANIZATION": EntityType.ORGANIZATION,
                "ORG": EntityType.ORGANIZATION,
                "COMPANY": EntityType.ORGANIZATION,
                "AGENCY": EntityType.ORGANIZATION,
                "COURT": EntityType.ORGANIZATION,
                "LEGAL_CONCEPT": EntityType.LEGAL_CONCEPT,
                "LEGAL": EntityType.LEGAL_CONCEPT,
                "CONCEPT": EntityType.LEGAL_CONCEPT,
                "CLAIM": EntityType.LEGAL_CONCEPT,
                "OBLIGATION": EntityType.LEGAL_CONCEPT,
                "DATE": EntityType.DATE,
                "TIME": EntityType.DATE,
                "DATETIME": EntityType.DATE,
                "PERIOD": EntityType.DATE,
                "EVENT": EntityType.EVENT,
                "OCCURRENCE": EntityType.EVENT,
                "INCIDENT": EntityType.EVENT,
                "LOCATION": EntityType.LOCATION,
                "PLACE": EntityType.LOCATION,
                "ADDRESS": EntityType.LOCATION,
                "JURISDICTION": EntityType.LOCATION,
                "REFERENCE": EntityType.REFERENCE,
                "REF": EntityType.REFERENCE,
                "CITATION": EntityType.REFERENCE,
                "SECTION": EntityType.REFERENCE,
                "MONETARY": EntityType.MONETARY,
                "MONEY": EntityType.MONETARY,
                "AMOUNT": EntityType.MONETARY,
                "CURRENCY": EntityType.MONETARY,
                "FINANCIAL": EntityType.MONETARY,
                "DOCUMENT": EntityType.DOCUMENT,
                "DOC": EntityType.DOCUMENT,
                "CONTRACT": EntityType.DOCUMENT,
                "AGREEMENT": EntityType.DOCUMENT,
                "EXHIBIT": EntityType.DOCUMENT,
            }
            entity_type = type_mapping.get(type_str)

            if not entity_type:
                # Check if we have a learned mapping for this type
                if self._type_registry:
                    mappings = self._type_registry.get_mappings()
                    if type_str.lower() in mappings:
                        mapped_type = mappings[type_str.lower()]
                        try:
                            entity_type = EntityType(mapped_type.lower())
                            logger.debug(
                                "using_learned_mapping",
                                original=type_str,
                                mapped_to=mapped_type,
                            )
                        except ValueError:
                            pass

            if not entity_type:
                # Use OTHER as fallback - never drop entities
                logger.debug("unmapped_entity_type_using_other", type=type_str)
                entity_type = EntityType.OTHER

        # Get canonical name
        canonical_name = raw.get("canonical_name", "").strip()
        if not canonical_name:
            canonical_name = raw.get("name", "").strip()
        if not canonical_name:
            return None

        # Get aliases
        aliases = raw.get("aliases", [])
        if isinstance(aliases, str):
            aliases = [aliases]
        aliases = [a.strip() for a in aliases if a and a.strip()]

        # Get context
        context = raw.get("context", "").strip()

        # Get metadata
        metadata = raw.get("metadata", {})
        if not isinstance(metadata, dict):
            metadata = {}

        # Preserve original type if we used the OTHER fallback
        if entity_type == EntityType.OTHER and original_type:
            metadata["original_type"] = original_type.lower()

        # Record this type for adaptive learning (skip empty type strings)
        if self._type_registry and self.enable_type_learning and original_type:
            self._type_registry.record_type(
                type_name=original_type.lower(),
                context=context,
                entity_name=canonical_name,
            )

        # Create mention
        mention = Mention(
            node_id=node_id,
            doc_id=doc_id,
            context=context,
            page_num=page_num,
            confidence=1.0,
        )

        # Create entity
        entity = Entity(
            type=entity_type,
            canonical_name=canonical_name,
            aliases=aliases,
            mentions=[mention],
            metadata=metadata,
            source_doc_id=doc_id,
        )

        return entity

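    # End-to-end type resolution in _create_entity_from_raw (illustrative):
    # {"type": "COURT", "canonical_name": "Ninth Circuit"} resolves to
    # EntityType.ORGANIZATION via the variation table, while an unknown type
    # such as "STATUTE" (absent from the table and the learned mappings) falls
    # through to EntityType.OTHER with metadata["original_type"] = "statute".
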
    def extract_batch(
        self,
        nodes: list[dict[str, Any]],
    ) -> list[ExtractionResult]:
        """
        Extract entities from multiple nodes.

        Args:
            nodes: List of node dicts with keys: node_id, doc_id, header,
                content, page_num.

        Returns:
            List of ExtractionResult objects.
        """
        results = []

        for node in nodes:
            result = self.extract_from_node(
                node_id=node.get("node_id", ""),
                doc_id=node.get("doc_id", ""),
                header=node.get("header", ""),
                content=node.get("content", ""),
                page_num=node.get("page_num"),
            )
            results.append(result)

        return results

    def clear_cache(self) -> None:
        """Clear the entity cache."""
        self._cache.clear()


def merge_entities(entities: list[Entity]) -> list[Entity]:
    """
    Merge duplicate entities based on canonical name and type.

    Combines mentions and aliases from duplicates.

    Args:
        entities: List of entities to merge.

    Returns:
        Deduplicated list of entities.
    """
    # Group by (type, normalized canonical_name)
    grouped: dict[tuple[EntityType, str], list[Entity]] = {}

    for entity in entities:
        key = (entity.type, entity.canonical_name.lower().strip())
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(entity)

    # Merge each group
    merged = []
    for entities_group in grouped.values():
        if len(entities_group) == 1:
            merged.append(entities_group[0])
        else:
            # Merge into the first entity
            primary = entities_group[0]
            for other in entities_group[1:]:
                # Merge mentions
                primary.mentions.extend(other.mentions)

                # Merge aliases
                for alias in other.aliases:
                    primary.add_alias(alias)

                # Merge metadata (prefer primary's values on conflict)
                for k, v in other.metadata.items():
                    if k not in primary.metadata:
                        primary.metadata[k] = v

            merged.append(primary)

    return merged
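
Taken together, a minimal usage sketch for this (deprecated) extractor could look like the following; the node values are hypothetical, and `suppress_deprecation_warning=True` is passed only to keep the sketch quiet:

```python
from rnsr.extraction.entity_extractor import EntityExtractor, merge_entities

# Hypothetical section payloads; the keys match what extract_batch() expects.
nodes = [
    {
        "node_id": "sec-1",
        "doc_id": "contract-001",
        "header": "Parties",
        "content": "This Agreement is made between Acme Corp and John Smith...",
        "page_num": 1,
    },
]

extractor = EntityExtractor(suppress_deprecation_warning=True)
results = extractor.extract_batch(nodes)

# Flatten per-node results, then collapse duplicates that share
# (type, canonical_name), merging their mentions and aliases.
all_entities = [e for r in results for e in r.entities]
unique_entities = merge_entities(all_entities)
```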