rnsr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. rnsr/__init__.py +118 -0
  2. rnsr/__main__.py +242 -0
  3. rnsr/agent/__init__.py +218 -0
  4. rnsr/agent/cross_doc_navigator.py +767 -0
  5. rnsr/agent/graph.py +1557 -0
  6. rnsr/agent/llm_cache.py +575 -0
  7. rnsr/agent/navigator_api.py +497 -0
  8. rnsr/agent/provenance.py +772 -0
  9. rnsr/agent/query_clarifier.py +617 -0
  10. rnsr/agent/reasoning_memory.py +736 -0
  11. rnsr/agent/repl_env.py +709 -0
  12. rnsr/agent/rlm_navigator.py +2108 -0
  13. rnsr/agent/self_reflection.py +602 -0
  14. rnsr/agent/variable_store.py +308 -0
  15. rnsr/benchmarks/__init__.py +118 -0
  16. rnsr/benchmarks/comprehensive_benchmark.py +733 -0
  17. rnsr/benchmarks/evaluation_suite.py +1210 -0
  18. rnsr/benchmarks/finance_bench.py +147 -0
  19. rnsr/benchmarks/pdf_merger.py +178 -0
  20. rnsr/benchmarks/performance.py +321 -0
  21. rnsr/benchmarks/quality.py +321 -0
  22. rnsr/benchmarks/runner.py +298 -0
  23. rnsr/benchmarks/standard_benchmarks.py +995 -0
  24. rnsr/client.py +560 -0
  25. rnsr/document_store.py +394 -0
  26. rnsr/exceptions.py +74 -0
  27. rnsr/extraction/__init__.py +172 -0
  28. rnsr/extraction/candidate_extractor.py +357 -0
  29. rnsr/extraction/entity_extractor.py +581 -0
  30. rnsr/extraction/entity_linker.py +825 -0
  31. rnsr/extraction/grounded_extractor.py +722 -0
  32. rnsr/extraction/learned_types.py +599 -0
  33. rnsr/extraction/models.py +232 -0
  34. rnsr/extraction/relationship_extractor.py +600 -0
  35. rnsr/extraction/relationship_patterns.py +511 -0
  36. rnsr/extraction/relationship_validator.py +392 -0
  37. rnsr/extraction/rlm_extractor.py +589 -0
  38. rnsr/extraction/rlm_unified_extractor.py +990 -0
  39. rnsr/extraction/tot_validator.py +610 -0
  40. rnsr/extraction/unified_extractor.py +342 -0
  41. rnsr/indexing/__init__.py +60 -0
  42. rnsr/indexing/knowledge_graph.py +1128 -0
  43. rnsr/indexing/kv_store.py +313 -0
  44. rnsr/indexing/persistence.py +323 -0
  45. rnsr/indexing/semantic_retriever.py +237 -0
  46. rnsr/indexing/semantic_search.py +320 -0
  47. rnsr/indexing/skeleton_index.py +395 -0
  48. rnsr/ingestion/__init__.py +161 -0
  49. rnsr/ingestion/chart_parser.py +569 -0
  50. rnsr/ingestion/document_boundary.py +662 -0
  51. rnsr/ingestion/font_histogram.py +334 -0
  52. rnsr/ingestion/header_classifier.py +595 -0
  53. rnsr/ingestion/hierarchical_cluster.py +515 -0
  54. rnsr/ingestion/layout_detector.py +356 -0
  55. rnsr/ingestion/layout_model.py +379 -0
  56. rnsr/ingestion/ocr_fallback.py +177 -0
  57. rnsr/ingestion/pipeline.py +936 -0
  58. rnsr/ingestion/semantic_fallback.py +417 -0
  59. rnsr/ingestion/table_parser.py +799 -0
  60. rnsr/ingestion/text_builder.py +460 -0
  61. rnsr/ingestion/tree_builder.py +402 -0
  62. rnsr/ingestion/vision_retrieval.py +965 -0
  63. rnsr/ingestion/xy_cut.py +555 -0
  64. rnsr/llm.py +733 -0
  65. rnsr/models.py +167 -0
  66. rnsr/py.typed +2 -0
  67. rnsr-0.1.0.dist-info/METADATA +592 -0
  68. rnsr-0.1.0.dist-info/RECORD +72 -0
  69. rnsr-0.1.0.dist-info/WHEEL +5 -0
  70. rnsr-0.1.0.dist-info/entry_points.txt +2 -0
  71. rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
  72. rnsr-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,313 @@
1
+ """
2
+ KV Store - SQLite-Backed Key-Value Storage for Full Text
3
+
4
+ The Skeleton Index pattern requires separating:
5
+ - Summaries (stored in vector index for retrieval)
6
+ - Full Text (stored externally in this KV Store)
7
+
8
+ This prevents full text from polluting the LLM context until
9
+ explicitly requested during synthesis.
10
+
11
+ Usage:
12
+ kv = SQLiteKVStore("./data/document_kv.db")
13
+ kv.put("node_123", "Full text content here...")
14
+ content = kv.get("node_123")
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import hashlib
20
+ import sqlite3
21
+ from contextlib import contextmanager
22
+ from pathlib import Path
23
+ from typing import Iterator, Union
24
+
25
+ import structlog
26
+
27
+ from rnsr.exceptions import IndexingError
28
+
29
+ logger = structlog.get_logger(__name__)
30
+
31
+
32
class SQLiteKVStore:
    """
    SQLite-backed key-value store for document content.

    Stores full text content separately from the vector index,
    allowing the skeleton index to contain only summaries.

    A fresh connection is opened per operation, so instances are cheap
    to create and safe to use from short-lived call sites.

    Attributes:
        db_path: Path to the SQLite database file.
    """

    def __init__(self, db_path: Path | str):
        """
        Initialize the KV store.

        Args:
            db_path: Path to the SQLite database file.
                Will be created (along with parent directories) if it
                doesn't exist.
        """
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

        self._init_db()

        logger.info("kv_store_initialized", db_path=str(self.db_path))

    def _init_db(self) -> None:
        """Create the database schema if it doesn't exist."""
        with self._connect() as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS documents (
                    node_id TEXT PRIMARY KEY,
                    content TEXT NOT NULL,
                    content_hash TEXT NOT NULL,
                    char_count INTEGER NOT NULL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)

            # Secondary index so lookups by content hash don't scan the table.
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_content_hash
                ON documents(content_hash)
            """)

            conn.commit()

    @contextmanager
    def _connect(self) -> Iterator[sqlite3.Connection]:
        """Context manager yielding a connection that is always closed.

        Note: closing without an explicit commit rolls back any pending
        writes, so mutating methods must call conn.commit() themselves.
        """
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row  # allow row["column"] access
        try:
            yield conn
        finally:
            conn.close()

    def put(self, node_id: str, content: str) -> str:
        """
        Store content for a node (insert or update).

        Args:
            node_id: Unique identifier for the node.
            content: Full text content to store.

        Returns:
            The first 16 hex characters of the content's SHA-256 digest
            (truncated for compactness; not the full hash).
        """
        content_hash = hashlib.sha256(content.encode()).hexdigest()[:16]
        char_count = len(content)

        with self._connect() as conn:
            # Upsert: replace content and bump updated_at on conflict.
            conn.execute(
                """
                INSERT INTO documents (node_id, content, content_hash, char_count)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(node_id) DO UPDATE SET
                    content = excluded.content,
                    content_hash = excluded.content_hash,
                    char_count = excluded.char_count,
                    updated_at = CURRENT_TIMESTAMP
                """,
                (node_id, content, content_hash, char_count),
            )
            conn.commit()

        logger.debug(
            "kv_put",
            node_id=node_id,
            char_count=char_count,
            hash=content_hash,
        )

        return content_hash

    def get(self, node_id: str) -> str | None:
        """
        Retrieve content for a node.

        Args:
            node_id: Unique identifier for the node.

        Returns:
            Full text content, or None if not found.
        """
        with self._connect() as conn:
            cursor = conn.execute(
                "SELECT content FROM documents WHERE node_id = ?",
                (node_id,),
            )
            row = cursor.fetchone()

        if row is None:
            logger.debug("kv_miss", node_id=node_id)
            return None

        logger.debug("kv_hit", node_id=node_id)
        return row["content"]

    def get_batch(self, node_ids: list[str]) -> dict[str, str | None]:
        """
        Retrieve content for multiple nodes in one query.

        Args:
            node_ids: List of node identifiers.

        Returns:
            Dictionary mapping node_id to content (or None if not found).
            Every requested id appears as a key.
        """
        # Pre-fill with None so missing ids are reported explicitly.
        result: dict[str, str | None] = {nid: None for nid in node_ids}

        if not node_ids:
            return result

        # Placeholders are generated, never user content, so this f-string
        # is safe from SQL injection; values go through parameter binding.
        placeholders = ",".join("?" * len(node_ids))

        with self._connect() as conn:
            cursor = conn.execute(
                f"SELECT node_id, content FROM documents WHERE node_id IN ({placeholders})",
                node_ids,
            )
            for row in cursor:
                result[row["node_id"]] = row["content"]

        found = sum(1 for v in result.values() if v is not None)
        logger.debug("kv_batch_get", requested=len(node_ids), found=found)

        return result

    def delete(self, node_id: str) -> bool:
        """
        Delete content for a node.

        Args:
            node_id: Unique identifier for the node.

        Returns:
            True if deleted, False if not found.
        """
        with self._connect() as conn:
            cursor = conn.execute(
                "DELETE FROM documents WHERE node_id = ?",
                (node_id,),
            )
            conn.commit()
            deleted = cursor.rowcount > 0

        logger.debug("kv_delete", node_id=node_id, deleted=deleted)
        return deleted

    def exists(self, node_id: str) -> bool:
        """Check if a node exists in the store."""
        with self._connect() as conn:
            cursor = conn.execute(
                "SELECT 1 FROM documents WHERE node_id = ? LIMIT 1",
                (node_id,),
            )
            return cursor.fetchone() is not None

    def count(self) -> int:
        """Get the total number of stored documents."""
        with self._connect() as conn:
            cursor = conn.execute("SELECT COUNT(*) FROM documents")
            return cursor.fetchone()[0]

    def get_metadata(self, node_id: str) -> dict | None:
        """
        Get metadata about a stored document.

        Args:
            node_id: Unique identifier for the node.

        Returns:
            Dictionary with content_hash, char_count, created_at and
            updated_at, or None if the node is not stored.
        """
        with self._connect() as conn:
            cursor = conn.execute(
                """
                SELECT content_hash, char_count, created_at, updated_at
                FROM documents WHERE node_id = ?
                """,
                (node_id,),
            )
            row = cursor.fetchone()

        if row is None:
            return None

        return {
            "content_hash": row["content_hash"],
            "char_count": row["char_count"],
            "created_at": row["created_at"],
            "updated_at": row["updated_at"],
        }

    def clear(self) -> int:
        """
        Delete all documents from the store.

        Returns:
            Number of documents deleted.
        """
        with self._connect() as conn:
            # Count before deleting: cursor.rowcount for an unconditional
            # DELETE is reported as 0 by older SQLite builds (the truncate
            # optimization), so it is not a reliable deletion count.
            count = conn.execute("SELECT COUNT(*) FROM documents").fetchone()[0]
            conn.execute("DELETE FROM documents")
            conn.commit()

        logger.warning("kv_store_cleared", count=count)
        return count
261
+
262
+
263
class InMemoryKVStore:
    """
    In-memory key-value store for testing and ephemeral usage.

    API-compatible with SQLiteKVStore, except that get_metadata() here
    returns only content_hash and char_count (no timestamps are tracked).
    """

    def __init__(self):
        # node_id -> full text content
        self._store: dict[str, str] = {}
        # node_id -> {"content_hash": str, "char_count": int}
        self._metadata: dict[str, dict] = {}

    def put(self, node_id: str, content: str) -> str:
        """Store content for a node.

        Returns:
            The first 16 hex characters of the content's SHA-256 digest
            (same truncation as SQLiteKVStore.put).
        """
        content_hash = hashlib.sha256(content.encode()).hexdigest()[:16]
        self._store[node_id] = content
        self._metadata[node_id] = {
            "content_hash": content_hash,
            "char_count": len(content),
        }
        return content_hash

    def get(self, node_id: str) -> str | None:
        """Return the stored content, or None if the node is unknown."""
        return self._store.get(node_id)

    def get_batch(self, node_ids: list[str]) -> dict[str, str | None]:
        """Return a mapping of each requested id to its content or None."""
        return {nid: self._store.get(nid) for nid in node_ids}

    def delete(self, node_id: str) -> bool:
        """Delete a node's content; return True if it existed."""
        if node_id in self._store:
            del self._store[node_id]
            # pop() with a default keeps delete() safe even if the metadata
            # entry is missing (e.g. the two dicts ever fall out of sync);
            # a bare `del` would raise KeyError here.
            self._metadata.pop(node_id, None)
            return True
        return False

    def exists(self, node_id: str) -> bool:
        """Check if a node exists in the store."""
        return node_id in self._store

    def count(self) -> int:
        """Get the total number of stored documents."""
        return len(self._store)

    def get_metadata(self, node_id: str) -> dict | None:
        """Return {"content_hash", "char_count"} for a node, or None."""
        return self._metadata.get(node_id)

    def clear(self) -> int:
        """Delete all documents; return how many were removed."""
        count = len(self._store)
        self._store.clear()
        self._metadata.clear()
        return count
310
+
311
+
312
# Type alias for either store backend. Both classes expose the same
# put/get/get_batch/delete/exists/count/get_metadata/clear surface, so
# consumers should accept this alias rather than a concrete class.
KVStore = Union[SQLiteKVStore, InMemoryKVStore]
@@ -0,0 +1,323 @@
1
+ """
2
+ Persistence Module - Save and Load RNSR Indexes
3
+
4
+ Provides functionality to persist and restore:
5
+ - Skeleton Index (SkeletonNode structures)
6
+ - KV Store (already SQLite-backed, but needs export/import)
7
+ - Document metadata
8
+
9
+ Usage:
10
+ from rnsr.indexing import save_index, load_index
11
+
12
+ # Save after indexing
13
+ skeleton, kv_store = build_skeleton_index(tree)
14
+ save_index(skeleton, kv_store, "./my_document_index/")
15
+
16
+ # Load later (no re-processing needed!)
17
+ skeleton, kv_store = load_index("./my_document_index/")
18
+ answer = run_navigator("question", skeleton, kv_store)
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import json
24
+ import shutil
25
+ from dataclasses import asdict
26
+ from datetime import datetime
27
+ from pathlib import Path
28
+ from typing import Any
29
+
30
+ import structlog
31
+
32
+ from rnsr.exceptions import IndexingError
33
+ from rnsr.indexing.kv_store import InMemoryKVStore, KVStore, SQLiteKVStore
34
+ from rnsr.models import SkeletonNode
35
+
36
+ logger = structlog.get_logger(__name__)
37
+
38
+ # Version for format compatibility
39
+ INDEX_FORMAT_VERSION = "1.0"
40
+
41
+
42
def save_index(
    skeleton: dict[str, SkeletonNode],
    kv_store: KVStore,
    index_dir: str | Path,
    metadata: dict[str, Any] | None = None,
) -> Path:
    """
    Save a skeleton index and KV store to disk.

    Creates a directory structure:
        index_dir/
            manifest.json   # Version, metadata, timestamps
            skeleton.json   # SkeletonNode structures
            content.db      # SQLite KV store (copied or created)

    Args:
        skeleton: Dictionary of node_id -> SkeletonNode
        kv_store: KV store containing full text
        index_dir: Directory to save the index
        metadata: Optional metadata (title, source, etc.)

    Returns:
        Path to the index directory

    Raises:
        IndexingError: If the KV store type is unsupported.

    Example:
        skeleton, kv = build_skeleton_index(tree)
        save_index(skeleton, kv, "./indexes/contract_2024/")
    """
    index_path = Path(index_dir)
    index_path.mkdir(parents=True, exist_ok=True)

    # Build manifest
    manifest = {
        "version": INDEX_FORMAT_VERSION,
        "created_at": datetime.now().isoformat(),
        "node_count": len(skeleton),
        "metadata": metadata or {},
    }

    # Record the root node (level 0) so tools can inspect the index
    # without loading the full skeleton.
    root = next((n for n in skeleton.values() if n.level == 0), None)
    if root:
        manifest["root_id"] = root.node_id
        manifest["root_header"] = root.header

    # Save manifest
    manifest_path = index_path / "manifest.json"
    with open(manifest_path, "w") as f:
        json.dump(manifest, f, indent=2)

    # Save skeleton nodes
    skeleton_path = index_path / "skeleton.json"
    skeleton_data = {
        node_id: _skeleton_node_to_dict(node)
        for node_id, node in skeleton.items()
    }
    with open(skeleton_path, "w") as f:
        json.dump(skeleton_data, f, indent=2)

    # Persist the KV store as a SQLite database inside the index dir.
    content_path = index_path / "content.db"

    if isinstance(kv_store, SQLiteKVStore):
        # Copy the existing database file. Compare *resolved* paths so the
        # same location spelled two ways (relative vs absolute, symlinks)
        # is not copied onto itself.
        if kv_store.db_path.resolve() != content_path.resolve():
            shutil.copy2(kv_store.db_path, content_path)
    elif isinstance(kv_store, InMemoryKVStore):
        # Export the in-memory store to SQLite. Check `is not None` rather
        # than truthiness so legitimately-empty content ("") is preserved
        # instead of being silently dropped.
        sqlite_kv = SQLiteKVStore(content_path)
        for node_id in skeleton.keys():
            content = kv_store.get(node_id)
            if content is not None:
                sqlite_kv.put(node_id, content)
    else:
        raise IndexingError(f"Unsupported KV store type: {type(kv_store)}")

    logger.info(
        "index_saved",
        path=str(index_path),
        nodes=len(skeleton),
    )

    return index_path
+ return index_path
125
+
126
+
127
def load_index(
    index_dir: str | Path,
    in_memory: bool = False,
) -> tuple[dict[str, SkeletonNode], KVStore]:
    """
    Load a skeleton index and KV store from disk.

    Args:
        index_dir: Directory containing the saved index
        in_memory: If True, load KV store into memory (faster but uses more RAM)

    Returns:
        Tuple of (skeleton dict, kv_store)

    Raises:
        IndexingError: If the directory, manifest, skeleton, or content
            database is missing.

    Example:
        skeleton, kv = load_index("./indexes/contract_2024/")
        answer = run_navigator("What are the payment terms?", skeleton, kv)
    """
    index_path = Path(index_dir)

    if not index_path.exists():
        raise IndexingError(f"Index directory not found: {index_path}")

    # Load and validate manifest
    manifest_path = index_path / "manifest.json"
    if not manifest_path.exists():
        raise IndexingError(f"Manifest not found: {manifest_path}")

    with open(manifest_path) as f:
        manifest = json.load(f)

    version = manifest.get("version", "unknown")
    if version != INDEX_FORMAT_VERSION:
        # Warn but keep going: a mismatched format may still be readable.
        logger.warning(
            "index_version_mismatch",
            expected=INDEX_FORMAT_VERSION,
            found=version,
        )

    # Load skeleton nodes
    skeleton_path = index_path / "skeleton.json"
    if not skeleton_path.exists():
        raise IndexingError(f"Skeleton index not found: {skeleton_path}")

    with open(skeleton_path) as f:
        skeleton_data = json.load(f)

    skeleton: dict[str, SkeletonNode] = {
        node_id: _dict_to_skeleton_node(data)
        for node_id, data in skeleton_data.items()
    }

    # Load KV store
    content_path = index_path / "content.db"
    if not content_path.exists():
        raise IndexingError(f"Content database not found: {content_path}")

    kv_store: KVStore
    if in_memory:
        # Copy all content into an InMemoryKVStore for faster access.
        # Check `is not None` rather than truthiness so empty-string
        # content is preserved instead of being silently dropped.
        sqlite_kv = SQLiteKVStore(content_path)
        kv_store = InMemoryKVStore()
        for node_id in skeleton.keys():
            content = sqlite_kv.get(node_id)
            if content is not None:
                kv_store.put(node_id, content)
    else:
        # Use SQLite directly
        kv_store = SQLiteKVStore(content_path)

    logger.info(
        "index_loaded",
        path=str(index_path),
        nodes=len(skeleton),
        version=version,
    )

    return skeleton, kv_store
204
+
205
+
206
def get_index_info(index_dir: str | Path) -> dict[str, Any]:
    """
    Get information about a saved index without loading it.

    Reads manifest.json and augments it with the on-disk sizes of the
    content database and skeleton file, when present.

    Args:
        index_dir: Directory containing the saved index

    Returns:
        Dictionary with index metadata

    Raises:
        IndexingError: If the manifest file is missing.

    Example:
        info = get_index_info("./indexes/contract_2024/")
        print(f"Index has {info['node_count']} nodes")
    """
    index_path = Path(index_dir)
    manifest_path = index_path / "manifest.json"

    if not manifest_path.exists():
        raise IndexingError(f"Manifest not found: {manifest_path}")

    manifest = json.loads(manifest_path.read_text())

    # Attach file-size info for each optional artifact that exists.
    for filename, size_key in (
        ("content.db", "content_size_bytes"),
        ("skeleton.json", "skeleton_size_bytes"),
    ):
        artifact = index_path / filename
        if artifact.exists():
            manifest[size_key] = artifact.stat().st_size

    return manifest
239
+
240
+
241
def delete_index(index_dir: str | Path) -> bool:
    """
    Delete a saved index.

    Removes the index directory and everything inside it.

    Args:
        index_dir: Directory containing the saved index

    Returns:
        True if deleted, False if not found
    """
    target = Path(index_dir)

    # Nothing to do if the directory was never created (or already removed).
    if not target.exists():
        return False

    shutil.rmtree(target)
    logger.info("index_deleted", path=str(target))
    return True
259
+
260
+
261
def list_indexes(base_dir: str | Path) -> list[dict[str, Any]]:
    """
    List all indexes in a directory.

    An index is any immediate subdirectory that contains a manifest.json.
    Unreadable indexes are logged and skipped rather than aborting the scan.

    Args:
        base_dir: Directory to search for indexes

    Returns:
        List of index info dictionaries

    Example:
        indexes = list_indexes("./indexes/")
        for idx in indexes:
            print(f"{idx['path']}: {idx['node_count']} nodes")
    """
    base_path = Path(base_dir)
    found: list[dict[str, Any]] = []

    if not base_path.exists():
        return found

    for candidate in base_path.iterdir():
        if not candidate.is_dir():
            continue
        if not (candidate / "manifest.json").exists():
            continue
        try:
            info = get_index_info(candidate)
        except Exception as e:
            logger.warning("failed_to_read_index", path=str(candidate), error=str(e))
            continue
        info["path"] = str(candidate)
        found.append(info)

    return found
292
+
293
+
294
+ # =============================================================================
295
+ # Serialization Helpers
296
+ # =============================================================================
297
+
298
+ def _skeleton_node_to_dict(node: SkeletonNode) -> dict[str, Any]:
299
+ """Convert SkeletonNode to JSON-serializable dict."""
300
+ return {
301
+ "node_id": node.node_id,
302
+ "parent_id": node.parent_id,
303
+ "level": node.level,
304
+ "header": node.header,
305
+ "summary": node.summary,
306
+ "child_ids": node.child_ids,
307
+ "page_num": node.page_num,
308
+ "metadata": node.metadata,
309
+ }
310
+
311
+
312
def _dict_to_skeleton_node(data: dict[str, Any]) -> SkeletonNode:
    """Convert dict back to SkeletonNode.

    Inverse of _skeleton_node_to_dict. `node_id` and `level` are required
    keys (KeyError if absent); every other field falls back to an
    empty/None default via .get(), so partially-populated dicts still load.
    Each call produces fresh default [] / {} objects, so returned nodes
    never share mutable state.
    """
    return SkeletonNode(
        node_id=data["node_id"],
        parent_id=data.get("parent_id"),
        level=data["level"],
        header=data.get("header", ""),
        summary=data.get("summary", ""),
        child_ids=data.get("child_ids", []),
        page_num=data.get("page_num"),
        metadata=data.get("metadata", {}),
    )