causaliq-knowledge 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. causaliq_knowledge/__init__.py +6 -3
  2. causaliq_knowledge/action.py +480 -0
  3. causaliq_knowledge/cache/__init__.py +18 -0
  4. causaliq_knowledge/cache/encoders/__init__.py +13 -0
  5. causaliq_knowledge/cache/encoders/base.py +90 -0
  6. causaliq_knowledge/cache/encoders/json_encoder.py +430 -0
  7. causaliq_knowledge/cache/token_cache.py +666 -0
  8. causaliq_knowledge/cli/__init__.py +15 -0
  9. causaliq_knowledge/cli/cache.py +478 -0
  10. causaliq_knowledge/cli/generate.py +410 -0
  11. causaliq_knowledge/cli/main.py +172 -0
  12. causaliq_knowledge/cli/models.py +309 -0
  13. causaliq_knowledge/graph/__init__.py +78 -0
  14. causaliq_knowledge/graph/generator.py +457 -0
  15. causaliq_knowledge/graph/loader.py +222 -0
  16. causaliq_knowledge/graph/models.py +426 -0
  17. causaliq_knowledge/graph/params.py +175 -0
  18. causaliq_knowledge/graph/prompts.py +445 -0
  19. causaliq_knowledge/graph/response.py +392 -0
  20. causaliq_knowledge/graph/view_filter.py +154 -0
  21. causaliq_knowledge/llm/base_client.py +147 -1
  22. causaliq_knowledge/llm/cache.py +443 -0
  23. causaliq_knowledge/py.typed +0 -0
  24. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/METADATA +10 -6
  25. causaliq_knowledge-0.4.0.dist-info/RECORD +42 -0
  26. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/WHEEL +1 -1
  27. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/entry_points.txt +3 -0
  28. causaliq_knowledge/cli.py +0 -414
  29. causaliq_knowledge-0.2.0.dist-info/RECORD +0 -22
  30. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/licenses/LICENSE +0 -0
  31. {causaliq_knowledge-0.2.0.dist-info → causaliq_knowledge-0.4.0.dist-info}/top_level.txt +0 -0
causaliq_knowledge/cache/token_cache.py
@@ -0,0 +1,666 @@
+ """
+ TokenCache: SQLite-backed cache with shared token dictionary.
+
+ Provides efficient storage for cache entries with:
+ - Fast indexed key lookup via SQLite
+ - In-memory mode via :memory:
+ - Concurrency support via SQLite locking
+ - Shared token dictionary for cross-entry compression
+
+ Note: This module is designed for future migration to causaliq-core.
+ """
+
+ from __future__ import annotations
+
+ import sqlite3
+ from contextlib import contextmanager
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any, Iterator
+
+ if TYPE_CHECKING:  # pragma: no cover
+     from causaliq_knowledge.cache.encoders.base import EntryEncoder
+
+
+ class TokenCache:
+     """SQLite-backed cache with shared token dictionary.
+
+     Attributes:
+         db_path: Path to SQLite database file, or ":memory:" for in-memory.
+         conn: SQLite connection (None until open() called or context entered).
+
+     Example:
+         >>> with TokenCache(":memory:") as cache:
+         ...     cache.put("abc123", "test", b"hello")
+         ...     data = cache.get("abc123", "test")
+     """
+
+     # SQL statements for schema creation
+     _SCHEMA_SQL = """
+     -- Token dictionary (grows dynamically, shared across encoders)
+     CREATE TABLE IF NOT EXISTS tokens (
+         id INTEGER PRIMARY KEY AUTOINCREMENT,
+         token TEXT UNIQUE NOT NULL,
+         frequency INTEGER DEFAULT 1
+     );
+
+     -- Generic cache entries
+     CREATE TABLE IF NOT EXISTS cache_entries (
+         hash TEXT NOT NULL,
+         entry_type TEXT NOT NULL,
+         data BLOB NOT NULL,
+         created_at TEXT NOT NULL,
+         metadata BLOB,
+         hit_count INTEGER DEFAULT 0,
+         last_accessed_at TEXT,
+         PRIMARY KEY (hash, entry_type)
+     );
+
+     -- Indexes for common queries
+     CREATE INDEX IF NOT EXISTS idx_entry_type
+         ON cache_entries(entry_type);
+     CREATE INDEX IF NOT EXISTS idx_created_at
+         ON cache_entries(created_at);
+     """
+
+     def __init__(self, db_path: str | Path) -> None:
+         """Initialise TokenCache.
+
+         Args:
+             db_path: Path to SQLite database file. Use ":memory:" for
+                 in-memory database (fast, non-persistent).
+         """
+         self.db_path = str(db_path)
+         self._conn: sqlite3.Connection | None = None
+         # In-memory token dictionary for fast lookup
+         self._token_to_id: dict[str, int] = {}
+         self._id_to_token: dict[int, str] = {}
+         # Registered encoders for auto-encoding (entry_type -> encoder)
+         self._encoders: dict[str, EntryEncoder] = {}
+
+     @property
+     def conn(self) -> sqlite3.Connection:
+         """Get the database connection, raising if not connected."""
+         if self._conn is None:
+             raise RuntimeError(
+                 "TokenCache not connected. Use 'with cache:' or call open()."
+             )
+         return self._conn
+
+     @property
+     def is_open(self) -> bool:
+         """Check if the cache connection is open."""
+         return self._conn is not None
+
+     @property
+     def is_memory(self) -> bool:
+         """Check if this is an in-memory database."""
+         return self.db_path == ":memory:"
+
+     def open(self) -> TokenCache:
+         """Open the database connection and initialise schema.
+
+         Returns:
+             self for method chaining.
+
+         Raises:
+             RuntimeError: If already connected.
+         """
+         if self._conn is not None:
+             raise RuntimeError("TokenCache already connected.")
+
+         self._conn = sqlite3.connect(
+             self.db_path,
+             check_same_thread=False,  # Allow multi-threaded access
+         )
+         # Enable foreign keys and WAL mode for better concurrency
+         self._conn.execute("PRAGMA foreign_keys = ON")
+         if not self.is_memory:
+             self._conn.execute("PRAGMA journal_mode = WAL")
+
+         self._init_schema()
+         return self
+
+     def close(self) -> None:
+         """Close the database connection."""
+         if self._conn is not None:
+             self._conn.close()
+             self._conn = None
+
+     def _init_schema(self) -> None:
+         """Create database tables if they don't exist."""
+         self.conn.executescript(self._SCHEMA_SQL)
+         self.conn.commit()
+         self._load_token_dict()
+
+     def _load_token_dict(self) -> None:
+         """Load token dictionary from database into memory."""
+         cursor = self.conn.execute("SELECT id, token FROM tokens")
+         self._token_to_id.clear()
+         self._id_to_token.clear()
+         for row in cursor:
+             token_id, token = row[0], row[1]
+             self._token_to_id[token] = token_id
+             self._id_to_token[token_id] = token
+
+     def __enter__(self) -> TokenCache:
+         """Context manager entry - opens connection."""
+         return self.open()
+
+     def __exit__(
+         self,
+         exc_type: type[BaseException] | None,
+         exc_val: BaseException | None,
+         exc_tb: object,
+     ) -> None:
+         """Context manager exit - closes connection."""
+         self.close()
+
+     @contextmanager
+     def transaction(self) -> Iterator[sqlite3.Cursor]:
+         """Context manager for a database transaction.
+
+         Commits on success, rolls back on exception.
+
+         Yields:
+             SQLite cursor for executing statements.
+         """
+         cursor = self.conn.cursor()
+         try:
+             yield cursor
+             self.conn.commit()
+         except Exception:
+             self.conn.rollback()
+             raise
+         finally:
+             cursor.close()
+
+     def _utcnow_iso(self) -> str:
+         """Get current UTC time as ISO 8601 string."""
+         return datetime.now(timezone.utc).isoformat()
+
+     def table_exists(self, table_name: str) -> bool:
+         """Check if a table exists in the database.
+
+         Args:
+             table_name: Name of the table to check.
+
+         Returns:
+             True if table exists, False otherwise.
+         """
+         cursor = self.conn.execute(
+             "SELECT name FROM sqlite_master " "WHERE type='table' AND name=?",
+             (table_name,),
+         )
+         return cursor.fetchone() is not None
+
+     def entry_count(self, entry_type: str | None = None) -> int:
+         """Count cache entries, optionally filtered by type.
+
+         Args:
+             entry_type: If provided, count only entries of this type.
+
+         Returns:
+             Number of matching entries.
+         """
+         if entry_type is None:
+             cursor = self.conn.execute("SELECT COUNT(*) FROM cache_entries")
+         else:
+             cursor = self.conn.execute(
+                 "SELECT COUNT(*) FROM cache_entries WHERE entry_type = ?",
+                 (entry_type,),
+             )
+         row = cursor.fetchone()
+         return int(row[0]) if row else 0
+
+     def list_entry_types(self) -> list[str]:
+         """List all distinct entry types in the cache.
+
+         Returns:
+             List of entry type names found in the cache.
+
+         Example:
+             >>> with TokenCache(":memory:") as cache:
+             ...     cache.register_encoder("llm", LLMEntryEncoder())
+             ...     cache.put_data("h1", "llm", {"data": "test"})
+             ...     cache.list_entry_types()
+             ['llm']
+         """
+         cursor = self.conn.execute(
+             "SELECT DISTINCT entry_type FROM cache_entries ORDER BY entry_type"
+         )
+         return [row[0] for row in cursor.fetchall()]
+
+     def token_count(self) -> int:
+         """Count tokens in the dictionary.
+
+         Returns:
+             Number of tokens.
+         """
+         cursor = self.conn.execute("SELECT COUNT(*) FROM tokens")
+         row = cursor.fetchone()
+         return int(row[0]) if row else 0
+
+     def total_hits(self, entry_type: str | None = None) -> int:
+         """Get total cache hits across all entries.
+
+         Args:
+             entry_type: If provided, count only hits for this entry type.
+
+         Returns:
+             Total hit count.
+         """
+         if entry_type is None:
+             cursor = self.conn.execute(
+                 "SELECT COALESCE(SUM(hit_count), 0) FROM cache_entries"
+             )
+         else:
+             cursor = self.conn.execute(
+                 "SELECT COALESCE(SUM(hit_count), 0) FROM cache_entries "
+                 "WHERE entry_type = ?",
+                 (entry_type,),
+             )
+         row = cursor.fetchone()
+         return int(row[0]) if row else 0
+
+     def get_or_create_token(self, token: str) -> int:
+         """Get token ID, creating a new entry if needed.
+
+         This method is used by encoders to compress strings to integer IDs.
+         The token dictionary grows dynamically as new tokens are encountered.
+
+         Args:
+             token: The string token to look up or create.
+
+         Returns:
+             Integer ID for the token (1-65535 range).
+
+         Raises:
+             ValueError: If token dictionary exceeds uint16 capacity.
+         """
+         # Fast path: check in-memory cache
+         if token in self._token_to_id:
+             return self._token_to_id[token]
+
+         # Slow path: insert into database
+         cursor = self.conn.execute(
+             "INSERT INTO tokens (token) VALUES (?) RETURNING id",
+             (token,),
+         )
+         token_id: int = cursor.fetchone()[0]
+         self.conn.commit()
+
+         # Check uint16 capacity (max 65,535 tokens)
+         if token_id > 65535:  # pragma: no cover
+             raise ValueError(
+                 f"Token dictionary exceeded uint16 capacity: {token_id}"
+             )
+
+         # Update in-memory cache
+         self._token_to_id[token] = token_id
+         self._id_to_token[token_id] = token
+
+         return token_id
+
+     def get_token(self, token_id: int) -> str | None:
+         """Get token string by ID.
+
+         This method is used by decoders to expand integer IDs back to strings.
+
+         Args:
+             token_id: The integer ID to look up.
+
+         Returns:
+             The token string, or None if not found.
+         """
+         return self._id_to_token.get(token_id)
+
+     # ========================================================================
+     # Cache entry operations
+     # ========================================================================
+
+     def put(
+         self,
+         hash: str,
+         entry_type: str,
+         data: bytes,
+         metadata: bytes | None = None,
+     ) -> None:
+         """Store a cache entry.
+
+         Args:
+             hash: Unique identifier for the entry (e.g. SHA-256 truncated).
+             entry_type: Type of entry (e.g. 'llm', 'graph', 'score').
+             data: Binary data to store.
+             metadata: Optional binary metadata.
+         """
+         self.conn.execute(
+             "INSERT OR REPLACE INTO cache_entries "
+             "(hash, entry_type, data, created_at, metadata) "
+             "VALUES (?, ?, ?, ?, ?)",
+             (hash, entry_type, data, self._utcnow_iso(), metadata),
+         )
+         self.conn.commit()
+
+     def get(self, hash: str, entry_type: str) -> bytes | None:
+         """Retrieve a cache entry and increment hit count.
+
+         Args:
+             hash: Unique identifier for the entry.
+             entry_type: Type of entry to retrieve.
+
+         Returns:
+             Binary data if found, None otherwise.
+         """
+         cursor = self.conn.execute(
+             "SELECT data FROM cache_entries "
+             "WHERE hash = ? AND entry_type = ?",
+             (hash, entry_type),
+         )
+         row = cursor.fetchone()
+         if row:
+             # Increment hit count and update last accessed time
+             self.conn.execute(
+                 "UPDATE cache_entries SET hit_count = hit_count + 1, "
+                 "last_accessed_at = ? WHERE hash = ? AND entry_type = ?",
+                 (self._utcnow_iso(), hash, entry_type),
+             )
+             self.conn.commit()
+             result: bytes = row[0]
+             return result
+         return None
+
+     def get_with_metadata(
+         self, hash: str, entry_type: str
+     ) -> tuple[bytes, bytes | None] | None:
+         """Retrieve a cache entry with its metadata.
+
+         Args:
+             hash: Unique identifier for the entry.
+             entry_type: Type of entry to retrieve.
+
+         Returns:
+             Tuple of (data, metadata) if found, None otherwise.
+         """
+         cursor = self.conn.execute(
+             "SELECT data, metadata FROM cache_entries "
+             "WHERE hash = ? AND entry_type = ?",
+             (hash, entry_type),
+         )
+         row = cursor.fetchone()
+         return (row[0], row[1]) if row else None
+
+     def exists(self, hash: str, entry_type: str) -> bool:
+         """Check if a cache entry exists.
+
+         Args:
+             hash: Unique identifier for the entry.
+             entry_type: Type of entry to check.
+
+         Returns:
+             True if entry exists, False otherwise.
+         """
+         cursor = self.conn.execute(
+             "SELECT 1 FROM cache_entries " "WHERE hash = ? AND entry_type = ?",
+             (hash, entry_type),
+         )
+         return cursor.fetchone() is not None
+
+     def delete(self, hash: str, entry_type: str) -> bool:
+         """Delete a cache entry.
+
+         Args:
+             hash: Unique identifier for the entry.
+             entry_type: Type of entry to delete.
+
+         Returns:
+             True if entry was deleted, False if it didn't exist.
+         """
+         cursor = self.conn.execute(
+             "DELETE FROM cache_entries WHERE hash = ? AND entry_type = ?",
+             (hash, entry_type),
+         )
+         self.conn.commit()
+         return cursor.rowcount > 0
+
+     # ========================================================================
+     # Encoder registration and auto-encoding operations
+     # ========================================================================
+
+     def register_encoder(self, entry_type: str, encoder: EntryEncoder) -> None:
+         """Register an encoder for a specific entry type.
+
+         Once registered, `put_data()` and `get_data()` will automatically
+         encode/decode entries of this type using the registered encoder.
+
+         Args:
+             entry_type: Type identifier (e.g. 'llm', 'json', 'score').
+             encoder: EntryEncoder instance for this type.
+
+         Example:
+             >>> from causaliq_knowledge.cache.encoders import JsonEncoder
+             >>> with TokenCache(":memory:") as cache:
+             ...     cache.register_encoder("json", JsonEncoder())
+             ...     cache.put_data("key1", "json", {"msg": "hello"})
+         """
+         self._encoders[entry_type] = encoder
+
+     def get_encoder(self, entry_type: str) -> EntryEncoder | None:
+         """Get the registered encoder for an entry type.
+
+         Args:
+             entry_type: Type identifier to look up.
+
+         Returns:
+             The registered encoder, or None if not registered.
+         """
+         return self._encoders.get(entry_type)
+
+     def has_encoder(self, entry_type: str) -> bool:
+         """Check if an encoder is registered for an entry type.
+
+         Args:
+             entry_type: Type identifier to check.
+
+         Returns:
+             True if encoder is registered, False otherwise.
+         """
+         return entry_type in self._encoders
+
+     def put_data(
+         self,
+         hash: str,
+         entry_type: str,
+         data: Any,
+         metadata: Any | None = None,
+     ) -> None:
+         """Store data using the registered encoder for the entry type.
+
+         This method automatically encodes the data using the encoder
+         registered for the given entry_type. Use `put()` for raw bytes.
+
+         Args:
+             hash: Unique identifier for the entry.
+             entry_type: Type of entry (must have registered encoder).
+             data: Data to encode and store.
+             metadata: Optional metadata to encode and store.
+
+         Raises:
+             KeyError: If no encoder is registered for entry_type.
+
+         Example:
+             >>> with TokenCache(":memory:") as cache:
+             ...     cache.register_encoder("json", JsonEncoder())
+             ...     cache.put_data("abc", "json", {"key": "value"})
+         """
+         encoder = self._encoders[entry_type]
+         blob = encoder.encode(data, self)
+         meta_blob = (
+             encoder.encode(metadata, self) if metadata is not None else None
+         )
+         self.put(hash, entry_type, blob, meta_blob)
+
+     def get_data(self, hash: str, entry_type: str) -> Any | None:
+         """Retrieve and decode data using the registered encoder.
+
+         This method automatically decodes the data using the encoder
+         registered for the given entry_type. Use `get()` for raw bytes.
+
+         Args:
+             hash: Unique identifier for the entry.
+             entry_type: Type of entry (must have registered encoder).
+
+         Returns:
+             Decoded data if found, None otherwise.
+
+         Raises:
+             KeyError: If no encoder is registered for entry_type.
+
+         Example:
+             >>> with TokenCache(":memory:") as cache:
+             ...     cache.register_encoder("json", JsonEncoder())
+             ...     cache.put_data("abc", "json", {"key": "value"})
+             ...     data = cache.get_data("abc", "json")
+         """
+         blob = self.get(hash, entry_type)
+         if blob is None:
+             return None
+         encoder = self._encoders[entry_type]
+         return encoder.decode(blob, self)
+
+     def get_data_with_metadata(
+         self, hash: str, entry_type: str
+     ) -> tuple[Any, Any | None] | None:
+         """Retrieve and decode data with metadata using registered encoder.
+
+         Args:
+             hash: Unique identifier for the entry.
+             entry_type: Type of entry (must have registered encoder).
+
+         Returns:
+             Tuple of (decoded_data, decoded_metadata) if found, None otherwise.
+             metadata may be None if not stored.
+
+         Raises:
+             KeyError: If no encoder is registered for entry_type.
+         """
+         result = self.get_with_metadata(hash, entry_type)
+         if result is None:
+             return None
+         data_blob, meta_blob = result
+         encoder = self._encoders[entry_type]
+         decoded_data = encoder.decode(data_blob, self)
+         decoded_meta = encoder.decode(meta_blob, self) if meta_blob else None
+         return (decoded_data, decoded_meta)
+
+     # ========================================================================
+     # Import/Export operations
+     # ========================================================================
+
+     def export_entries(
+         self,
+         output_dir: Path,
+         entry_type: str,
+         fmt: str | None = None,
+     ) -> int:
+         """Export cache entries to human-readable files.
+
+         Each entry is exported to a separate file named `{hash}.{ext}` where
+         ext is determined by the format or encoder's default_export_format.
+
+         Args:
+             output_dir: Directory to write exported files to. Created if
+                 it doesn't exist.
+             entry_type: Type of entries to export (must have registered
+                 encoder).
+             fmt: Export format (e.g. 'json', 'yaml'). If None, uses the
+                 encoder's default_export_format.
+
+         Returns:
+             Number of entries exported.
+
+         Raises:
+             KeyError: If no encoder is registered for entry_type.
+
+         Example:
+             >>> from pathlib import Path
+             >>> from causaliq_knowledge.cache import TokenCache
+             >>> from causaliq_knowledge.cache.encoders import JsonEncoder
+             >>> with TokenCache(":memory:") as cache:
+             ...     cache.register_encoder("json", JsonEncoder())
+             ...     cache.put_data("abc123", "json", {"key": "value"})
+             ...     count = cache.export_entries(Path("./export"), "json")
+             ...     # Creates ./export/abc123.json
+         """
+         encoder = self._encoders[entry_type]
+         ext = fmt or encoder.default_export_format
+
+         # Create output directory if needed
+         output_dir.mkdir(parents=True, exist_ok=True)
+
+         # Query all entries of this type
+         cursor = self.conn.execute(
+             "SELECT hash, data FROM cache_entries WHERE entry_type = ?",
+             (entry_type,),
+         )
+
+         count = 0
+         for hash_val, blob in cursor:
+             # Decode the blob to get original data
+             data = encoder.decode(blob, self)
+             # Export to file using encoder's export method
+             file_path = output_dir / f"{hash_val}.{ext}"
+             encoder.export(data, file_path)
+             count += 1
+
+         return count
+
+     def import_entries(
+         self,
+         input_dir: Path,
+         entry_type: str,
+     ) -> int:
+         """Import human-readable files into the cache.
+
+         Each file is imported with its stem (filename without extension)
+         used as the cache hash. The encoder's import_() method reads the
+         file and the data is encoded before storage.
+
+         Args:
+             input_dir: Directory containing files to import.
+             entry_type: Type to assign to imported entries (must have
+                 registered encoder).
+
+         Returns:
+             Number of entries imported.
+
+         Raises:
+             KeyError: If no encoder is registered for entry_type.
+             FileNotFoundError: If input_dir doesn't exist.
+
+         Example:
+             >>> from pathlib import Path
+             >>> from causaliq_knowledge.cache import TokenCache
+             >>> from causaliq_knowledge.cache.encoders import JsonEncoder
+             >>> with TokenCache(":memory:") as cache:
+             ...     cache.register_encoder("json", JsonEncoder())
+             ...     count = cache.import_entries(Path("./import"), "json")
+             ...     # Imports all files from ./import as "json" entries
+         """
+         encoder = self._encoders[entry_type]
+
+         if not input_dir.exists():
+             raise FileNotFoundError(f"Input directory not found: {input_dir}")
+
+         count = 0
+         for file_path in input_dir.iterdir():
+             if file_path.is_file():
+                 # Use filename (without extension) as hash
+                 hash_val = file_path.stem
+                 # Import data using encoder
+                 data = encoder.import_(file_path)
+                 # Encode and store
+                 self.put_data(hash_val, entry_type, data)
+                 count += 1
+
+         return count
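Taken together, the new token_cache.py gives downstream code one entry point for both raw and encoder-backed storage. A minimal usage sketch based only on the API shown in this hunk; it assumes JsonEncoder behaves as the module's own docstring examples suggest, and the ./export path is illustrative:

    from pathlib import Path

    from causaliq_knowledge.cache import TokenCache
    from causaliq_knowledge.cache.encoders import JsonEncoder

    with TokenCache(":memory:") as cache:
        # Raw bytes round-trip, keyed by (hash, entry_type).
        cache.put("abc123", "raw", b"hello")
        assert cache.get("abc123", "raw") == b"hello"

        # Encoder-backed storage: register once, then store structured data.
        cache.register_encoder("json", JsonEncoder())
        cache.put_data("k1", "json", {"msg": "hello"}, metadata={"src": "demo"})
        assert cache.get_data("k1", "json") == {"msg": "hello"}

        # Introspection helpers added in this release.
        print(cache.entry_count(), cache.list_entry_types(), cache.total_hits())

        # Round-trip entries through human-readable files.
        cache.export_entries(Path("./export"), "json")  # writes ./export/k1.json
        cache.import_entries(Path("./export"), "json")  # re-imports by file stem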
causaliq_knowledge/cli/__init__.py
@@ -0,0 +1,15 @@
+ """Command-line interface for causaliq-knowledge.
+
+ This package provides the CLI implementation split into logical modules:
+
+ - main: Core CLI entry point and query command
+ - cache: Cache management commands (stats, export, import)
+ - generate: Graph generation commands
+ - models: Model listing command
+ """
+
+ from __future__ import annotations
+
+ from causaliq_knowledge.cli.main import cli, main
+
+ __all__ = ["cli", "main"]
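This re-export keeps the public import path stable after the monolithic cli.py (removed above, item 28) was split into the cli/ package. A minimal sketch of the resulting import surface; treating main() as the console-script target wired up in entry_points.txt is an assumption, since the entry-point names themselves are not shown in this diff:

    from causaliq_knowledge.cli import cli, main

    # cli: the CLI object re-exported from causaliq_knowledge.cli.main;
    # main(): assumed console entry function, also callable programmatically.
    if __name__ == "__main__":
        main()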