ebk 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +450 -0
  4. ebk/ai/llm_providers/__init__.py +26 -0
  5. ebk/ai/llm_providers/anthropic.py +209 -0
  6. ebk/ai/llm_providers/base.py +295 -0
  7. ebk/ai/llm_providers/gemini.py +285 -0
  8. ebk/ai/llm_providers/ollama.py +294 -0
  9. ebk/ai/metadata_enrichment.py +394 -0
  10. ebk/ai/question_generator.py +328 -0
  11. ebk/ai/reading_companion.py +224 -0
  12. ebk/ai/semantic_search.py +433 -0
  13. ebk/ai/text_extractor.py +393 -0
  14. ebk/calibre_import.py +66 -0
  15. ebk/cli.py +6433 -0
  16. ebk/config.py +230 -0
  17. ebk/db/__init__.py +37 -0
  18. ebk/db/migrations.py +507 -0
  19. ebk/db/models.py +725 -0
  20. ebk/db/session.py +144 -0
  21. ebk/decorators.py +1 -0
  22. ebk/exports/__init__.py +0 -0
  23. ebk/exports/base_exporter.py +218 -0
  24. ebk/exports/echo_export.py +279 -0
  25. ebk/exports/html_library.py +1743 -0
  26. ebk/exports/html_utils.py +87 -0
  27. ebk/exports/hugo.py +59 -0
  28. ebk/exports/jinja_export.py +286 -0
  29. ebk/exports/multi_facet_export.py +159 -0
  30. ebk/exports/opds_export.py +232 -0
  31. ebk/exports/symlink_dag.py +479 -0
  32. ebk/exports/zip.py +25 -0
  33. ebk/extract_metadata.py +341 -0
  34. ebk/ident.py +89 -0
  35. ebk/library_db.py +1440 -0
  36. ebk/opds.py +748 -0
  37. ebk/plugins/__init__.py +42 -0
  38. ebk/plugins/base.py +502 -0
  39. ebk/plugins/hooks.py +442 -0
  40. ebk/plugins/registry.py +499 -0
  41. ebk/repl/__init__.py +9 -0
  42. ebk/repl/find.py +126 -0
  43. ebk/repl/grep.py +173 -0
  44. ebk/repl/shell.py +1677 -0
  45. ebk/repl/text_utils.py +320 -0
  46. ebk/search_parser.py +413 -0
  47. ebk/server.py +3608 -0
  48. ebk/services/__init__.py +28 -0
  49. ebk/services/annotation_extraction.py +351 -0
  50. ebk/services/annotation_service.py +380 -0
  51. ebk/services/export_service.py +577 -0
  52. ebk/services/import_service.py +447 -0
  53. ebk/services/personal_metadata_service.py +347 -0
  54. ebk/services/queue_service.py +253 -0
  55. ebk/services/tag_service.py +281 -0
  56. ebk/services/text_extraction.py +317 -0
  57. ebk/services/view_service.py +12 -0
  58. ebk/similarity/__init__.py +77 -0
  59. ebk/similarity/base.py +154 -0
  60. ebk/similarity/core.py +471 -0
  61. ebk/similarity/extractors.py +168 -0
  62. ebk/similarity/metrics.py +376 -0
  63. ebk/skills/SKILL.md +182 -0
  64. ebk/skills/__init__.py +1 -0
  65. ebk/vfs/__init__.py +101 -0
  66. ebk/vfs/base.py +298 -0
  67. ebk/vfs/library_vfs.py +122 -0
  68. ebk/vfs/nodes/__init__.py +54 -0
  69. ebk/vfs/nodes/authors.py +196 -0
  70. ebk/vfs/nodes/books.py +480 -0
  71. ebk/vfs/nodes/files.py +155 -0
  72. ebk/vfs/nodes/metadata.py +385 -0
  73. ebk/vfs/nodes/root.py +100 -0
  74. ebk/vfs/nodes/similar.py +165 -0
  75. ebk/vfs/nodes/subjects.py +184 -0
  76. ebk/vfs/nodes/tags.py +371 -0
  77. ebk/vfs/resolver.py +228 -0
  78. ebk/vfs_router.py +275 -0
  79. ebk/views/__init__.py +32 -0
  80. ebk/views/dsl.py +668 -0
  81. ebk/views/service.py +619 -0
  82. ebk-0.4.4.dist-info/METADATA +755 -0
  83. ebk-0.4.4.dist-info/RECORD +87 -0
  84. ebk-0.4.4.dist-info/WHEEL +5 -0
  85. ebk-0.4.4.dist-info/entry_points.txt +2 -0
  86. ebk-0.4.4.dist-info/licenses/LICENSE +21 -0
  87. ebk-0.4.4.dist-info/top_level.txt +1 -0
ebk/similarity/base.py ADDED
@@ -0,0 +1,154 @@
1
+ """Base classes for the similarity system.
2
+
3
+ This module defines the core abstractions:
4
+ - Extractor: Extracts values from books
5
+ - Metric: Computes similarity between values
6
+ - Feature: Combines an extractor and a metric
7
+ """
8
+
9
+ from abc import ABC, abstractmethod
10
+ from pathlib import Path
11
+ from typing import Any, Dict, Generic, TypeVar
12
+
13
+ from ebk.db.models import Book
14
+
15
+ T = TypeVar("T")
16
+
17
+
18
class Extractor(ABC, Generic[T]):
    """Abstract base for pulling a single comparable value out of a book.

    Each concrete subclass exposes one facet of a book, for example:
        - ContentExtractor: the full extracted text
        - AuthorsExtractor: the set of author names
        - SubjectsExtractor: the set of subjects/tags
        - PublicationYearExtractor: the publication year
    """

    @abstractmethod
    def extract(self, book: Book) -> T:
        """Return this extractor's value for *book*.

        Args:
            book: The book to read the value from.

        Returns:
            The extracted value; its concrete type is determined by the
            subclass (see the type parameter ``T``).
        """
        ...
39
+
40
+
41
class Metric(ABC, Generic[T]):
    """Abstract base for pairwise similarity between extracted values.

    Implementations must return scores normalized to the closed interval
    [0, 1], where 0 means completely dissimilar and 1 means identical.

    Examples:
        - TfidfMetric: cosine similarity of TF-IDF vectors
        - JaccardMetric: set overlap
        - ExactMatchMetric: 1 if equal, 0 otherwise
        - TemporalDecayMetric: Gaussian decay on time difference
    """

    @abstractmethod
    def similarity(self, value1: T, value2: T) -> float:
        """Return the similarity of *value1* and *value2*.

        Args:
            value1: First value.
            value2: Second value.

        Returns:
            A score in [0, 1].
        """
        ...

    def fit(self, data: Dict[int, T]) -> None:
        """Optionally pre-compute state over a corpus.

        Metrics that need corpus-level preparation override this, e.g.:
            - TF-IDF: fit the vectorizer and cache vectors
            - Embeddings: compute and cache embeddings

        The default does nothing, which suits stateless metrics such as
        Jaccard, exact match, or temporal decay.

        Args:
            data: Mapping of book ID to the value extracted for that book.
        """
        # Stateless metrics require no fitting.

    def save(self, path: Path) -> None:
        """Optionally persist fitted state to disk; no-op by default.

        Override for metrics that cache expensive computations.

        Args:
            path: Destination for the serialized state.
        """
        # Nothing to persist for stateless metrics.

    def load(self, path: Path) -> None:
        """Optionally restore fitted state from disk; no-op by default.

        Override for metrics that cache expensive computations.

        Args:
            path: Location of previously saved state.
        """
        # Nothing to restore for stateless metrics.
104
+
105
+
106
class Feature:
    """One weighted aspect of book similarity: an extractor plus a metric.

    Typical features include:
        - Content similarity (text + TF-IDF)
        - Author overlap (authors + Jaccard)
        - Temporal proximity (publication year + Gaussian decay)

    Attributes:
        extractor: Extractor that pulls values from books.
        metric: Metric that scores two extracted values.
        weight: Multiplier applied to the metric's score (default 1.0).
        name: Human-readable label; auto-derived from the extractor and
            metric class names when not supplied.
    """

    def __init__(
        self,
        extractor: Extractor,
        metric: Metric,
        weight: float = 1.0,
        name: str = None,
    ):
        """Initialize a feature.

        Args:
            extractor: Extractor that pulls values from books.
            metric: Metric that scores two extracted values.
            weight: Multiplier for this feature (default 1.0).
            name: Optional label; a default of
                ``"<ExtractorClass>+<MetricClass>"`` is used when falsy.
        """
        self.extractor = extractor
        self.metric = metric
        self.weight = weight
        default_label = f"{extractor.__class__.__name__}+{metric.__class__.__name__}"
        # Mirror the truthiness check: an empty string also falls back.
        self.name = name if name else default_label

    def similarity(self, book1: Book, book2: Book) -> float:
        """Return the weighted similarity of *book1* and *book2*.

        Args:
            book1: First book.
            book2: Second book.

        Returns:
            ``metric.similarity(...)`` scaled by this feature's weight.
        """
        first = self.extractor.extract(book1)
        second = self.extractor.extract(book2)
        return self.weight * self.metric.similarity(first, second)
ebk/similarity/core.py ADDED
@@ -0,0 +1,471 @@
1
+ """Core BookSimilarity class with fluent API."""
2
+
3
+ from pathlib import Path
4
+ from typing import Dict, List, Optional, Tuple
5
+
6
+ import numpy as np
7
+
8
+ from ebk.db.models import Book
9
+ from ebk.similarity.base import Feature, Metric
10
+ from ebk.similarity.extractors import (
11
+ AuthorsExtractor,
12
+ ContentExtractor,
13
+ DescriptionExtractor,
14
+ LanguageExtractor,
15
+ PageCountExtractor,
16
+ PublicationYearExtractor,
17
+ PublisherExtractor,
18
+ SubjectsExtractor,
19
+ )
20
+ from ebk.similarity.metrics import (
21
+ CosineMetric,
22
+ ExactMatchMetric,
23
+ JaccardMetric,
24
+ NumericProximityMetric,
25
+ TemporalDecayMetric,
26
+ TfidfMetric,
27
+ )
28
+
29
+
30
class BookSimilarity:
    """Compute similarity between books using multiple weighted features.

    This class uses a fluent API for configuration:

    Example:
        >>> sim = (BookSimilarity()
        ...     .content(weight=4.0)
        ...     .authors(weight=2.0)
        ...     .subjects(weight=1.0)
        ...     .temporal(weight=0.5))
        >>> sim.fit(books)
        >>> score = sim.similarity(book1, book2)

    Each method adds a feature (extractor + metric + weight); the final
    similarity is the weighted average of all features.

    Three-tier API:
        - Tier 1: Presets (.balanced(), .content_only())
        - Tier 2: Semantic methods (.content(), .authors()) with defaults
        - Tier 3: Escape hatch (.custom()) for power users
    """

    def __init__(self):
        """Initialize empty similarity configuration."""
        self.features: List[Feature] = []
        self._fitted = False

    def _add_feature(
        self, extractor, metric: Metric, weight: float, name: str
    ) -> "BookSimilarity":
        """Append one configured Feature and return self for chaining.

        Shared by every Tier-2 method so the extractor/metric/weight/name
        wiring lives in exactly one place.
        """
        self.features.append(Feature(extractor, metric, weight, name))
        return self

    # ===== Tier 1: Presets =====

    def balanced(self) -> "BookSimilarity":
        """Balanced preset with reasonable defaults.

        Weights:
            - Content (TF-IDF): 4.0
            - Authors (Jaccard): 2.0
            - Subjects (Jaccard): 1.0
            - Temporal (Gaussian): 0.5

        Returns:
            Self for chaining.
        """
        return (
            self.content(weight=4.0)
            .authors(weight=2.0)
            .subjects(weight=1.0)
            .temporal(weight=0.5)
        )

    def content_only(self, metric: Optional[Metric] = None) -> "BookSimilarity":
        """Content-only preset (pure semantic similarity).

        Args:
            metric: Optional custom metric (default TfidfMetric).

        Returns:
            Self for chaining.
        """
        return self.content(weight=1.0, metric=metric)

    def metadata_only(self) -> "BookSimilarity":
        """Metadata-only preset (no content similarity).

        Weights:
            - Authors (Jaccard): 3.0
            - Subjects (Jaccard): 2.0
            - Temporal (Gaussian): 1.0
            - Language (Exact): 1.0
            - Publisher (Exact): 0.5

        Returns:
            Self for chaining.
        """
        return (
            self.authors(weight=3.0)
            .subjects(weight=2.0)
            .temporal(weight=1.0)
            .language(weight=1.0)
            .publisher(weight=0.5)
        )

    def sparse_friendly(self) -> "BookSimilarity":
        """Preset optimized for sparse data (limited or no extracted text).

        Leans on metadata plus description matching; works well when books
        lack extracted full text.

        Weights:
            - Description (TF-IDF): 2.0
            - Authors (Jaccard): 3.0
            - Subjects (Jaccard): 3.0
            - Temporal (Gaussian): 1.0
            - Language (Exact): 1.5
            - Publisher (Exact): 1.0

        Returns:
            Self for chaining.
        """
        return (
            self.description(weight=2.0)
            .authors(weight=3.0)
            .subjects(weight=3.0)
            .temporal(weight=1.0)
            .language(weight=1.5)
            .publisher(weight=1.0)
        )

    # ===== Tier 2: Semantic Methods =====

    def content(
        self, weight: float = 1.0, metric: Optional[Metric] = None
    ) -> "BookSimilarity":
        """Add content similarity (full text).

        Args:
            weight: Weight for this feature (default 1.0).
            metric: Optional custom metric (default TfidfMetric).

        Returns:
            Self for chaining.
        """
        return self._add_feature(
            ContentExtractor(), metric or TfidfMetric(), weight, "content"
        )

    def description(
        self, weight: float = 1.0, metric: Optional[Metric] = None
    ) -> "BookSimilarity":
        """Add description similarity (book summary/blurb).

        Args:
            weight: Weight for this feature (default 1.0).
            metric: Optional custom metric (default TfidfMetric).

        Returns:
            Self for chaining.
        """
        return self._add_feature(
            DescriptionExtractor(), metric or TfidfMetric(), weight, "description"
        )

    def authors(
        self, weight: float = 1.0, metric: Optional[Metric] = None
    ) -> "BookSimilarity":
        """Add author overlap similarity.

        Args:
            weight: Weight for this feature (default 1.0).
            metric: Optional custom metric (default JaccardMetric).

        Returns:
            Self for chaining.
        """
        return self._add_feature(
            AuthorsExtractor(), metric or JaccardMetric(), weight, "authors"
        )

    def subjects(
        self, weight: float = 1.0, metric: Optional[Metric] = None
    ) -> "BookSimilarity":
        """Add subject/tag overlap similarity.

        Args:
            weight: Weight for this feature (default 1.0).
            metric: Optional custom metric (default JaccardMetric).

        Returns:
            Self for chaining.
        """
        return self._add_feature(
            SubjectsExtractor(), metric or JaccardMetric(), weight, "subjects"
        )

    def temporal(
        self, weight: float = 1.0, metric: Optional[Metric] = None, sigma: float = 10.0
    ) -> "BookSimilarity":
        """Add temporal proximity similarity (publication date).

        Args:
            weight: Weight for this feature (default 1.0).
            metric: Optional custom metric (default TemporalDecayMetric).
            sigma: Standard deviation in years for Gaussian decay
                (default 10.0; ignored when *metric* is supplied).

        Returns:
            Self for chaining.
        """
        return self._add_feature(
            PublicationYearExtractor(),
            metric or TemporalDecayMetric(sigma=sigma),
            weight,
            "temporal",
        )

    def language(
        self, weight: float = 1.0, metric: Optional[Metric] = None
    ) -> "BookSimilarity":
        """Add language match similarity.

        Args:
            weight: Weight for this feature (default 1.0).
            metric: Optional custom metric (default ExactMatchMetric:
                1 if same language, 0 otherwise).

        Returns:
            Self for chaining.
        """
        return self._add_feature(
            LanguageExtractor(), metric or ExactMatchMetric(), weight, "language"
        )

    def publisher(
        self, weight: float = 1.0, metric: Optional[Metric] = None
    ) -> "BookSimilarity":
        """Add publisher match similarity.

        Args:
            weight: Weight for this feature (default 1.0).
            metric: Optional custom metric (default ExactMatchMetric:
                1 if same publisher, 0 otherwise).

        Returns:
            Self for chaining.
        """
        return self._add_feature(
            PublisherExtractor(), metric or ExactMatchMetric(), weight, "publisher"
        )

    def page_count(
        self,
        weight: float = 1.0,
        metric: Optional[Metric] = None,
        max_diff: float = 1000.0,
    ) -> "BookSimilarity":
        """Add page count proximity similarity.

        Args:
            weight: Weight for this feature (default 1.0).
            metric: Optional custom metric (default NumericProximityMetric).
            max_diff: Maximum expected difference in pages (default 1000;
                ignored when *metric* is supplied).

        Returns:
            Self for chaining.
        """
        return self._add_feature(
            PageCountExtractor(),
            metric or NumericProximityMetric(max_diff=max_diff),
            weight,
            "page_count",
        )

    # ===== Tier 3: Escape Hatch =====

    def custom(
        self, feature: Feature, name: Optional[str] = None
    ) -> "BookSimilarity":
        """Add a custom feature for power users.

        Args:
            feature: Custom Feature (extractor + metric + weight).
            name: Optional name; when truthy it replaces the feature's name.

        Returns:
            Self for chaining.
        """
        if name:
            feature.name = name
        self.features.append(feature)
        return self

    # ===== Core Functionality =====

    def fit(self, books: List[Book]) -> "BookSimilarity":
        """Fit all metrics on the corpus.

        Pre-computes expensive state (e.g. TF-IDF vectors) for large
        performance gains on repeated similarity queries.

        Args:
            books: List of books to fit on. An empty list is a no-op and
                leaves the instance unfitted.

        Returns:
            Self for chaining.
        """
        if not books:
            return self

        for feature in self.features:
            # Extract this feature's value for every book, keyed by book ID.
            data = {}
            for book in books:
                try:
                    data[book.id] = feature.extractor.extract(book)
                except Exception:
                    # NOTE(review): best-effort — books whose extraction
                    # fails are simply excluded from fitting for this
                    # feature; errors are not surfaced.
                    continue

            # No-op for stateless metrics (Jaccard, exact match, ...).
            feature.metric.fit(data)

        self._fitted = True
        return self

    def similarity(self, book1: Book, book2: Book) -> float:
        """Compute similarity between two books.

        Returns the weighted average of all feature similarities.

        Args:
            book1: First book.
            book2: Second book.

        Returns:
            Similarity score in [0, 1]; 0.0 when every feature failed.

        Raises:
            ValueError: If no features have been configured.
        """
        if not self.features:
            raise ValueError("No features configured. Use .content(), .authors(), etc.")

        total_weighted_sim = 0.0
        total_weight = 0.0

        for feature in self.features:
            try:
                total_weighted_sim += feature.similarity(book1, book2)
                total_weight += feature.weight
            except Exception:
                # NOTE(review): deliberately best-effort — a failing feature
                # (e.g. missing metadata) is dropped from the weighted
                # average rather than failing the whole score.
                continue

        if total_weight == 0:
            return 0.0

        return total_weighted_sim / total_weight

    def similarity_matrix(self, books: List[Book]) -> np.ndarray:
        """Compute pairwise similarity matrix for all books.

        Only the upper triangle is computed and mirrored, since similarity
        is symmetric; this halves the pairwise work.

        Args:
            books: List of books.

        Returns:
            NxN numpy array where ``matrix[i, j]`` is
            ``similarity(books[i], books[j])``.
        """
        n = len(books)
        matrix = np.zeros((n, n))

        # A book is always identical to itself.
        np.fill_diagonal(matrix, 1.0)

        for i in range(n):
            for j in range(i + 1, n):
                sim = self.similarity(books[i], books[j])
                # Tuple indexing (matrix[i, j]) instead of chained
                # matrix[i][j]: one indexing operation, no intermediate
                # row-view array per assignment.
                matrix[i, j] = sim
                matrix[j, i] = sim  # symmetric

        return matrix

    def find_similar(
        self, book: Book, candidates: List[Book], top_k: int = 10
    ) -> List[Tuple[Book, float]]:
        """Find top-k most similar books from candidates.

        Args:
            book: Query book.
            candidates: Candidate books to compare against (the query book
                itself, matched by ``id``, is skipped).
            top_k: Number of results to return (default 10).

        Returns:
            List of (book, similarity) tuples sorted by similarity
            descending.
        """
        similarities = []
        for candidate in candidates:
            if candidate.id == book.id:
                continue  # Skip self
            similarities.append((candidate, self.similarity(book, candidate)))

        similarities.sort(key=lambda pair: pair[1], reverse=True)
        return similarities[:top_k]

    def save(self, path: Path) -> None:
        """Save fitted state to disk.

        Writes one file per feature metric under *path*; stateless metrics
        write nothing (their ``save`` is a no-op).

        Args:
            path: Directory to save to (created if missing).

        Raises:
            RuntimeError: If ``fit()`` has not been called.
        """
        if not self._fitted:
            raise RuntimeError("Must call fit() before save()")

        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)

        # File names encode the feature index so load() can realign them
        # with the same configuration order.
        for i, feature in enumerate(self.features):
            metric_path = path / f"metric_{i}_{feature.name}.pkl"
            feature.metric.save(metric_path)

    def load(self, path: Path) -> None:
        """Load fitted state from disk.

        Expects the same feature configuration (order and names) that was
        used when saving; missing files are silently skipped.

        Args:
            path: Directory to load from.
        """
        path = Path(path)

        for i, feature in enumerate(self.features):
            metric_path = path / f"metric_{i}_{feature.name}.pkl"
            if metric_path.exists():
                feature.metric.load(metric_path)

        self._fitted = True