ebk-0.1.0-py3-none-any.whl → ebk-0.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of ebk has been flagged as potentially problematic.

Files changed (84)
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +443 -0
  4. ebk/ai/llm_providers/__init__.py +21 -0
  5. ebk/ai/llm_providers/base.py +230 -0
  6. ebk/ai/llm_providers/ollama.py +362 -0
  7. ebk/ai/metadata_enrichment.py +396 -0
  8. ebk/ai/question_generator.py +328 -0
  9. ebk/ai/reading_companion.py +224 -0
  10. ebk/ai/semantic_search.py +434 -0
  11. ebk/ai/text_extractor.py +394 -0
  12. ebk/cli.py +2828 -680
  13. ebk/config.py +260 -22
  14. ebk/db/__init__.py +37 -0
  15. ebk/db/migrations.py +180 -0
  16. ebk/db/models.py +526 -0
  17. ebk/db/session.py +144 -0
  18. ebk/decorators.py +132 -0
  19. ebk/exports/base_exporter.py +218 -0
  20. ebk/exports/html_library.py +1390 -0
  21. ebk/exports/html_utils.py +117 -0
  22. ebk/exports/hugo.py +7 -3
  23. ebk/exports/jinja_export.py +287 -0
  24. ebk/exports/multi_facet_export.py +164 -0
  25. ebk/exports/symlink_dag.py +479 -0
  26. ebk/extract_metadata.py +76 -7
  27. ebk/library_db.py +899 -0
  28. ebk/plugins/__init__.py +42 -0
  29. ebk/plugins/base.py +502 -0
  30. ebk/plugins/hooks.py +444 -0
  31. ebk/plugins/registry.py +500 -0
  32. ebk/repl/__init__.py +9 -0
  33. ebk/repl/find.py +126 -0
  34. ebk/repl/grep.py +174 -0
  35. ebk/repl/shell.py +1677 -0
  36. ebk/repl/text_utils.py +320 -0
  37. ebk/search_parser.py +413 -0
  38. ebk/server.py +1633 -0
  39. ebk/services/__init__.py +11 -0
  40. ebk/services/import_service.py +442 -0
  41. ebk/services/tag_service.py +282 -0
  42. ebk/services/text_extraction.py +317 -0
  43. ebk/similarity/__init__.py +77 -0
  44. ebk/similarity/base.py +154 -0
  45. ebk/similarity/core.py +445 -0
  46. ebk/similarity/extractors.py +168 -0
  47. ebk/similarity/metrics.py +376 -0
  48. ebk/vfs/__init__.py +101 -0
  49. ebk/vfs/base.py +301 -0
  50. ebk/vfs/library_vfs.py +124 -0
  51. ebk/vfs/nodes/__init__.py +54 -0
  52. ebk/vfs/nodes/authors.py +196 -0
  53. ebk/vfs/nodes/books.py +480 -0
  54. ebk/vfs/nodes/files.py +155 -0
  55. ebk/vfs/nodes/metadata.py +385 -0
  56. ebk/vfs/nodes/root.py +100 -0
  57. ebk/vfs/nodes/similar.py +165 -0
  58. ebk/vfs/nodes/subjects.py +184 -0
  59. ebk/vfs/nodes/tags.py +371 -0
  60. ebk/vfs/resolver.py +228 -0
  61. ebk-0.3.2.dist-info/METADATA +755 -0
  62. ebk-0.3.2.dist-info/RECORD +69 -0
  63. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/WHEEL +1 -1
  64. ebk-0.3.2.dist-info/licenses/LICENSE +21 -0
  65. ebk/imports/__init__.py +0 -0
  66. ebk/imports/calibre.py +0 -144
  67. ebk/imports/ebooks.py +0 -116
  68. ebk/llm.py +0 -58
  69. ebk/manager.py +0 -44
  70. ebk/merge.py +0 -308
  71. ebk/streamlit/__init__.py +0 -0
  72. ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
  73. ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
  74. ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
  75. ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
  76. ebk/streamlit/app.py +0 -185
  77. ebk/streamlit/display.py +0 -168
  78. ebk/streamlit/filters.py +0 -151
  79. ebk/streamlit/utils.py +0 -58
  80. ebk/utils.py +0 -311
  81. ebk-0.1.0.dist-info/METADATA +0 -457
  82. ebk-0.1.0.dist-info/RECORD +0 -29
  83. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/entry_points.txt +0 -0
  84. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/top_level.txt +0 -0
ebk/ai/llm_providers/base.py (new file)
@@ -0,0 +1,230 @@
+ """
+ Base abstract LLM provider interface.
+
+ This module defines the abstract base class that all LLM providers must implement.
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import Dict, Any, List, Optional, AsyncIterator
+ from dataclasses import dataclass
+ from enum import Enum
+
+
+ class ModelCapability(Enum):
+     """Capabilities that an LLM model might support."""
+     TEXT_GENERATION = "text_generation"
+     JSON_MODE = "json_mode"
+     FUNCTION_CALLING = "function_calling"
+     STREAMING = "streaming"
+     VISION = "vision"
+     EMBEDDINGS = "embeddings"
+
+
+ @dataclass
+ class LLMConfig:
+     """Configuration for LLM provider."""
+
+     # Connection settings
+     base_url: str
+     api_key: Optional[str] = None
+
+     # Model settings
+     model: str = "default"
+     temperature: float = 0.7
+     max_tokens: Optional[int] = None
+     top_p: float = 0.9
+
+     # Behavior settings
+     timeout: float = 60.0
+     max_retries: int = 3
+
+     # Additional provider-specific settings
+     extra_params: Dict[str, Any] = None
+
+     def __post_init__(self):
+         if self.extra_params is None:
+             self.extra_params = {}
+
+
+ @dataclass
+ class LLMResponse:
+     """Response from LLM completion."""
+
+     content: str
+     model: str
+     finish_reason: Optional[str] = None
+     usage: Optional[Dict[str, int]] = None  # tokens used
+     raw_response: Optional[Dict[str, Any]] = None
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert to dictionary."""
+         return {
+             "content": self.content,
+             "model": self.model,
+             "finish_reason": self.finish_reason,
+             "usage": self.usage,
+         }
+
+
+ class BaseLLMProvider(ABC):
+     """
+     Abstract base class for LLM providers.
+
+     All LLM providers must implement this interface to ensure consistency
+     across different backends (Ollama, OpenAI, Anthropic, etc.).
+     """
+
+     def __init__(self, config: LLMConfig):
+         """
+         Initialize the provider with configuration.
+
+         Args:
+             config: LLM configuration
+         """
+         self.config = config
+         self._client = None
+
+     @property
+     @abstractmethod
+     def name(self) -> str:
+         """Provider name (e.g., 'ollama', 'openai')."""
+         pass
+
+     @property
+     @abstractmethod
+     def supported_capabilities(self) -> List[ModelCapability]:
+         """List of capabilities supported by this provider."""
+         pass
+
+     @abstractmethod
+     async def initialize(self) -> None:
+         """
+         Initialize the provider (establish connections, etc.).
+
+         This is called once before first use.
+         """
+         pass
+
+     @abstractmethod
+     async def cleanup(self) -> None:
+         """
+         Cleanup resources (close connections, etc.).
+
+         This is called when the provider is no longer needed.
+         """
+         pass
+
+     @abstractmethod
+     async def complete(
+         self,
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         **kwargs
+     ) -> LLMResponse:
+         """
+         Generate a text completion.
+
+         Args:
+             prompt: The user prompt
+             system_prompt: Optional system prompt to set context
+             **kwargs: Additional provider-specific parameters
+
+         Returns:
+             LLMResponse with generated text
+
+         Raises:
+             Exception: If completion fails
+         """
+         pass
+
+     @abstractmethod
+     async def complete_json(
+         self,
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         schema: Optional[Dict[str, Any]] = None,
+         **kwargs
+     ) -> Dict[str, Any]:
+         """
+         Generate a JSON completion.
+
+         Args:
+             prompt: The user prompt
+             system_prompt: Optional system prompt
+             schema: Optional JSON schema to validate against
+             **kwargs: Additional parameters
+
+         Returns:
+             Parsed JSON object
+
+         Raises:
+             Exception: If completion or parsing fails
+         """
+         pass
+
+     async def complete_streaming(
+         self,
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         **kwargs
+     ) -> AsyncIterator[str]:
+         """
+         Generate a streaming text completion.
+
+         Args:
+             prompt: The user prompt
+             system_prompt: Optional system prompt
+             **kwargs: Additional parameters
+
+         Yields:
+             Text chunks as they are generated
+
+         Raises:
+             NotImplementedError: If streaming is not supported
+         """
+         raise NotImplementedError(
+             f"{self.name} does not support streaming completions"
+         )
+
+     async def get_embeddings(
+         self,
+         texts: List[str],
+         **kwargs
+     ) -> List[List[float]]:
+         """
+         Get embeddings for text inputs.
+
+         Args:
+             texts: List of texts to embed
+             **kwargs: Additional parameters
+
+         Returns:
+             List of embedding vectors
+
+         Raises:
+             NotImplementedError: If embeddings are not supported
+         """
+         raise NotImplementedError(
+             f"{self.name} does not support embeddings"
+         )
+
+     def supports_capability(self, capability: ModelCapability) -> bool:
+         """
+         Check if provider supports a specific capability.
+
+         Args:
+             capability: The capability to check
+
+         Returns:
+             True if supported
+         """
+         return capability in self.supported_capabilities
+
+     async def __aenter__(self):
+         """Async context manager entry."""
+         await self.initialize()
+         return self
+
+     async def __aexit__(self, exc_type, exc_val, exc_tb):
+         """Async context manager exit."""
+         await self.cleanup()
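
The class above is purely abstract: name, supported_capabilities, initialize, cleanup, complete, and complete_json must be overridden, while streaming and embeddings fall back to NotImplementedError and the async context manager wires initialize/cleanup automatically. Below is a minimal sketch of a concrete subclass against this interface; the EchoProvider class and its trivial method bodies are invented here for illustration, and only the imported names come from the package.

from typing import Any, Dict, List, Optional

from ebk.ai.llm_providers.base import (
    BaseLLMProvider, LLMConfig, LLMResponse, ModelCapability,
)


class EchoProvider(BaseLLMProvider):
    """Toy provider that echoes prompts back; illustrative only."""

    @property
    def name(self) -> str:
        return "echo"

    @property
    def supported_capabilities(self) -> List[ModelCapability]:
        return [ModelCapability.TEXT_GENERATION, ModelCapability.JSON_MODE]

    async def initialize(self) -> None:
        pass  # nothing to connect to

    async def cleanup(self) -> None:
        pass

    async def complete(self, prompt: str, system_prompt: Optional[str] = None,
                       **kwargs) -> LLMResponse:
        # A real provider would call its backend here.
        return LLMResponse(content=prompt, model=self.config.model)

    async def complete_json(self, prompt: str, system_prompt: Optional[str] = None,
                            schema: Optional[Dict[str, Any]] = None,
                            **kwargs) -> Dict[str, Any]:
        return {"echo": prompt}


# Usage: the async context manager runs initialize()/cleanup() around the block.
#     async with EchoProvider(LLMConfig(base_url="")) as provider:
#         reply = await provider.complete("hello")
#         assert provider.supports_capability(ModelCapability.TEXT_GENERATION)
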
ebk/ai/llm_providers/ollama.py (new file)
@@ -0,0 +1,362 @@
+ """
+ Ollama LLM Provider.
+
+ Supports both local and remote Ollama instances.
+ """
+
+ import json
+ import re
+ from typing import Dict, Any, List, Optional, AsyncIterator
+ import httpx
+
+ from .base import BaseLLMProvider, LLMConfig, LLMResponse, ModelCapability
+
+
+ class OllamaProvider(BaseLLMProvider):
+     """
+     Ollama LLM provider.
+
+     Supports:
+     - Local Ollama (default: http://localhost:11434)
+     - Remote Ollama (e.g., basement GPU server)
+     - Streaming completions
+     - JSON mode
+     - Embeddings
+     """
+
+     def __init__(self, config: LLMConfig):
+         """
+         Initialize Ollama provider.
+
+         Args:
+             config: LLM configuration with base_url pointing to Ollama
+         """
+         super().__init__(config)
+
+     @property
+     def name(self) -> str:
+         return "ollama"
+
+     @property
+     def supported_capabilities(self) -> List[ModelCapability]:
+         return [
+             ModelCapability.TEXT_GENERATION,
+             ModelCapability.JSON_MODE,
+             ModelCapability.STREAMING,
+             ModelCapability.EMBEDDINGS,
+         ]
+
+     @classmethod
+     def local(cls, model: str = "llama3.2", **kwargs) -> 'OllamaProvider':
+         """
+         Create provider for local Ollama instance.
+
+         Args:
+             model: Model name (e.g., 'llama3.2', 'mistral', 'codellama')
+             **kwargs: Additional config parameters
+
+         Returns:
+             Configured OllamaProvider
+         """
+         config = LLMConfig(
+             base_url="http://localhost:11434",
+             model=model,
+             **kwargs
+         )
+         return cls(config)
+
+     @classmethod
+     def remote(
+         cls,
+         host: str,
+         port: int = 11434,
+         model: str = "llama3.2",
+         **kwargs
+     ) -> 'OllamaProvider':
+         """
+         Create provider for remote Ollama instance.
+
+         Args:
+             host: Remote host (e.g., '192.168.1.100', 'basement-gpu.local')
+             port: Ollama port (default: 11434)
+             model: Model name
+             **kwargs: Additional config parameters
+
+         Returns:
+             Configured OllamaProvider
+
+         Example:
+             >>> provider = OllamaProvider.remote(
+             ...     host='192.168.1.100',
+             ...     model='llama3.2'
+             ... )
+         """
+         config = LLMConfig(
+             base_url=f"http://{host}:{port}",
+             model=model,
+             **kwargs
+         )
+         return cls(config)
+
+     async def initialize(self) -> None:
+         """Initialize HTTP client."""
+         self._client = httpx.AsyncClient(
+             base_url=self.config.base_url,
+             timeout=self.config.timeout,
+         )
+
+     async def cleanup(self) -> None:
+         """Close HTTP client."""
+         if self._client:
+             await self._client.aclose()
+             self._client = None
+
+     async def complete(
+         self,
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         **kwargs
+     ) -> LLMResponse:
+         """
+         Generate completion using Ollama.
+
+         Args:
+             prompt: User prompt
+             system_prompt: Optional system prompt
+             **kwargs: Additional parameters (temperature, max_tokens, etc.)
+
+         Returns:
+             LLMResponse with generated text
+         """
+         if not self._client:
+             await self.initialize()
+
+         # Build request payload
+         data = {
+             "model": self.config.model,
+             "prompt": prompt,
+             "stream": False,
+             "options": {
+                 "temperature": kwargs.get("temperature", self.config.temperature),
+                 "top_p": kwargs.get("top_p", self.config.top_p),
+             }
+         }
+
+         if system_prompt:
+             data["system"] = system_prompt
+
+         if self.config.max_tokens:
+             data["options"]["num_predict"] = self.config.max_tokens
+
+         # Make request
+         response = await self._client.post("/api/generate", json=data)
+         response.raise_for_status()
+
+         result = response.json()
+
+         return LLMResponse(
+             content=result["response"],
+             model=result.get("model", self.config.model),
+             finish_reason=result.get("done_reason"),
+             usage={
+                 "prompt_tokens": result.get("prompt_eval_count", 0),
+                 "completion_tokens": result.get("eval_count", 0),
+             },
+             raw_response=result,
+         )
+
+     async def complete_json(
+         self,
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         schema: Optional[Dict[str, Any]] = None,
+         **kwargs
+     ) -> Dict[str, Any]:
+         """
+         Generate JSON completion.
+
+         Args:
+             prompt: User prompt
+             system_prompt: Optional system prompt
+             schema: Optional JSON schema
+             **kwargs: Additional parameters
+
+         Returns:
+             Parsed JSON object
+         """
+         # Enhance prompt for JSON output
+         json_system = "You are a helpful assistant that responds only in valid JSON format."
+         if system_prompt:
+             json_system = f"{system_prompt}\n\n{json_system}"
+
+         json_prompt = f"{prompt}\n\nRespond with valid JSON only. Do not include any explanation or markdown formatting."
+
+         if schema:
+             json_prompt += f"\n\nFollow this schema:\n```json\n{json.dumps(schema, indent=2)}\n```"
+
+         # Use Ollama's JSON format mode if available
+         kwargs["format"] = "json"
+
+         response = await self.complete(json_prompt, system_prompt=json_system, **kwargs)
+
+         # Parse JSON from response
+         return self._parse_json_response(response.content)
+
+     def _parse_json_response(self, content: str) -> Dict[str, Any]:
+         """
+         Parse JSON from response content.
+
+         Handles common issues like markdown code blocks.
+         """
+         try:
+             # Try direct parse first
+             return json.loads(content)
+         except json.JSONDecodeError:
+             pass
+
+         # Try to extract JSON from markdown code block
+         cleaned = content.strip()
+         if cleaned.startswith("```json"):
+             cleaned = cleaned[7:]
+         elif cleaned.startswith("```"):
+             cleaned = cleaned[3:]
+
+         if cleaned.endswith("```"):
+             cleaned = cleaned[:-3]
+
+         cleaned = cleaned.strip()
+
+         try:
+             return json.loads(cleaned)
+         except json.JSONDecodeError:
+             pass
+
+         # Try to find JSON object in text
+         json_match = re.search(r'\{.*\}', content, re.DOTALL)
+         if json_match:
+             try:
+                 return json.loads(json_match.group())
+             except json.JSONDecodeError:
+                 pass
+
+         # Last resort: try to find JSON array
+         json_match = re.search(r'\[.*\]', content, re.DOTALL)
+         if json_match:
+             try:
+                 return json.loads(json_match.group())
+             except json.JSONDecodeError:
+                 pass
+
+         raise ValueError(f"Failed to parse JSON from response: {content[:200]}...")
+
+     async def complete_streaming(
+         self,
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         **kwargs
+     ) -> AsyncIterator[str]:
+         """
+         Generate streaming completion.
+
+         Args:
+             prompt: User prompt
+             system_prompt: Optional system prompt
+             **kwargs: Additional parameters
+
+         Yields:
+             Text chunks as they are generated
+         """
+         if not self._client:
+             await self.initialize()
+
+         data = {
+             "model": self.config.model,
+             "prompt": prompt,
+             "stream": True,
+             "options": {
+                 "temperature": kwargs.get("temperature", self.config.temperature),
+                 "top_p": kwargs.get("top_p", self.config.top_p),
+             }
+         }
+
+         if system_prompt:
+             data["system"] = system_prompt
+
+         async with self._client.stream("POST", "/api/generate", json=data) as response:
+             response.raise_for_status()
+
+             async for line in response.aiter_lines():
+                 if line.strip():
+                     try:
+                         chunk = json.loads(line)
+                         if "response" in chunk:
+                             yield chunk["response"]
+                         if chunk.get("done", False):
+                             break
+                     except json.JSONDecodeError:
+                         continue
+
+     async def get_embeddings(
+         self,
+         texts: List[str],
+         **kwargs
+     ) -> List[List[float]]:
+         """
+         Get embeddings using Ollama.
+
+         Args:
+             texts: List of texts to embed
+             **kwargs: Additional parameters
+
+         Returns:
+             List of embedding vectors
+         """
+         if not self._client:
+             await self.initialize()
+
+         embeddings = []
+
+         for text in texts:
+             data = {
+                 "model": self.config.model,
+                 "prompt": text,
+             }
+
+             response = await self._client.post("/api/embeddings", json=data)
+             response.raise_for_status()
+
+             result = response.json()
+             embeddings.append(result["embedding"])
+
+         return embeddings
+
+     async def list_models(self) -> List[str]:
+         """
+         List available models on Ollama server.
+
+         Returns:
+             List of model names
+         """
+         if not self._client:
+             await self.initialize()
+
+         response = await self._client.get("/api/tags")
+         response.raise_for_status()
+
+         result = response.json()
+         return [model["name"] for model in result.get("models", [])]
+
+     async def pull_model(self, model_name: str) -> None:
+         """
+         Pull a model from Ollama registry.
+
+         Args:
+             model_name: Name of model to pull (e.g., 'llama3.2', 'mistral')
+         """
+         if not self._client:
+             await self.initialize()
+
+         data = {"name": model_name, "stream": False}
+
+         response = await self._client.post("/api/pull", json=data)
+         response.raise_for_status()
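
Read together with base.py, OllamaProvider is meant to be driven through the async context manager so the httpx client is opened and closed automatically. A short usage sketch follows, assuming an Ollama server is reachable on localhost:11434 and the named model is installed; the model name, prompts, and script structure are placeholders, not part of the package.

import asyncio

from ebk.ai.llm_providers.ollama import OllamaProvider


async def main() -> None:
    # .local() targets http://localhost:11434; .remote(host=..., port=...) works the same way.
    async with OllamaProvider.local(model="llama3.2") as provider:
        # Plain text completion
        reply = await provider.complete("Summarize the plot of Dune in one sentence.")
        print(reply.content, reply.usage)

        # JSON completion; the schema is advisory (it is embedded into the prompt)
        record = await provider.complete_json(
            "Extract title and author from: 'Dune by Frank Herbert'",
            schema={"type": "object", "properties": {"title": {}, "author": {}}},
        )
        print(record)

        # Streaming chunks as they arrive
        async for chunk in provider.complete_streaming("Write a haiku about libraries."):
            print(chunk, end="", flush=True)


asyncio.run(main())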