ebk-0.4.4-py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in that public registry.
Files changed (87)
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +450 -0
  4. ebk/ai/llm_providers/__init__.py +26 -0
  5. ebk/ai/llm_providers/anthropic.py +209 -0
  6. ebk/ai/llm_providers/base.py +295 -0
  7. ebk/ai/llm_providers/gemini.py +285 -0
  8. ebk/ai/llm_providers/ollama.py +294 -0
  9. ebk/ai/metadata_enrichment.py +394 -0
  10. ebk/ai/question_generator.py +328 -0
  11. ebk/ai/reading_companion.py +224 -0
  12. ebk/ai/semantic_search.py +433 -0
  13. ebk/ai/text_extractor.py +393 -0
  14. ebk/calibre_import.py +66 -0
  15. ebk/cli.py +6433 -0
  16. ebk/config.py +230 -0
  17. ebk/db/__init__.py +37 -0
  18. ebk/db/migrations.py +507 -0
  19. ebk/db/models.py +725 -0
  20. ebk/db/session.py +144 -0
  21. ebk/decorators.py +1 -0
  22. ebk/exports/__init__.py +0 -0
  23. ebk/exports/base_exporter.py +218 -0
  24. ebk/exports/echo_export.py +279 -0
  25. ebk/exports/html_library.py +1743 -0
  26. ebk/exports/html_utils.py +87 -0
  27. ebk/exports/hugo.py +59 -0
  28. ebk/exports/jinja_export.py +286 -0
  29. ebk/exports/multi_facet_export.py +159 -0
  30. ebk/exports/opds_export.py +232 -0
  31. ebk/exports/symlink_dag.py +479 -0
  32. ebk/exports/zip.py +25 -0
  33. ebk/extract_metadata.py +341 -0
  34. ebk/ident.py +89 -0
  35. ebk/library_db.py +1440 -0
  36. ebk/opds.py +748 -0
  37. ebk/plugins/__init__.py +42 -0
  38. ebk/plugins/base.py +502 -0
  39. ebk/plugins/hooks.py +442 -0
  40. ebk/plugins/registry.py +499 -0
  41. ebk/repl/__init__.py +9 -0
  42. ebk/repl/find.py +126 -0
  43. ebk/repl/grep.py +173 -0
  44. ebk/repl/shell.py +1677 -0
  45. ebk/repl/text_utils.py +320 -0
  46. ebk/search_parser.py +413 -0
  47. ebk/server.py +3608 -0
  48. ebk/services/__init__.py +28 -0
  49. ebk/services/annotation_extraction.py +351 -0
  50. ebk/services/annotation_service.py +380 -0
  51. ebk/services/export_service.py +577 -0
  52. ebk/services/import_service.py +447 -0
  53. ebk/services/personal_metadata_service.py +347 -0
  54. ebk/services/queue_service.py +253 -0
  55. ebk/services/tag_service.py +281 -0
  56. ebk/services/text_extraction.py +317 -0
  57. ebk/services/view_service.py +12 -0
  58. ebk/similarity/__init__.py +77 -0
  59. ebk/similarity/base.py +154 -0
  60. ebk/similarity/core.py +471 -0
  61. ebk/similarity/extractors.py +168 -0
  62. ebk/similarity/metrics.py +376 -0
  63. ebk/skills/SKILL.md +182 -0
  64. ebk/skills/__init__.py +1 -0
  65. ebk/vfs/__init__.py +101 -0
  66. ebk/vfs/base.py +298 -0
  67. ebk/vfs/library_vfs.py +122 -0
  68. ebk/vfs/nodes/__init__.py +54 -0
  69. ebk/vfs/nodes/authors.py +196 -0
  70. ebk/vfs/nodes/books.py +480 -0
  71. ebk/vfs/nodes/files.py +155 -0
  72. ebk/vfs/nodes/metadata.py +385 -0
  73. ebk/vfs/nodes/root.py +100 -0
  74. ebk/vfs/nodes/similar.py +165 -0
  75. ebk/vfs/nodes/subjects.py +184 -0
  76. ebk/vfs/nodes/tags.py +371 -0
  77. ebk/vfs/resolver.py +228 -0
  78. ebk/vfs_router.py +275 -0
  79. ebk/views/__init__.py +32 -0
  80. ebk/views/dsl.py +668 -0
  81. ebk/views/service.py +619 -0
  82. ebk-0.4.4.dist-info/METADATA +755 -0
  83. ebk-0.4.4.dist-info/RECORD +87 -0
  84. ebk-0.4.4.dist-info/WHEEL +5 -0
  85. ebk-0.4.4.dist-info/entry_points.txt +2 -0
  86. ebk-0.4.4.dist-info/licenses/LICENSE +21 -0
  87. ebk-0.4.4.dist-info/top_level.txt +1 -0
ebk/ai/llm_providers/anthropic.py
@@ -0,0 +1,209 @@
+ """
+ Anthropic Claude LLM Provider.
+
+ Supports Claude models via the Anthropic API.
+ """
+
+ import json
+ from typing import Dict, Any, List, Optional, AsyncIterator
+ import httpx
+
+ from .base import BaseLLMProvider, LLMConfig, LLMResponse, ModelCapability
+
+
+ class AnthropicProvider(BaseLLMProvider):
+     """
+     Anthropic Claude LLM provider.
+
+     Supports:
+     - Claude 3 models (haiku, sonnet, opus)
+     - Claude 3.5/4 models
+     - Streaming completions
+     - JSON mode (via prompting)
+     """
+
+     API_VERSION = "2023-06-01"
+     DEFAULT_MODEL = "claude-sonnet-4-20250514"
+
+     def __init__(self, config: LLMConfig):
+         """
+         Initialize Anthropic provider.
+
+         Args:
+             config: LLM configuration with api_key for Anthropic
+         """
+         super().__init__(config)
+         if not config.api_key:
+             raise ValueError("Anthropic API key is required")
+
+     @property
+     def name(self) -> str:
+         return "anthropic"
+
+     @property
+     def supported_capabilities(self) -> List[ModelCapability]:
+         return [
+             ModelCapability.TEXT_GENERATION,
+             ModelCapability.JSON_MODE,
+             ModelCapability.STREAMING,
+             ModelCapability.VISION,
+         ]
+
+     @classmethod
+     def create(
+         cls,
+         api_key: str,
+         model: str = DEFAULT_MODEL,
+         **kwargs
+     ) -> 'AnthropicProvider':
+         """
+         Create an Anthropic provider.
+
+         Args:
+             api_key: Anthropic API key
+             model: Model name (e.g., 'claude-sonnet-4-20250514', 'claude-3-5-sonnet-20241022')
+             **kwargs: Additional config parameters
+
+         Returns:
+             Configured AnthropicProvider
+
+         Example:
+             >>> provider = AnthropicProvider.create(
+             ...     api_key="sk-ant-...",
+             ...     model="claude-sonnet-4-20250514"
+             ... )
+         """
+         config = LLMConfig(
+             base_url="https://api.anthropic.com",
+             api_key=api_key,
+             model=model,
+             **kwargs
+         )
+         return cls(config)
+
+     async def initialize(self) -> None:
+         """Initialize HTTP client with Anthropic headers."""
+         self._client = httpx.AsyncClient(
+             base_url=self.config.base_url,
+             timeout=self.config.timeout,
+             headers={
+                 "x-api-key": self.config.api_key,
+                 "anthropic-version": self.API_VERSION,
+                 "content-type": "application/json",
+             }
+         )
+
+     async def cleanup(self) -> None:
+         """Close HTTP client."""
+         if self._client:
+             await self._client.aclose()
+             self._client = None
+
+     async def complete(
+         self,
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         **kwargs
+     ) -> LLMResponse:
+         """
+         Generate completion using Anthropic Claude.
+
+         Args:
+             prompt: User prompt
+             system_prompt: Optional system prompt
+             **kwargs: Additional parameters (temperature, max_tokens, etc.)
+
+         Returns:
+             LLMResponse with generated text
+         """
+         if not self._client:
+             await self.initialize()
+
+         # Build request payload - Anthropic uses messages format
+         data = {
+             "model": self.config.model,
+             "messages": [
+                 {"role": "user", "content": prompt}
+             ],
+             "max_tokens": kwargs.get("max_tokens", self.config.max_tokens or 4096),
+             "temperature": kwargs.get("temperature", self.config.temperature),
+             "top_p": kwargs.get("top_p", self.config.top_p),
+         }
+
+         if system_prompt:
+             data["system"] = system_prompt
+
+         # Make request
+         response = await self._client.post("/v1/messages", json=data)
+         response.raise_for_status()
+
+         result = response.json()
+
+         # Extract content from response
+         content = ""
+         if result.get("content"):
+             for block in result["content"]:
+                 if block.get("type") == "text":
+                     content += block.get("text", "")
+
+         return LLMResponse(
+             content=content,
+             model=result.get("model", self.config.model),
+             finish_reason=result.get("stop_reason"),
+             usage={
+                 "prompt_tokens": result.get("usage", {}).get("input_tokens", 0),
+                 "completion_tokens": result.get("usage", {}).get("output_tokens", 0),
+             },
+             raw_response=result,
+         )
+
+     async def complete_streaming(
+         self,
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         **kwargs
+     ) -> AsyncIterator[str]:
+         """
+         Generate streaming completion.
+
+         Args:
+             prompt: User prompt
+             system_prompt: Optional system prompt
+             **kwargs: Additional parameters
+
+         Yields:
+             Text chunks as they are generated
+         """
+         if not self._client:
+             await self.initialize()
+
+         data = {
+             "model": self.config.model,
+             "messages": [
+                 {"role": "user", "content": prompt}
+             ],
+             "max_tokens": kwargs.get("max_tokens", self.config.max_tokens or 4096),
+             "temperature": kwargs.get("temperature", self.config.temperature),
+             "top_p": kwargs.get("top_p", self.config.top_p),
+             "stream": True,
+         }
+
+         if system_prompt:
+             data["system"] = system_prompt
+
+         async with self._client.stream("POST", "/v1/messages", json=data) as response:
+             response.raise_for_status()
+
+             async for line in response.aiter_lines():
+                 if line.startswith("data: "):
+                     try:
+                         chunk = json.loads(line[6:])
+                         # Handle different event types
+                         if chunk.get("type") == "content_block_delta":
+                             delta = chunk.get("delta", {})
+                             if delta.get("type") == "text_delta":
+                                 yield delta.get("text", "")
+                         elif chunk.get("type") == "message_stop":
+                             break
+                     except json.JSONDecodeError:
+                         continue
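For orientation, here is a minimal usage sketch driven only by the entry points visible in the diff above: create(), the async context manager inherited from BaseLLMProvider, complete(), and complete_streaming(). The API key, prompts, and printed fields are illustrative placeholders, not values shipped in the package.

import asyncio

from ebk.ai.llm_providers.anthropic import AnthropicProvider


async def demo():
    # create() supplies base_url and bundles everything into an LLMConfig;
    # DEFAULT_MODEL is used when no model argument is given.
    provider = AnthropicProvider.create(api_key="sk-ant-...")  # placeholder key

    # BaseLLMProvider.__aenter__/__aexit__ call initialize()/cleanup(),
    # opening and closing the httpx.AsyncClient around this block.
    async with provider:
        resp = await provider.complete(
            "Suggest three subject tags for a book on graph algorithms.",
            system_prompt="You are a terse cataloguing assistant.",
        )
        print(resp.content, resp.usage)

        # Streaming variant: text_delta chunks are yielded as they arrive.
        async for chunk in provider.complete_streaming("Name one classic algorithms text."):
            print(chunk, end="", flush=True)


asyncio.run(demo())

Note that complete() lazily calls initialize() when no client exists, so the context manager is a convenience rather than a requirement; cleanup() is still needed to close the underlying httpx.AsyncClient.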
ebk/ai/llm_providers/base.py
@@ -0,0 +1,295 @@
+ """
+ Base abstract LLM provider interface.
+
+ This module defines the abstract base class that all LLM providers must implement.
+ """
+
+ import json
+ import re
+ from abc import ABC, abstractmethod
+ from typing import Dict, Any, List, Optional, AsyncIterator
+ from dataclasses import dataclass
+ from enum import Enum
+
+
+ class ModelCapability(Enum):
+     """Capabilities that an LLM model might support."""
+     TEXT_GENERATION = "text_generation"
+     JSON_MODE = "json_mode"
+     FUNCTION_CALLING = "function_calling"
+     STREAMING = "streaming"
+     VISION = "vision"
+     EMBEDDINGS = "embeddings"
+
+
+ @dataclass
+ class LLMConfig:
+     """Configuration for LLM provider."""
+
+     # Connection settings
+     base_url: str
+     api_key: Optional[str] = None
+
+     # Model settings
+     model: str = "default"
+     temperature: float = 0.7
+     max_tokens: Optional[int] = None
+     top_p: float = 0.9
+
+     # Behavior settings
+     timeout: float = 60.0
+     max_retries: int = 3
+
+     # Additional provider-specific settings
+     extra_params: Dict[str, Any] = None
+
+     def __post_init__(self):
+         if self.extra_params is None:
+             self.extra_params = {}
+
+
+ @dataclass
+ class LLMResponse:
+     """Response from LLM completion."""
+
+     content: str
+     model: str
+     finish_reason: Optional[str] = None
+     usage: Optional[Dict[str, int]] = None  # tokens used
+     raw_response: Optional[Dict[str, Any]] = None
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert to dictionary."""
+         return {
+             "content": self.content,
+             "model": self.model,
+             "finish_reason": self.finish_reason,
+             "usage": self.usage,
+         }
+
+
+ class BaseLLMProvider(ABC):
+     """
+     Abstract base class for LLM providers.
+
+     All LLM providers must implement this interface to ensure consistency
+     across different backends (Ollama, OpenAI, Anthropic, etc.).
+     """
+
+     def __init__(self, config: LLMConfig):
+         """
+         Initialize the provider with configuration.
+
+         Args:
+             config: LLM configuration
+         """
+         self.config = config
+         self._client = None
+
+     @property
+     @abstractmethod
+     def name(self) -> str:
+         """Provider name (e.g., 'ollama', 'openai')."""
+         pass
+
+     @property
+     @abstractmethod
+     def supported_capabilities(self) -> List[ModelCapability]:
+         """List of capabilities supported by this provider."""
+         pass
+
+     @abstractmethod
+     async def initialize(self) -> None:
+         """
+         Initialize the provider (establish connections, etc.).
+
+         This is called once before first use.
+         """
+         pass
+
+     @abstractmethod
+     async def cleanup(self) -> None:
+         """
+         Cleanup resources (close connections, etc.).
+
+         This is called when the provider is no longer needed.
+         """
+         pass
+
+     @abstractmethod
+     async def complete(
+         self,
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         **kwargs
+     ) -> LLMResponse:
+         """
+         Generate a text completion.
+
+         Args:
+             prompt: The user prompt
+             system_prompt: Optional system prompt to set context
+             **kwargs: Additional provider-specific parameters
+
+         Returns:
+             LLMResponse with generated text
+
+         Raises:
+             Exception: If completion fails
+         """
+         pass
+
+     async def complete_json(
+         self,
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         schema: Optional[Dict[str, Any]] = None,
+         **kwargs
+     ) -> Dict[str, Any]:
+         """
+         Generate a JSON completion.
+
+         Builds a JSON-focused system/user prompt, delegates to complete(),
+         and parses the result. Subclasses can override to add provider-specific
+         behavior (e.g., Ollama's format="json" parameter).
+
+         Args:
+             prompt: The user prompt
+             system_prompt: Optional system prompt
+             schema: Optional JSON schema to validate against
+             **kwargs: Additional parameters
+
+         Returns:
+             Parsed JSON object
+
+         Raises:
+             Exception: If completion or parsing fails
+         """
+         # Enhance prompt for JSON output
+         json_system = "You are a helpful assistant that responds only in valid JSON format."
+         if system_prompt:
+             json_system = f"{system_prompt}\n\n{json_system}"
+
+         json_prompt = f"{prompt}\n\nRespond with valid JSON only. Do not include any explanation or markdown formatting."
+
+         if schema:
+             json_prompt += f"\n\nFollow this schema:\n```json\n{json.dumps(schema, indent=2)}\n```"
+
+         response = await self.complete(json_prompt, system_prompt=json_system, **kwargs)
+
+         # Parse JSON from response
+         return self._parse_json_response(response.content)
+
+     def _parse_json_response(self, content: str) -> Dict[str, Any]:
+         """
+         Parse JSON from response content.
+
+         Handles common issues like markdown code blocks.
+         """
+         try:
+             # Try direct parse first
+             return json.loads(content)
+         except json.JSONDecodeError:
+             pass
+
+         # Try to extract JSON from markdown code block
+         cleaned = content.strip()
+         if cleaned.startswith("```json"):
+             cleaned = cleaned[7:]
+         elif cleaned.startswith("```"):
+             cleaned = cleaned[3:]
+
+         if cleaned.endswith("```"):
+             cleaned = cleaned[:-3]
+
+         cleaned = cleaned.strip()
+
+         try:
+             return json.loads(cleaned)
+         except json.JSONDecodeError:
+             pass
+
+         # Try to find JSON object in text
+         json_match = re.search(r'\{.*\}', content, re.DOTALL)
+         if json_match:
+             try:
+                 return json.loads(json_match.group())
+             except json.JSONDecodeError:
+                 pass
+
+         # Last resort: try to find JSON array
+         json_match = re.search(r'\[.*\]', content, re.DOTALL)
+         if json_match:
+             try:
+                 return json.loads(json_match.group())
+             except json.JSONDecodeError:
+                 pass
+
+         raise ValueError(f"Failed to parse JSON from response: {content[:200]}...")
+
+     async def complete_streaming(
+         self,
+         prompt: str,
+         system_prompt: Optional[str] = None,
+         **kwargs
+     ) -> AsyncIterator[str]:
+         """
+         Generate a streaming text completion.
+
+         Args:
+             prompt: The user prompt
+             system_prompt: Optional system prompt
+             **kwargs: Additional parameters
+
+         Yields:
+             Text chunks as they are generated
+
+         Raises:
+             NotImplementedError: If streaming is not supported
+         """
+         raise NotImplementedError(
+             f"{self.name} does not support streaming completions"
+         )
+
+     async def get_embeddings(
+         self,
+         texts: List[str],
+         **kwargs
+     ) -> List[List[float]]:
+         """
+         Get embeddings for text inputs.
+
+         Args:
+             texts: List of texts to embed
+             **kwargs: Additional parameters
+
+         Returns:
+             List of embedding vectors
+
+         Raises:
+             NotImplementedError: If embeddings are not supported
+         """
+         raise NotImplementedError(
+             f"{self.name} does not support embeddings"
+         )
+
+     def supports_capability(self, capability: ModelCapability) -> bool:
+         """
+         Check if provider supports a specific capability.
+
+         Args:
+             capability: The capability to check
+
+         Returns:
+             True if supported
+         """
+         return capability in self.supported_capabilities
+
+     async def __aenter__(self):
+         """Async context manager entry."""
+         await self.initialize()
+         return self
+
+     async def __aexit__(self, exc_type, exc_val, exc_tb):
+         """Async context manager exit."""
+         await self.cleanup()
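To make the abstract contract concrete, here is a hedged sketch of the smallest possible subclass. EchoProvider is an invented name for illustration and is not part of ebk; it implements only the five abstract members (name, supported_capabilities, initialize, cleanup, complete) and inherits complete_json() and its markdown-tolerant JSON parsing unchanged.

import asyncio
from typing import List, Optional

from ebk.ai.llm_providers.base import (
    BaseLLMProvider,
    LLMConfig,
    LLMResponse,
    ModelCapability,
)


class EchoProvider(BaseLLMProvider):
    """Toy provider that returns canned JSON; illustration only."""

    @property
    def name(self) -> str:
        return "echo"

    @property
    def supported_capabilities(self) -> List[ModelCapability]:
        return [ModelCapability.TEXT_GENERATION, ModelCapability.JSON_MODE]

    async def initialize(self) -> None:
        self._client = object()  # stand-in for a real backend client

    async def cleanup(self) -> None:
        self._client = None

    async def complete(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        **kwargs,
    ) -> LLMResponse:
        # A real provider would call its backend here; returning JSON text
        # lets the inherited complete_json() exercise its parser.
        return LLMResponse(content='{"echo": true}', model=self.config.model)


async def demo():
    async with EchoProvider(LLMConfig(base_url="local", model="echo-1")) as p:
        print(await p.complete_json("Return an object."))  # {'echo': True}
        print(p.supports_capability(ModelCapability.STREAMING))  # False


asyncio.run(demo())

Because complete_streaming() and get_embeddings() raise NotImplementedError by default, a subclass opts into them by overriding the method and advertising the matching ModelCapability; callers are expected to feature-detect via supports_capability().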