ebk 0.1.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic. Click here for more details.

ebk/config.py CHANGED
@@ -1,35 +1,273 @@
1
+ """
2
+ Configuration management for EBK.
3
+
4
+ Handles loading and saving user configuration from:
5
+ - XDG config directory: ~/.config/ebk/config.json
6
+ - Fallback: ~/.ebk/config.json
7
+ - Legacy: ~/.ebkrc (for backward compatibility)
8
+ """
9
+
1
10
  import configparser
11
+ import json
2
12
  import os
13
+ from pathlib import Path
14
+ from typing import Dict, Any, Optional
15
+ from dataclasses import dataclass, asdict, field
16
+
17
+
18
@dataclass
class LLMConfig:
    """Settings for the LLM backend, stored under the "llm" config section.

    Every field has a default, so ``LLMConfig()`` is a complete configuration.
    """

    # Backend selection.
    provider: str = "ollama"
    model: str = "llama3.2"

    # Network location of the provider endpoint.
    host: str = "localhost"
    port: int = 11434

    # Credential for the provider; None when no key is configured.
    api_key: Optional[str] = None

    # Generation parameters; max_tokens of None means no explicit value is set.
    temperature: float = 0.7
    max_tokens: Optional[int] = None
28
+
29
+
30
@dataclass
class ServerConfig:
    """Settings for the web server, stored under the "server" config section."""

    # NOTE(review): "0.0.0.0" presumably makes the server listen on every
    # network interface - confirm this is the intended default exposure.
    host: str = "0.0.0.0"
    port: int = 8000

    # Whether a browser should be opened automatically.
    auto_open_browser: bool = False

    # Number of items per page.
    page_size: int = 50
37
+
38
+
39
@dataclass
class CLIConfig:
    """Default options for the command line, stored under the "cli" section."""

    # Output behaviour.
    verbose: bool = False
    color: bool = True

    # Number of items per page.
    page_size: int = 50
45
+
46
+
47
@dataclass
class LibraryConfig:
    """Library-related settings, stored under the "library" config section."""

    # Path of the default library; None when no default has been configured.
    default_path: Optional[str] = None
51
+
52
+
53
@dataclass
class EBKConfig:
    """Main EBK configuration, grouping the per-feature sections.

    Each section defaults to a fresh instance of its dataclass, so
    ``EBKConfig()`` is a complete default configuration.
    """

    llm: LLMConfig = field(default_factory=LLMConfig)
    server: ServerConfig = field(default_factory=ServerConfig)
    cli: CLIConfig = field(default_factory=CLIConfig)
    library: LibraryConfig = field(default_factory=LibraryConfig)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a plain dictionary keyed by section name."""
        return {
            "llm": asdict(self.llm),
            "server": asdict(self.server),
            "cli": asdict(self.cli),
            "library": asdict(self.library),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'EBKConfig':
        """Create a configuration from a dictionary shaped like ``to_dict()``.

        Missing sections and missing keys fall back to their defaults.
        Keys that are not fields of the section's dataclass are ignored, and
        a section whose value is not a dictionary is treated as empty, so a
        file written by another version (or edited by hand) does not make
        the whole configuration unloadable.

        Args:
            data: Mapping with optional "llm", "server", "cli" and "library"
                entries.

        Returns:
            A populated EBKConfig instance.
        """
        # Local import: the module only imports dataclass, asdict and field.
        from dataclasses import fields

        def build(section_cls, raw):
            # Passing unknown keys (or a non-mapping) straight to the
            # dataclass constructor would raise TypeError.
            if not isinstance(raw, dict):
                raw = {}
            known = {f.name for f in fields(section_cls)}
            return section_cls(**{k: v for k, v in raw.items() if k in known})

        return cls(
            llm=build(LLMConfig, data.get("llm", {})),
            server=build(ServerConfig, data.get("server", {})),
            cli=build(CLIConfig, data.get("cli", {})),
            library=build(LibraryConfig, data.get("library", {})),
        )
83
+
84
+
85
def get_config_path() -> Path:
    """
    Get configuration file path.

    Follows the XDG Base Directory specification, in this order:
    1. $XDG_CONFIG_HOME/ebk/config.json, when the variable is set to an
       absolute path (relative values are ignored, as the specification asks)
    2. ~/.config/ebk/config.json, when ~/.config exists
    3. Fallback: ~/.ebk/config.json

    A config file that already exists at the home-based location (2 or 3)
    stays in use as long as no file exists at location 1, so configurations
    written before $XDG_CONFIG_HOME was honoured are still found.

    Returns:
        Path to config file (the file itself may not exist yet)
    """
    home = Path.home()
    default_xdg_dir = home / ".config"
    if default_xdg_dir.exists():
        home_based = default_xdg_dir / "ebk" / "config.json"
    else:
        # Fallback to ~/.ebk
        home_based = home / ".ebk" / "config.json"

    xdg_env = os.environ.get("XDG_CONFIG_HOME", "")
    if not xdg_env or not os.path.isabs(xdg_env):
        return home_based

    xdg_based = Path(xdg_env) / "ebk" / "config.json"
    if home_based.exists() and not xdg_based.exists():
        # Keep using the existing file rather than silently hiding it.
        return home_based
    return xdg_based
105
+
106
+
107
def load_config() -> EBKConfig:
    """
    Load configuration from file.

    A missing file yields the default configuration silently. A file that
    cannot be read, is not valid JSON, or does not have the expected
    structure yields the default configuration after printing a warning.

    Returns:
        EBKConfig instance with loaded values or defaults
    """
    config_path = get_config_path()

    if not config_path.exists():
        # Return default config
        return EBKConfig()

    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return EBKConfig.from_dict(data)
    except (ValueError, OSError, TypeError, AttributeError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # TypeError/AttributeError cover valid JSON with the wrong shape
        # (e.g. a top-level list, or unexpected keys in a section).
        print(f"Warning: Failed to load config from {config_path}: {e}")
        print("Using default configuration")
        return EBKConfig()
128
+
129
+
130
def save_config(config: EBKConfig) -> None:
    """
    Save configuration to file.

    The parent directory is created when missing. The file is written as
    UTF-8 JSON and, because it can contain the LLM API key, its permissions
    are restricted to the owning user where the platform supports it.

    Args:
        config: Configuration to save
    """
    config_path = get_config_path()

    # Create directory if it doesn't exist
    config_path.parent.mkdir(parents=True, exist_ok=True)

    # Write config
    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(config.to_dict(), f, indent=2)

    try:
        os.chmod(config_path, 0o600)
    except OSError:
        # Best effort only: the configuration has already been written, and
        # some filesystems do not support changing permissions.
        pass

    print(f"Configuration saved to {config_path}")
147
+
148
+
149
def ensure_config_exists() -> Path:
    """
    Make sure a configuration file is present on disk.

    When no file exists at the configured location, a default configuration
    is saved there and a notice is printed.

    Returns:
        Path to config file
    """
    path = get_config_path()
    if path.exists():
        return path

    save_config(EBKConfig())
    print(f"Created default configuration at {path}")
    return path
164
+
165
+
166
def update_config(
    # LLM settings
    llm_provider: Optional[str] = None,
    llm_model: Optional[str] = None,
    llm_host: Optional[str] = None,
    llm_port: Optional[int] = None,
    llm_api_key: Optional[str] = None,
    llm_temperature: Optional[float] = None,
    llm_max_tokens: Optional[int] = None,
    # Server settings
    server_host: Optional[str] = None,
    server_port: Optional[int] = None,
    server_auto_open: Optional[bool] = None,
    server_page_size: Optional[int] = None,
    # CLI settings
    cli_verbose: Optional[bool] = None,
    cli_color: Optional[bool] = None,
    cli_page_size: Optional[int] = None,
    # Library settings
    library_default_path: Optional[str] = None,
) -> None:
    """
    Load the configuration, apply the given overrides and save it again.

    An argument left as None keeps the stored value untouched; every other
    argument replaces the corresponding field. The configuration is saved
    even when no argument was supplied.
    """
    config = load_config()

    # (section object, attribute name, new value) for every supported setting.
    overrides = (
        (config.llm, "provider", llm_provider),
        (config.llm, "model", llm_model),
        (config.llm, "host", llm_host),
        (config.llm, "port", llm_port),
        (config.llm, "api_key", llm_api_key),
        (config.llm, "temperature", llm_temperature),
        (config.llm, "max_tokens", llm_max_tokens),
        (config.server, "host", server_host),
        (config.server, "port", server_port),
        (config.server, "auto_open_browser", server_auto_open),
        (config.server, "page_size", server_page_size),
        (config.cli, "verbose", cli_verbose),
        (config.cli, "color", cli_color),
        (config.cli, "page_size", cli_page_size),
        (config.library, "default_path", library_default_path),
    )
    for section, attribute, value in overrides:
        if value is not None:
            setattr(section, attribute, value)

    save_config(config)
233
+
234
+
235
+ # Backward compatibility
236
def update_llm_config(
    provider: Optional[str] = None,
    model: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[int] = None,
    api_key: Optional[str] = None,
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None
) -> None:
    """Update only the LLM settings (legacy entry point).

    Forwards every argument to ``update_config`` under its ``llm_``-prefixed
    name; None values leave the stored setting unchanged.
    """
    llm_overrides = {
        "llm_provider": provider,
        "llm_model": model,
        "llm_host": host,
        "llm_port": port,
        "llm_api_key": api_key,
        "llm_temperature": temperature,
        "llm_max_tokens": max_tokens,
    }
    update_config(**llm_overrides)
255
+
256
+
257
# Legacy support for ~/.ebkrc
def load_ebkrc_config() -> configparser.ConfigParser:
    """
    Read the legacy INI-style configuration from ~/.ebkrc.

    The file is optional and may contain arbitrary sections for different
    features, for example a [streamlit] section for dashboard configuration.

    Returns:
        A ConfigParser holding the file's contents, or an empty parser when
        the file does not exist.
    """
    parser = configparser.ConfigParser()
    rc_file = os.path.expanduser("~/.ebkrc")

    # Config file is optional: only read it when it is there.
    if os.path.exists(rc_file):
        parser.read(rc_file)

    return parser
ebk/decorators.py ADDED
@@ -0,0 +1,132 @@
1
+ """Decorators for ebk functionality."""
2
+
3
+ import functools
4
+ import logging
5
+ from pathlib import Path
6
+ from typing import Callable, Any
7
+ import typer
8
+ from rich.console import Console
9
+
10
+ logger = logging.getLogger(__name__)
11
+ console = Console()
12
+
13
+
14
def handle_library_errors(func: Callable) -> Callable:
    """
    Decorator to handle common library operation errors.

    Reduces code duplication by centralizing error handling for:
    - FileNotFoundError: Library doesn't exist
    - PermissionError: No access to files
    - ValueError: Invalid data or arguments
    - KeyboardInterrupt: Cancelled by the user (exit code 130)
    - General exceptions: Unexpected errors (logged with traceback)

    Each handled error prints a message and raises ``typer.Exit`` with a
    non-zero code. ``typer.Exit`` and ``typer.Abort`` raised by the wrapped
    function are passed through untouched so its chosen exit code survives.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> Any:
        try:
            return func(*args, **kwargs)
        except (typer.Exit, typer.Abort):
            # Both derive from RuntimeError, so without this clause the
            # generic handler below would report a deliberate exit (even
            # code 0) as an "Unexpected error" and force exit code 1.
            raise
        except FileNotFoundError as e:
            console.print(f"[bold red]Error:[/bold red] Library or file not found: {e}")
            raise typer.Exit(code=1)
        except PermissionError as e:
            console.print(f"[bold red]Error:[/bold red] Permission denied: {e}")
            console.print("[yellow]Tip: Check file permissions or run with appropriate privileges[/yellow]")
            raise typer.Exit(code=1)
        except ValueError as e:
            console.print(f"[bold red]Error:[/bold red] Invalid input: {e}")
            raise typer.Exit(code=1)
        except KeyboardInterrupt:
            console.print("\n[yellow]Operation cancelled by user[/yellow]")
            raise typer.Exit(code=130)
        except Exception as e:
            logger.error(f"Unexpected error in {func.__name__}: {e}", exc_info=True)
            console.print(f"[bold red]Unexpected error:[/bold red] {e}")
            console.print("[dim]See log file for details[/dim]")
            raise typer.Exit(code=1)

    return wrapper
48
+
49
+
50
def validate_path(path_type: str = "directory") -> Callable:
    """
    Decorator to validate and sanitize file paths for security.

    Only the first positional argument of the decorated function is treated
    as a path; a call without positional arguments is passed through
    unchecked. On success the first argument is replaced by the resolved
    path as a string. On failure a "Security Error" message is printed and
    ``typer.Exit(code=1)`` is raised.

    Args:
        path_type: Either "directory" or "file"; any other value skips the
            type check.

    Prevents:
    - Path traversal attacks
    - Access to system directories
    - Symbolic link attacks
    """
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            # Find path arguments (usually first positional arg)
            if args:
                path = Path(args[0]).resolve()

                # Security checks
                try:
                    # The candidate is fully resolved, so the allowed roots
                    # must be resolved too; otherwise a home or working
                    # directory reached through a symlink never matches and
                    # every path beneath it is rejected.
                    cwd = Path.cwd().resolve()
                    home = Path.home().resolve()

                    # Check if path is trying to escape to system directories
                    # NOTE(review): parts[0] of a resolved Windows path is a
                    # drive anchor such as "C:\\", so this check presumably
                    # never fires there - confirm the intended behaviour.
                    if path.parts[0] in ('/', '\\') and not (
                        path.is_relative_to(cwd) or
                        path.is_relative_to(home)
                    ):
                        raise ValueError(f"Access to system path not allowed: {path}")

                    # Check for suspicious patterns
                    suspicious_patterns = ['../', '...', '~/', '/etc/', '/usr/', '/bin/', '/sys/']
                    path_str = str(path)
                    for pattern in suspicious_patterns:
                        if pattern in path_str and not path.is_relative_to(home):
                            raise ValueError(f"Suspicious path pattern detected: {pattern}")

                    # Validate based on type (paths that do not exist yet pass)
                    if path_type == "directory":
                        if path.exists() and not path.is_dir():
                            raise ValueError(f"Path exists but is not a directory: {path}")
                    elif path_type == "file":
                        if path.exists() and not path.is_file():
                            raise ValueError(f"Path exists but is not a file: {path}")

                    # Replace the path with the resolved, safe version
                    args = (str(path),) + args[1:]

                except ValueError as e:
                    console.print(f"[bold red]Security Error:[/bold red] {e}")
                    raise typer.Exit(code=1)

            return func(*args, **kwargs)

        return wrapper
    return decorator
108
+
109
+
110
def require_confirmation(message: str = "Are you sure you want to continue?") -> Callable:
    """
    Decorator that asks the user to confirm before running the function.

    The prompt is skipped when the call carries a truthy ``yes`` keyword
    argument. Declining prints a cancellation notice and raises
    ``typer.Exit(code=0)`` without calling the decorated function.
    """
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            # A --yes style flag means the caller already agreed.
            already_confirmed = kwargs.get('yes', False)

            if not already_confirmed:
                console.print(f"[yellow]⚠️ {message}[/yellow]")
                if not typer.confirm("Continue?"):
                    console.print("[red]Operation cancelled[/red]")
                    raise typer.Exit(code=0)

            return func(*args, **kwargs)

        return wrapper
    return decorator
ebk/extract_metadata.py CHANGED
@@ -2,7 +2,7 @@ import os
2
2
  import xmltodict
3
3
  from typing import Dict, Optional
4
4
  from slugify import slugify
5
- import PyPDF2
5
+ import pypdf
6
6
  from ebooklib import epub
7
7
 
8
8
  def extract_metadata_from_opf(opf_file: str) -> Dict:
@@ -32,12 +32,17 @@ def extract_metadata_from_opf(opf_file: str) -> Dict:
32
32
  simplified = {
33
33
  "title": metadata.get("dc:title", metadata.get("title")),
34
34
  "creators": None,
35
+ "contributors": None,
35
36
  "subjects": None,
36
37
  "description": metadata.get("dc:description", metadata.get("description")),
37
38
  "language": metadata.get("dc:language", metadata.get("language")),
38
39
  "date": metadata.get("dc:date", metadata.get("date")),
39
40
  "publisher": metadata.get("dc:publisher", metadata.get("publisher")),
40
- "identifiers": None
41
+ "identifiers": None,
42
+ "rights": metadata.get("dc:rights", metadata.get("rights")),
43
+ "source": metadata.get("dc:source", metadata.get("source")),
44
+ "series": None,
45
+ "series_index": None
41
46
  }
42
47
 
43
48
  # -- Creators
@@ -75,12 +80,64 @@ def extract_metadata_from_opf(opf_file: str) -> Dict:
75
80
  text = identifiers.get("#text", "").strip()
76
81
  simplified["identifiers"][scheme] = text
77
82
 
83
+ # -- Contributors (editors, translators, etc)
84
+ contributors_raw = metadata.get("dc:contributor", metadata.get("contributor"))
85
+ if contributors_raw:
86
+ simplified["contributors"] = []
87
+ if isinstance(contributors_raw, list):
88
+ for contrib in contributors_raw:
89
+ if isinstance(contrib, dict):
90
+ name = contrib.get("#text", "").strip()
91
+ role = contrib.get("@opf:role", "contributor")
92
+ file_as = contrib.get("@opf:file-as", "")
93
+ if name:
94
+ simplified["contributors"].append({
95
+ "name": name,
96
+ "role": role,
97
+ "file_as": file_as
98
+ })
99
+ elif isinstance(contrib, str):
100
+ simplified["contributors"].append({
101
+ "name": contrib.strip(),
102
+ "role": "contributor",
103
+ "file_as": ""
104
+ })
105
+ elif isinstance(contributors_raw, dict):
106
+ name = contributors_raw.get("#text", "").strip()
107
+ role = contributors_raw.get("@opf:role", "contributor")
108
+ file_as = contributors_raw.get("@opf:file-as", "")
109
+ if name:
110
+ simplified["contributors"] = [{
111
+ "name": name,
112
+ "role": role,
113
+ "file_as": file_as
114
+ }]
115
+
116
+ # -- Calibre-specific metadata (series, etc)
117
+ # Look for meta tags with name attributes
118
+ meta_tags = metadata.get("meta", [])
119
+ if not isinstance(meta_tags, list):
120
+ meta_tags = [meta_tags] if meta_tags else []
121
+
122
+ for meta in meta_tags:
123
+ if isinstance(meta, dict):
124
+ meta_name = meta.get("@name", "")
125
+ meta_content = meta.get("@content", "")
126
+
127
+ if meta_name == "calibre:series" and meta_content:
128
+ simplified["series"] = meta_content
129
+ elif meta_name == "calibre:series_index" and meta_content:
130
+ try:
131
+ simplified["series_index"] = float(meta_content)
132
+ except (ValueError, TypeError):
133
+ pass
134
+
78
135
  return simplified
79
136
 
80
137
 
81
138
  def extract_metadata_from_pdf(pdf_path: str) -> Dict:
82
139
  """
83
- Extract metadata from a PDF file using PyPDF2.
140
+ Extract metadata from a PDF file using pypdf.
84
141
  Returns a dictionary with the same keys as the OPF-based dict.
85
142
  """
86
143
 
@@ -94,20 +151,23 @@ def extract_metadata_from_pdf(pdf_path: str) -> Dict:
94
151
  "publisher": None,
95
152
  "identifiers": None,
96
153
  "keywords": None,
154
+ "creator_application": None,
97
155
  }
98
156
 
99
157
  try:
100
158
  with open(pdf_path, "rb") as f:
101
- reader = PyPDF2.PdfReader(f)
159
+ reader = pypdf.PdfReader(f)
102
160
  info = reader.metadata or {}
103
161
 
104
- # NOTE: Depending on PyPDF2 version, metadata keys can differ
162
+ # NOTE: Depending on pypdf version, metadata keys can differ
105
163
  # e.g. info.title vs info.get('/Title')
106
164
  pdf_title = info.get("/Title", None) or info.get("title", None)
107
165
  pdf_author = info.get("/Author", None) or info.get("author", None)
108
166
  pdf_subject = info.get("/Subject", None) or info.get("subject", None)
109
167
  pdf_keywords = info.get("/Keywords", None) or info.get("keywords", None)
110
- pdf_publisher = info.get("/Producer", None) or info.get("producer", None) or info.get("/Publisher", None) or info.get("publisher", None)
168
+ pdf_creator = info.get("/Creator", None) or info.get("creator", None) # Application used
169
+ pdf_producer = info.get("/Producer", None) or info.get("producer", None)
170
+ pdf_publisher = info.get("/Publisher", None) or info.get("publisher", None)
111
171
  pdf_creation_date = info.get("/CreationDate", None)
112
172
 
113
173
  if pdf_title:
@@ -130,10 +190,18 @@ def extract_metadata_from_pdf(pdf_path: str) -> Dict:
130
190
  metadata["identifiers"] = {"pdf:identifier": pdf_path}
131
191
 
132
192
  if pdf_keywords:
133
- metadata["keywords"] = [kw.strip() for kw in pdf_keywords.split(",")]
193
+ metadata["keywords"] = [kw.strip() for kw in pdf_keywords.split(",") if kw.strip()]
194
+
195
+ # Creator is the application that created the PDF (e.g., LaTeX, Word)
196
+ if pdf_creator:
197
+ metadata["creator_application"] = pdf_creator.strip()
134
198
 
199
+ # Publisher: prefer explicit Publisher field, fallback to Producer
135
200
  if pdf_publisher:
136
201
  metadata["publisher"] = pdf_publisher.strip()
202
+ elif pdf_producer and not pdf_creator:
203
+ # Only use producer as publisher if there's no creator app
204
+ metadata["publisher"] = pdf_producer.strip()
137
205
 
138
206
  metadata["file_paths"] = [pdf_path]
139
207
 
@@ -259,6 +327,7 @@ def extract_metadata(ebook_file: str, opf_file: Optional[str] = None) -> Dict:
259
327
  if opf_file and os.path.isfile(opf_file):
260
328
  opf_metadata = extract_metadata_from_opf(opf_file)
261
329
 
330
+ ebook_metadata = {}
262
331
  _, ext = os.path.splitext(ebook_file.lower())
263
332
  if ext == ".pdf":
264
333
  ebook_metadata = extract_metadata_from_pdf(ebook_file)