ebk 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ebk might be problematic. Click here for more details.
- ebk/__init__.py +35 -0
- ebk/ai/__init__.py +23 -0
- ebk/ai/knowledge_graph.py +443 -0
- ebk/ai/llm_providers/__init__.py +21 -0
- ebk/ai/llm_providers/base.py +230 -0
- ebk/ai/llm_providers/ollama.py +362 -0
- ebk/ai/metadata_enrichment.py +396 -0
- ebk/ai/question_generator.py +328 -0
- ebk/ai/reading_companion.py +224 -0
- ebk/ai/semantic_search.py +434 -0
- ebk/ai/text_extractor.py +394 -0
- ebk/cli.py +2828 -680
- ebk/config.py +260 -22
- ebk/db/__init__.py +37 -0
- ebk/db/migrations.py +180 -0
- ebk/db/models.py +526 -0
- ebk/db/session.py +144 -0
- ebk/decorators.py +132 -0
- ebk/exports/base_exporter.py +218 -0
- ebk/exports/html_library.py +1390 -0
- ebk/exports/html_utils.py +117 -0
- ebk/exports/hugo.py +7 -3
- ebk/exports/jinja_export.py +287 -0
- ebk/exports/multi_facet_export.py +164 -0
- ebk/exports/symlink_dag.py +479 -0
- ebk/extract_metadata.py +76 -7
- ebk/library_db.py +899 -0
- ebk/plugins/__init__.py +42 -0
- ebk/plugins/base.py +502 -0
- ebk/plugins/hooks.py +444 -0
- ebk/plugins/registry.py +500 -0
- ebk/repl/__init__.py +9 -0
- ebk/repl/find.py +126 -0
- ebk/repl/grep.py +174 -0
- ebk/repl/shell.py +1677 -0
- ebk/repl/text_utils.py +320 -0
- ebk/search_parser.py +413 -0
- ebk/server.py +1633 -0
- ebk/services/__init__.py +11 -0
- ebk/services/import_service.py +442 -0
- ebk/services/tag_service.py +282 -0
- ebk/services/text_extraction.py +317 -0
- ebk/similarity/__init__.py +77 -0
- ebk/similarity/base.py +154 -0
- ebk/similarity/core.py +445 -0
- ebk/similarity/extractors.py +168 -0
- ebk/similarity/metrics.py +376 -0
- ebk/vfs/__init__.py +101 -0
- ebk/vfs/base.py +301 -0
- ebk/vfs/library_vfs.py +124 -0
- ebk/vfs/nodes/__init__.py +54 -0
- ebk/vfs/nodes/authors.py +196 -0
- ebk/vfs/nodes/books.py +480 -0
- ebk/vfs/nodes/files.py +155 -0
- ebk/vfs/nodes/metadata.py +385 -0
- ebk/vfs/nodes/root.py +100 -0
- ebk/vfs/nodes/similar.py +165 -0
- ebk/vfs/nodes/subjects.py +184 -0
- ebk/vfs/nodes/tags.py +371 -0
- ebk/vfs/resolver.py +228 -0
- ebk-0.3.2.dist-info/METADATA +755 -0
- ebk-0.3.2.dist-info/RECORD +69 -0
- {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/WHEEL +1 -1
- ebk-0.3.2.dist-info/licenses/LICENSE +21 -0
- ebk/imports/__init__.py +0 -0
- ebk/imports/calibre.py +0 -144
- ebk/imports/ebooks.py +0 -116
- ebk/llm.py +0 -58
- ebk/manager.py +0 -44
- ebk/merge.py +0 -308
- ebk/streamlit/__init__.py +0 -0
- ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
- ebk/streamlit/app.py +0 -185
- ebk/streamlit/display.py +0 -168
- ebk/streamlit/filters.py +0 -151
- ebk/streamlit/utils.py +0 -58
- ebk/utils.py +0 -311
- ebk-0.1.0.dist-info/METADATA +0 -457
- ebk-0.1.0.dist-info/RECORD +0 -29
- {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/entry_points.txt +0 -0
- {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
ebk/__init__.py,sha256=KK5aFY07PvPB8LjfNJ_xAuWft6uyIMsiK6vrvhyyg14,790
|
|
2
|
+
ebk/cli.py,sha256=oAG25LPGJH0QZPQJVGEpiHCcgdaPw4sn5mN0knhRX_M,112835
|
|
3
|
+
ebk/config.py,sha256=P090sNH2YSnNNteXjim2_WYN9j2BC_GU5o0wQIwSbT4,7364
|
|
4
|
+
ebk/decorators.py,sha256=MpAD1Wwy3o4l0sEiq4EIy5YtKMKf_nXtu7jpV64Uvyc,5179
|
|
5
|
+
ebk/extract_metadata.py,sha256=epMn9_4zQYiytBwj25_-rYl0QmPN4cP25DuxMnijNMU,12228
|
|
6
|
+
ebk/ident.py,sha256=yRIKVA0rpyhdPCxl0Vx7iu3YOebE8Yvbv32iCJmKk-Y,3324
|
|
7
|
+
ebk/library_db.py,sha256=BEwqvoR8Ayec53ptmVcRsL2oTTlHj1_w0WP_j1ONfSA,29535
|
|
8
|
+
ebk/search_parser.py,sha256=NRVbGnaOWxbc1WtsoFVINjHqCMBEubrKBxY2b_HQqJY,14848
|
|
9
|
+
ebk/server.py,sha256=x5vpQmZjGAwJVgmUq8l8Z1owfoqg2weuUVBovEWVE-s,59036
|
|
10
|
+
ebk/ai/__init__.py,sha256=-0vfDHdht8ZasYvl33nyGoWCaZxKIn_en0PxZdRQMkw,686
|
|
11
|
+
ebk/ai/knowledge_graph.py,sha256=Xp4Ao1fVwp6oKyBG1hDroAqPTADyyUGG86WczvLRDzg,16504
|
|
12
|
+
ebk/ai/metadata_enrichment.py,sha256=jQrqwjTzZkxzkJdxdwQCN1Cchkwa6jibHd8r5hjYosw,12364
|
|
13
|
+
ebk/ai/question_generator.py,sha256=ev1wEmgIDxnb_kaZdiLh0Z1TW7HUjb_HR1NlsBmdduQ,11363
|
|
14
|
+
ebk/ai/reading_companion.py,sha256=8GHiA9tR-vb28eCS3FmpDmRMX2OMyg5IP8AhPZ7c-Vg,7948
|
|
15
|
+
ebk/ai/semantic_search.py,sha256=i0qY6cEr_89RWxTdJpnMoHu-l4csYIdDi6nh17asyfQ,15218
|
|
16
|
+
ebk/ai/text_extractor.py,sha256=SOi7VgiAupZTfh4GG8XDB5N2AMeEP727o_eVlTdlhiQ,13918
|
|
17
|
+
ebk/ai/llm_providers/__init__.py,sha256=dqXtt2zh2iAHfpBLRQWpyDIOrJ8IggcxwvD0isHplcI,438
|
|
18
|
+
ebk/ai/llm_providers/base.py,sha256=am-TO4LTYLNeyMop9qy4TQdS7LJ2VTNvqy4iri1YvNk,5694
|
|
19
|
+
ebk/ai/llm_providers/ollama.py,sha256=MwXhJSsAc7cYfKyrmiXvk25QzIpvfNOZcugs9TNZta8,10132
|
|
20
|
+
ebk/db/__init__.py,sha256=CN1R77Ut5ai8kGSh1ohprGaS5qvOUMdtMT-y--5DIpw,792
|
|
21
|
+
ebk/db/migrations.py,sha256=H0nAl8I-hUKctV7sPj14iockSL9IL0qHihy3krxZPsc,5513
|
|
22
|
+
ebk/db/models.py,sha256=PGcq9knATE06_MStiCpg7zNvO2nDhU592xDt0vhNCxg,19967
|
|
23
|
+
ebk/db/session.py,sha256=3Oc7Xo1gb7B430JtKRtF4JFyFi8hdIKz1GQq-Bjjz_o,3549
|
|
24
|
+
ebk/exports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
+
ebk/exports/base_exporter.py,sha256=OQFmuyP9vxB5SVErTacOGonjD2Kyc1j1_1G9VcpdlDY,6783
|
|
26
|
+
ebk/exports/html_library.py,sha256=L8b__nU_c_4nZYym-W_M90Eb-GRVZfb3OqLm0nE-0Lk,54107
|
|
27
|
+
ebk/exports/html_utils.py,sha256=ft4PLHBKjx6OFD3bxCEI9tpaEUEqWzhLNqwKc196ulo,3899
|
|
28
|
+
ebk/exports/hugo.py,sha256=0rKNCegtm_Xd2g7IU77DmzlNiHZkoHTd2Wyr_y2gLRk,2209
|
|
29
|
+
ebk/exports/jinja_export.py,sha256=wWy_eQIp16yuZ0i-Ssj8PaGHSNiSdjOwjt5OpKuv-q0,10996
|
|
30
|
+
ebk/exports/multi_facet_export.py,sha256=xgy5Vjkow49ESVUXcjmLcTbMa-nSyaAlYW8HH-SeB4U,6364
|
|
31
|
+
ebk/exports/symlink_dag.py,sha256=ChTSp9qA5Ibui4eIFfbWNDeYBfYh--RtOI4RvP54-Vs,20499
|
|
32
|
+
ebk/exports/zip.py,sha256=erteF1NcymUbka5m_pnXWaRZmkUihALXe4A_cYg8hXs,779
|
|
33
|
+
ebk/plugins/__init__.py,sha256=-YhhLSqVI6lZSCoW-8fJfib7qL12AO3h4IGOFGcs_qU,765
|
|
34
|
+
ebk/plugins/base.py,sha256=x523_EFtHiFgBBTJqzgArIF8D8KtmD20jJUTqew29eM,14121
|
|
35
|
+
ebk/plugins/hooks.py,sha256=ym7fkmqqirc17Q5GYtOO6ahfyYMJHUX1QwRUkwaZs20,14442
|
|
36
|
+
ebk/plugins/registry.py,sha256=xCmKMc1ioSpXM8nBPcep5RxUy-BZQMKYCInDp2NqzWY,15497
|
|
37
|
+
ebk/repl/__init__.py,sha256=zSAp76GfAEtxYsXe8W2m-PtDFeHX668y6jnpzz2yeWA,252
|
|
38
|
+
ebk/repl/find.py,sha256=Yl-4qv_uKl8dJefHzCyNpjJxngtB_fxj4MqXbMGEkKc,3799
|
|
39
|
+
ebk/repl/grep.py,sha256=VIFdcMrFqSHBB2rmLllf9PDW4UjBelf60znUPfBh-AA,4802
|
|
40
|
+
ebk/repl/shell.py,sha256=emzjK1aXiGnYdEAwUybntnaXS62tQhtQZkDeQ3sDIGg,60896
|
|
41
|
+
ebk/repl/text_utils.py,sha256=nSCE-qPL3xHyXjt_5ltjYnOCBXpyPqJiyaKztrfAKuQ,8382
|
|
42
|
+
ebk/services/__init__.py,sha256=j6Gqdqp7MRRWIuSjJH_3TxL7JA5nID4kcNGbWXKXcDs,199
|
|
43
|
+
ebk/services/import_service.py,sha256=WThx2sEuooFQwgFmjyuOZPlIo8FQ-08g-CzVr5r6QaQ,16478
|
|
44
|
+
ebk/services/tag_service.py,sha256=KhuYqSUalOmQc1ikfrfEZ1__wf9f-VVEoWPpFSSGMi0,7982
|
|
45
|
+
ebk/services/text_extraction.py,sha256=jUPBolw45w5cKuL0m92K1i2Qo0FrqCwoX8p7GuBxvzU,10525
|
|
46
|
+
ebk/similarity/__init__.py,sha256=ySprbU_qfMuOEFyUqnBHe-4HSyvGyZHjRUZ4y6Z20-A,1942
|
|
47
|
+
ebk/similarity/base.py,sha256=uWY1whl1jdjvntnuOaB9Mal3f3yiLt7HqjHSBZyM324,4473
|
|
48
|
+
ebk/similarity/core.py,sha256=ohXY61L3xUOjcbtIkfqLwRKiiEST_oTRt8TFLdXIReQ,13340
|
|
49
|
+
ebk/similarity/extractors.py,sha256=UqxnIWM2sBdY_d6mse_NYTuOxdT2f7rtPDfe9b8QR0k,4220
|
|
50
|
+
ebk/similarity/metrics.py,sha256=2rbX10xXauebBSH7fKN9fCSMgSOuxRHKwUyJhd_hFfU,10419
|
|
51
|
+
ebk/vfs/__init__.py,sha256=5d2jCVRNboJlD576IrFWKIUeOVVVv3zVdyMbSMZ0zEY,3129
|
|
52
|
+
ebk/vfs/base.py,sha256=NkOBkefa6I4O1_GY0SmBFHMHhQbt7edyNAR8ncZeVWM,8231
|
|
53
|
+
ebk/vfs/library_vfs.py,sha256=Lkrn1Wkx2f5qnegnmLGUKiHUfOOUoqnQOADq80xaJNg,3353
|
|
54
|
+
ebk/vfs/resolver.py,sha256=D_pvOzmeErl8oMxw73g4X4nKBzxc_OgoAmHDDP_fJA4,6228
|
|
55
|
+
ebk/vfs/nodes/__init__.py,sha256=W6MCMNUh8129Uw2xtNoWi00rrMHNreTXb26KS7IrXrA,1360
|
|
56
|
+
ebk/vfs/nodes/authors.py,sha256=Dqr8Leb2NM3W_qKwvEz8r3-C3uNFQVjNGxf56-qhm9c,5731
|
|
57
|
+
ebk/vfs/nodes/books.py,sha256=5YzFXFKrK1aUCzD7uAYPmuGCTVh_j4vj9VNfVFywo9o,15461
|
|
58
|
+
ebk/vfs/nodes/files.py,sha256=49kCsPFe5zBstcUcC44E-KEUHkoHTKT6AzQOXhirnDA,4673
|
|
59
|
+
ebk/vfs/nodes/metadata.py,sha256=wo2TDV2JshuWAzdjGEVOWM5-9S3XwqoRoqRIgSX3o1g,11392
|
|
60
|
+
ebk/vfs/nodes/root.py,sha256=4WkU3GnNcnnqg6dpkjUXI0pnWX4oAR6UJDNpEqIexLs,3030
|
|
61
|
+
ebk/vfs/nodes/similar.py,sha256=LJeFZigiu7dDyduGi9SJrxNJNP_qix661o8pyGsARFs,4866
|
|
62
|
+
ebk/vfs/nodes/subjects.py,sha256=7U9Raj0b2PXb6LfOo3zlHAqTD0lW3IQk7m5u0nU8WPU,5327
|
|
63
|
+
ebk/vfs/nodes/tags.py,sha256=uFJ5lWQYXD0dDmRP6v9NpfQDieTZJfGtSgaz2zzHVZo,11119
|
|
64
|
+
ebk-0.3.2.dist-info/licenses/LICENSE,sha256=1eh_aOAZz71hpva42M9f8Vqj_FtSUqzE-1EimifmM_8,1068
|
|
65
|
+
ebk-0.3.2.dist-info/METADATA,sha256=P7E6L37hRP087ibZqpd4EV4szzlIHLh_cgAGxkYHOk4,21712
|
|
66
|
+
ebk-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
67
|
+
ebk-0.3.2.dist-info/entry_points.txt,sha256=M24WNtCeBq-nmIDPU-3i0DtM9VP3bM_ul2nQIC1r_RA,36
|
|
68
|
+
ebk-0.3.2.dist-info/top_level.txt,sha256=OLATFvDsJQh-6TqJCili349OH47DoH-6dMih88BWcyg,4
|
|
69
|
+
ebk-0.3.2.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Alex Towell
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
ebk/imports/__init__.py
DELETED
|
File without changes
|
ebk/imports/calibre.py
DELETED
|
@@ -1,144 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import shutil
|
|
3
|
-
import json
|
|
4
|
-
from slugify import slugify
|
|
5
|
-
from typing import Dict
|
|
6
|
-
import logging
|
|
7
|
-
from ..extract_metadata import extract_metadata
|
|
8
|
-
from ..ident import add_unique_id
|
|
9
|
-
from ..utils import get_unique_filename
|
|
10
|
-
|
|
11
|
-
logger = logging.getLogger(__name__)
|
|
12
|
-
|
|
13
|
-
ebook_exts = (".pdf", ".epub", ".mobi", ".azw3", ".txt", ".docx", ".odt",
|
|
14
|
-
".html", ".rtf", ".md", ".fb2", ".cbz", ".cbr", ".djvu",
|
|
15
|
-
".xps", ".ibooks", ".azw", ".lit", ".pdb", ".prc", ".lrf",
|
|
16
|
-
".pdb", ".pml", ".rb", ".snb", ".tcr", ".txtz", ".azw1")
|
|
17
|
-
|
|
18
|
-
def import_calibre(calibre_dir: str,
|
|
19
|
-
output_dir: str,
|
|
20
|
-
ebook_exts: tuple = ebook_exts):
|
|
21
|
-
if not os.path.exists(output_dir):
|
|
22
|
-
os.makedirs(output_dir)
|
|
23
|
-
|
|
24
|
-
metadata_list = []
|
|
25
|
-
|
|
26
|
-
for root, _, files in os.walk(calibre_dir):
|
|
27
|
-
# Look for OPF
|
|
28
|
-
opf_file_path = os.path.join(root, "metadata.opf")
|
|
29
|
-
|
|
30
|
-
# Gather valid ebook files
|
|
31
|
-
ebook_files = [f for f in files if f.lower().endswith(ebook_exts)]
|
|
32
|
-
|
|
33
|
-
if not ebook_files:
|
|
34
|
-
logger.debug(f"No recognized ebook files found in {root}. Skipping.")
|
|
35
|
-
continue # skip if no recognized ebook files
|
|
36
|
-
|
|
37
|
-
# Pick the "primary" ebook file. This is arbitrary and can be changed.
|
|
38
|
-
primary_ebook_file = ebook_files[0]
|
|
39
|
-
ebook_full_path = os.path.join(root, primary_ebook_file)
|
|
40
|
-
|
|
41
|
-
# Extract metadata
|
|
42
|
-
if os.path.exists(opf_file_path):
|
|
43
|
-
logger.debug(f"Found metadata.opf in {root}. Extracting metadata from OPF.")
|
|
44
|
-
metadata = extract_metadata(ebook_full_path, opf_file_path)
|
|
45
|
-
else:
|
|
46
|
-
logger.warning(f"No metadata.opf found in {root}. Inferring metadata from ebook files.")
|
|
47
|
-
metadata = extract_metadata(ebook_full_path) # Only ebook file path is provided
|
|
48
|
-
|
|
49
|
-
# Extract metadata (OPF + ebook)
|
|
50
|
-
metadata = extract_metadata(ebook_full_path, opf_file_path)
|
|
51
|
-
metadata["root"] = root
|
|
52
|
-
metadata["source_folder"] = calibre_dir
|
|
53
|
-
metadata["output_folder"] = output_dir
|
|
54
|
-
metadata["imported_from"] = "calibre"
|
|
55
|
-
metadata["virtual_libs"] = [slugify(output_dir)]
|
|
56
|
-
|
|
57
|
-
# Generate base name
|
|
58
|
-
title_slug = slugify(metadata.get("title", "unknown_title"))
|
|
59
|
-
creator_slug = slugify(
|
|
60
|
-
metadata["creators"][0]) if metadata.get("creators") else "unknown_creator"
|
|
61
|
-
|
|
62
|
-
base_name = f"{title_slug}__{creator_slug}"
|
|
63
|
-
|
|
64
|
-
# Copy ebooks
|
|
65
|
-
file_paths = []
|
|
66
|
-
for ebook_file in ebook_files:
|
|
67
|
-
_, ext = os.path.splitext(ebook_file)
|
|
68
|
-
src = os.path.join(root, ebook_file)
|
|
69
|
-
dst = os.path.join(output_dir, f"{base_name}{ext}")
|
|
70
|
-
dst = get_unique_filename(dst)
|
|
71
|
-
shutil.copy(src, dst)
|
|
72
|
-
file_paths.append(os.path.relpath(dst, output_dir))
|
|
73
|
-
|
|
74
|
-
# Optionally handle cover.jpg
|
|
75
|
-
if "cover.jpg" in files:
|
|
76
|
-
cover_src = os.path.join(root, "cover.jpg")
|
|
77
|
-
cover_dst = os.path.join(output_dir, f"{base_name}_cover.jpg")
|
|
78
|
-
shutil.copy(cover_src, cover_dst)
|
|
79
|
-
metadata["cover_path"] = os.path.relpath(cover_dst, output_dir)
|
|
80
|
-
|
|
81
|
-
# Store relative paths in metadata
|
|
82
|
-
metadata["file_paths"] = file_paths
|
|
83
|
-
metadata_list.append(metadata)
|
|
84
|
-
|
|
85
|
-
for entry in metadata_list:
|
|
86
|
-
add_unique_id(entry)
|
|
87
|
-
|
|
88
|
-
# Write out metadata.json
|
|
89
|
-
output_json = os.path.join(output_dir, "metadata.json")
|
|
90
|
-
with open(output_json, "w", encoding="utf-8") as f:
|
|
91
|
-
json.dump(metadata_list, f, indent=2, ensure_ascii=False)
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def ensure_metadata_completeness(metadata: Dict) -> Dict:
|
|
95
|
-
"""
|
|
96
|
-
Ensure that all required metadata fields are present.
|
|
97
|
-
If a field is missing or empty, attempt to infer or set default values.
|
|
98
|
-
|
|
99
|
-
Args:
|
|
100
|
-
metadata (Dict): The metadata dictionary extracted from OPF or inferred.
|
|
101
|
-
|
|
102
|
-
Returns:
|
|
103
|
-
Dict: The updated metadata dictionary with all necessary fields.
|
|
104
|
-
"""
|
|
105
|
-
required_fields = ["title", "creators",
|
|
106
|
-
"subjects", "description",
|
|
107
|
-
"language", "date", "identifiers",
|
|
108
|
-
"file_paths", "cover_path", "unique_id",
|
|
109
|
-
"source_folder", "output_folder",
|
|
110
|
-
"imported_from", "virtual_libs"]
|
|
111
|
-
for field in required_fields:
|
|
112
|
-
if field not in metadata:
|
|
113
|
-
if field == "creators":
|
|
114
|
-
metadata[field] = ["Unknown Author"]
|
|
115
|
-
logger.debug(f"Set default value for '{field}'.")
|
|
116
|
-
elif field == "subjects":
|
|
117
|
-
metadata[field] = []
|
|
118
|
-
logger.debug(f"Set default value for '{field}'.")
|
|
119
|
-
elif field == "description":
|
|
120
|
-
metadata[field] = "No description available."
|
|
121
|
-
logger.debug(f"Set default value for '{field}'.")
|
|
122
|
-
elif field == "language":
|
|
123
|
-
metadata[field] = "en" # Default to English
|
|
124
|
-
logger.debug(f"Set default value for '{field}'.")
|
|
125
|
-
elif field == "date":
|
|
126
|
-
metadata[field] = None # Unknown date
|
|
127
|
-
logger.debug(f"Set default value for '{field}'.")
|
|
128
|
-
elif field == "title":
|
|
129
|
-
metadata[field] = "Unknown Title"
|
|
130
|
-
logger.debug(f"Set default value for '{field}'.")
|
|
131
|
-
elif field == "identifiers":
|
|
132
|
-
metadata[field] = {}
|
|
133
|
-
logger.debug(f"Set default value for '{field}'.")
|
|
134
|
-
elif field == "file_paths":
|
|
135
|
-
metadata[field] = []
|
|
136
|
-
logger.debug(f"Set default value for '{field}'.")
|
|
137
|
-
elif field == "cover_path":
|
|
138
|
-
metadata[field] = None
|
|
139
|
-
logger.debug(f"Set default value for '{field}'.")
|
|
140
|
-
elif field == "unique_id":
|
|
141
|
-
metadata[field] = None
|
|
142
|
-
logger.debug(f"Set default value for '{field}'.")
|
|
143
|
-
|
|
144
|
-
return metadata
|
ebk/imports/ebooks.py
DELETED
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import json
|
|
3
|
-
import shutil
|
|
4
|
-
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
import fitz
|
|
8
|
-
from PIL import Image
|
|
9
|
-
from io import BytesIO
|
|
10
|
-
|
|
11
|
-
from rich.console import Console
|
|
12
|
-
|
|
13
|
-
from typing import Dict
|
|
14
|
-
from slugify import slugify
|
|
15
|
-
from ..extract_metadata import extract_metadata_from_pdf
|
|
16
|
-
from ..ident import add_unique_id
|
|
17
|
-
from ..utils import get_unique_filename
|
|
18
|
-
|
|
19
|
-
import logging
|
|
20
|
-
|
|
21
|
-
def import_ebooks(ebooks_dir, output_dir, output_formats):
|
|
22
|
-
"""
|
|
23
|
-
Import ebooks from a directory into the library.
|
|
24
|
-
|
|
25
|
-
Args:
|
|
26
|
-
ebooks_dir (str): Path to the directory containing the ebooks
|
|
27
|
-
output_dir (str): Path to the output directory
|
|
28
|
-
output_formats (list): List of output formats to convert the ebooks to
|
|
29
|
-
"""
|
|
30
|
-
|
|
31
|
-
logger = logging.getLogger(__name__)
|
|
32
|
-
|
|
33
|
-
if os.path.exists(output_dir):
|
|
34
|
-
logger.error(f"Output directory already exists: {output_dir}")
|
|
35
|
-
return
|
|
36
|
-
os.makedirs(output_dir)
|
|
37
|
-
|
|
38
|
-
metadata_list = []
|
|
39
|
-
for root, _, files in os.walk(ebooks_dir):
|
|
40
|
-
for file in files:
|
|
41
|
-
try:
|
|
42
|
-
# create the dictionary item for file
|
|
43
|
-
item = {
|
|
44
|
-
"title": file
|
|
45
|
-
}
|
|
46
|
-
path = Path(root) / Path(file)
|
|
47
|
-
|
|
48
|
-
# infer the format of the file
|
|
49
|
-
_, ext = os.path.splitext(file)
|
|
50
|
-
ext = ext.lower().strip(".")
|
|
51
|
-
if ext not in output_formats:
|
|
52
|
-
continue
|
|
53
|
-
|
|
54
|
-
cover_image = None
|
|
55
|
-
if ext == "pdf":
|
|
56
|
-
metadata = extract_metadata_from_pdf(path)
|
|
57
|
-
cover_image = extract_cover_from_pdf(path)
|
|
58
|
-
else:
|
|
59
|
-
continue
|
|
60
|
-
|
|
61
|
-
logger.debug(f"Importing ebook {file} in {root}")
|
|
62
|
-
metadata = {key: item.get(key) or metadata.get(key) or value for key, value in metadata.items()}
|
|
63
|
-
|
|
64
|
-
item["root"] = root
|
|
65
|
-
item["source_folder"] = ebooks_dir
|
|
66
|
-
item["output_folder"] = output_dir
|
|
67
|
-
item["imported_from"] = "ebooks"
|
|
68
|
-
item["virtual_libs"] = [slugify(output_dir)]
|
|
69
|
-
|
|
70
|
-
title_slug = slugify(item.get("title", "unknown_title"))
|
|
71
|
-
creator_slug = slugify(item.get("creators", ["unknown_creator"])[0])
|
|
72
|
-
base_name = f"{title_slug}__{creator_slug}"
|
|
73
|
-
|
|
74
|
-
_, ext = os.path.splitext(file)
|
|
75
|
-
src = os.path.join(root, file)
|
|
76
|
-
dst = os.path.join(output_dir, f"{base_name}{ext}")
|
|
77
|
-
dst = get_unique_filename(dst)
|
|
78
|
-
shutil.copy(src, dst)
|
|
79
|
-
file_paths = [ os.path.relpath(dst, output_dir) ]
|
|
80
|
-
item["file_paths"] = file_paths
|
|
81
|
-
|
|
82
|
-
if cover_image:
|
|
83
|
-
cover_image_file = os.path.join(output_dir, f"{base_name}_cover.jpg")
|
|
84
|
-
with open(cover_image_file, "wb") as cover:
|
|
85
|
-
cover.write(cover_image)
|
|
86
|
-
|
|
87
|
-
item["cover_path"] = os.path.relpath(cover_image_file, output_dir)
|
|
88
|
-
metadata_list.append(item)
|
|
89
|
-
|
|
90
|
-
except Exception as e:
|
|
91
|
-
logger.error(f"Error processing file {file} in {root}: {e}")
|
|
92
|
-
|
|
93
|
-
for entry in metadata_list:
|
|
94
|
-
add_unique_id(entry)
|
|
95
|
-
|
|
96
|
-
metadata_file = os.path.join(output_dir, "metadata.json")
|
|
97
|
-
with open(metadata_file, "w") as f:
|
|
98
|
-
json.dump(metadata_list, f, indent=2)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def extract_cover_from_pdf(pdf_path):
|
|
102
|
-
# Open the PDF file
|
|
103
|
-
pdf_document = fitz.open(pdf_path)
|
|
104
|
-
first_page = pdf_document[0]
|
|
105
|
-
|
|
106
|
-
# Render the first page as a PNG image
|
|
107
|
-
pix = first_page.get_pixmap()
|
|
108
|
-
image = Image.open(BytesIO(pix.tobytes(output="png")))
|
|
109
|
-
|
|
110
|
-
# Create a thumbnail
|
|
111
|
-
image.thumbnail((256, 256))
|
|
112
|
-
|
|
113
|
-
# Convert the image to JPEG bytes
|
|
114
|
-
image_bytes = BytesIO()
|
|
115
|
-
image.save(image_bytes, format="JPEG")
|
|
116
|
-
return image_bytes.getvalue()
|
ebk/llm.py
DELETED
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import requests
|
|
3
|
-
from string import Template
|
|
4
|
-
from .config import load_ebkrc_config
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def query_llm(lib_dir, prompt):
|
|
8
|
-
"""
|
|
9
|
-
Queries an OpenAI-compatible LLM endpoint with the given prompt.
|
|
10
|
-
|
|
11
|
-
:param prompt: The user query or conversation prompt text.
|
|
12
|
-
:param model: The OpenAI model name to use, defaults to gpt-3.5-turbo.
|
|
13
|
-
:param temperature: Sampling temperature, defaults to 0.7.
|
|
14
|
-
:return: The JSON response from the endpoint.
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
|
-
return {}
|
|
18
|
-
|
|
19
|
-
# endpoint, api_key, model = load_ebkrc_config()
|
|
20
|
-
|
|
21
|
-
# headers = {
|
|
22
|
-
# "Content-Type": "application/json",
|
|
23
|
-
# "Authorization": f"Bearer {api_key}"
|
|
24
|
-
# }
|
|
25
|
-
|
|
26
|
-
# # let's prefix the prompt with the contents of the file `llm-instructions.md`
|
|
27
|
-
# # however, since this is a ypi package, we need to find the path to the file
|
|
28
|
-
# # we can use the `__file__` variable to get the path to this file, and then
|
|
29
|
-
# # construct the path to the `llm-instructions.md` file
|
|
30
|
-
# file_instr_path = os.path.join(os.path.dirname(__file__), "llm-instructions.md")
|
|
31
|
-
|
|
32
|
-
# # Read the markdown file
|
|
33
|
-
# with open(file_instr_path, "r") as f:
|
|
34
|
-
# template = Template(f.read())
|
|
35
|
-
|
|
36
|
-
# data = {
|
|
37
|
-
# "lib_dir": lib_dir
|
|
38
|
-
# }
|
|
39
|
-
|
|
40
|
-
# instructions = template.safe_substitute(data)
|
|
41
|
-
# prompt = instructions + "\n\Natural language query: " + prompt
|
|
42
|
-
|
|
43
|
-
# data = {
|
|
44
|
-
# "model": model,
|
|
45
|
-
# "prompt": prompt,
|
|
46
|
-
# "stream": False,
|
|
47
|
-
# "format": "json"
|
|
48
|
-
# }
|
|
49
|
-
|
|
50
|
-
# try:
|
|
51
|
-
# response = requests.post(endpoint, headers=headers, json=data)
|
|
52
|
-
# response.raise_for_status()
|
|
53
|
-
# except requests.RequestException as e:
|
|
54
|
-
# raise SystemError(f"Error calling LLM endpoint: {e}")
|
|
55
|
-
# except Exception as e:
|
|
56
|
-
# raise SystemError(f"Unknown Error: {e}")
|
|
57
|
-
|
|
58
|
-
# return response.json()
|
ebk/manager.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
|
|
3
|
-
class LibraryManager:
|
|
4
|
-
def __init__(self, json_file):
|
|
5
|
-
self.json_file = json_file
|
|
6
|
-
self._load_library()
|
|
7
|
-
|
|
8
|
-
def _load_library(self):
|
|
9
|
-
"""Load the JSON library into memory."""
|
|
10
|
-
with open(self.json_file, "r") as f:
|
|
11
|
-
self.library = json.load(f)
|
|
12
|
-
|
|
13
|
-
def save_library(self):
|
|
14
|
-
"""Save the in-memory library back to the JSON file."""
|
|
15
|
-
with open(self.json_file, "w") as f:
|
|
16
|
-
json.dump(self.library, f, indent=4)
|
|
17
|
-
|
|
18
|
-
def list_books(self):
|
|
19
|
-
"""List all books in the library."""
|
|
20
|
-
return self.library
|
|
21
|
-
|
|
22
|
-
def search_books(self, query):
|
|
23
|
-
"""Search for books by title, author, or tags."""
|
|
24
|
-
return [
|
|
25
|
-
book for book in self.library
|
|
26
|
-
if query.lower() in (book["Title"].lower() + book["Author"].lower() + book["Tags"].lower())
|
|
27
|
-
]
|
|
28
|
-
|
|
29
|
-
def add_book(self, book_metadata):
|
|
30
|
-
"""Add a new book to the library."""
|
|
31
|
-
self.library.append(book_metadata)
|
|
32
|
-
self.save_library()
|
|
33
|
-
|
|
34
|
-
def delete_book(self, title):
|
|
35
|
-
"""Delete a book by title."""
|
|
36
|
-
self.library = [book for book in self.library if book["Title"] != title]
|
|
37
|
-
self.save_library()
|
|
38
|
-
|
|
39
|
-
def update_book(self, title, new_metadata):
|
|
40
|
-
"""Update metadata for a specific book."""
|
|
41
|
-
for book in self.library:
|
|
42
|
-
if book["Title"] == title:
|
|
43
|
-
book.update(new_metadata)
|
|
44
|
-
self.save_library()
|