ebk 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic. Click here for more details.

Files changed (84) hide show
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +443 -0
  4. ebk/ai/llm_providers/__init__.py +21 -0
  5. ebk/ai/llm_providers/base.py +230 -0
  6. ebk/ai/llm_providers/ollama.py +362 -0
  7. ebk/ai/metadata_enrichment.py +396 -0
  8. ebk/ai/question_generator.py +328 -0
  9. ebk/ai/reading_companion.py +224 -0
  10. ebk/ai/semantic_search.py +434 -0
  11. ebk/ai/text_extractor.py +394 -0
  12. ebk/cli.py +2828 -680
  13. ebk/config.py +260 -22
  14. ebk/db/__init__.py +37 -0
  15. ebk/db/migrations.py +180 -0
  16. ebk/db/models.py +526 -0
  17. ebk/db/session.py +144 -0
  18. ebk/decorators.py +132 -0
  19. ebk/exports/base_exporter.py +218 -0
  20. ebk/exports/html_library.py +1390 -0
  21. ebk/exports/html_utils.py +117 -0
  22. ebk/exports/hugo.py +7 -3
  23. ebk/exports/jinja_export.py +287 -0
  24. ebk/exports/multi_facet_export.py +164 -0
  25. ebk/exports/symlink_dag.py +479 -0
  26. ebk/extract_metadata.py +76 -7
  27. ebk/library_db.py +899 -0
  28. ebk/plugins/__init__.py +42 -0
  29. ebk/plugins/base.py +502 -0
  30. ebk/plugins/hooks.py +444 -0
  31. ebk/plugins/registry.py +500 -0
  32. ebk/repl/__init__.py +9 -0
  33. ebk/repl/find.py +126 -0
  34. ebk/repl/grep.py +174 -0
  35. ebk/repl/shell.py +1677 -0
  36. ebk/repl/text_utils.py +320 -0
  37. ebk/search_parser.py +413 -0
  38. ebk/server.py +1633 -0
  39. ebk/services/__init__.py +11 -0
  40. ebk/services/import_service.py +442 -0
  41. ebk/services/tag_service.py +282 -0
  42. ebk/services/text_extraction.py +317 -0
  43. ebk/similarity/__init__.py +77 -0
  44. ebk/similarity/base.py +154 -0
  45. ebk/similarity/core.py +445 -0
  46. ebk/similarity/extractors.py +168 -0
  47. ebk/similarity/metrics.py +376 -0
  48. ebk/vfs/__init__.py +101 -0
  49. ebk/vfs/base.py +301 -0
  50. ebk/vfs/library_vfs.py +124 -0
  51. ebk/vfs/nodes/__init__.py +54 -0
  52. ebk/vfs/nodes/authors.py +196 -0
  53. ebk/vfs/nodes/books.py +480 -0
  54. ebk/vfs/nodes/files.py +155 -0
  55. ebk/vfs/nodes/metadata.py +385 -0
  56. ebk/vfs/nodes/root.py +100 -0
  57. ebk/vfs/nodes/similar.py +165 -0
  58. ebk/vfs/nodes/subjects.py +184 -0
  59. ebk/vfs/nodes/tags.py +371 -0
  60. ebk/vfs/resolver.py +228 -0
  61. ebk-0.3.2.dist-info/METADATA +755 -0
  62. ebk-0.3.2.dist-info/RECORD +69 -0
  63. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/WHEEL +1 -1
  64. ebk-0.3.2.dist-info/licenses/LICENSE +21 -0
  65. ebk/imports/__init__.py +0 -0
  66. ebk/imports/calibre.py +0 -144
  67. ebk/imports/ebooks.py +0 -116
  68. ebk/llm.py +0 -58
  69. ebk/manager.py +0 -44
  70. ebk/merge.py +0 -308
  71. ebk/streamlit/__init__.py +0 -0
  72. ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
  73. ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
  74. ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
  75. ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
  76. ebk/streamlit/app.py +0 -185
  77. ebk/streamlit/display.py +0 -168
  78. ebk/streamlit/filters.py +0 -151
  79. ebk/streamlit/utils.py +0 -58
  80. ebk/utils.py +0 -311
  81. ebk-0.1.0.dist-info/METADATA +0 -457
  82. ebk-0.1.0.dist-info/RECORD +0 -29
  83. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/entry_points.txt +0 -0
  84. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,69 @@
1
+ ebk/__init__.py,sha256=KK5aFY07PvPB8LjfNJ_xAuWft6uyIMsiK6vrvhyyg14,790
2
+ ebk/cli.py,sha256=oAG25LPGJH0QZPQJVGEpiHCcgdaPw4sn5mN0knhRX_M,112835
3
+ ebk/config.py,sha256=P090sNH2YSnNNteXjim2_WYN9j2BC_GU5o0wQIwSbT4,7364
4
+ ebk/decorators.py,sha256=MpAD1Wwy3o4l0sEiq4EIy5YtKMKf_nXtu7jpV64Uvyc,5179
5
+ ebk/extract_metadata.py,sha256=epMn9_4zQYiytBwj25_-rYl0QmPN4cP25DuxMnijNMU,12228
6
+ ebk/ident.py,sha256=yRIKVA0rpyhdPCxl0Vx7iu3YOebE8Yvbv32iCJmKk-Y,3324
7
+ ebk/library_db.py,sha256=BEwqvoR8Ayec53ptmVcRsL2oTTlHj1_w0WP_j1ONfSA,29535
8
+ ebk/search_parser.py,sha256=NRVbGnaOWxbc1WtsoFVINjHqCMBEubrKBxY2b_HQqJY,14848
9
+ ebk/server.py,sha256=x5vpQmZjGAwJVgmUq8l8Z1owfoqg2weuUVBovEWVE-s,59036
10
+ ebk/ai/__init__.py,sha256=-0vfDHdht8ZasYvl33nyGoWCaZxKIn_en0PxZdRQMkw,686
11
+ ebk/ai/knowledge_graph.py,sha256=Xp4Ao1fVwp6oKyBG1hDroAqPTADyyUGG86WczvLRDzg,16504
12
+ ebk/ai/metadata_enrichment.py,sha256=jQrqwjTzZkxzkJdxdwQCN1Cchkwa6jibHd8r5hjYosw,12364
13
+ ebk/ai/question_generator.py,sha256=ev1wEmgIDxnb_kaZdiLh0Z1TW7HUjb_HR1NlsBmdduQ,11363
14
+ ebk/ai/reading_companion.py,sha256=8GHiA9tR-vb28eCS3FmpDmRMX2OMyg5IP8AhPZ7c-Vg,7948
15
+ ebk/ai/semantic_search.py,sha256=i0qY6cEr_89RWxTdJpnMoHu-l4csYIdDi6nh17asyfQ,15218
16
+ ebk/ai/text_extractor.py,sha256=SOi7VgiAupZTfh4GG8XDB5N2AMeEP727o_eVlTdlhiQ,13918
17
+ ebk/ai/llm_providers/__init__.py,sha256=dqXtt2zh2iAHfpBLRQWpyDIOrJ8IggcxwvD0isHplcI,438
18
+ ebk/ai/llm_providers/base.py,sha256=am-TO4LTYLNeyMop9qy4TQdS7LJ2VTNvqy4iri1YvNk,5694
19
+ ebk/ai/llm_providers/ollama.py,sha256=MwXhJSsAc7cYfKyrmiXvk25QzIpvfNOZcugs9TNZta8,10132
20
+ ebk/db/__init__.py,sha256=CN1R77Ut5ai8kGSh1ohprGaS5qvOUMdtMT-y--5DIpw,792
21
+ ebk/db/migrations.py,sha256=H0nAl8I-hUKctV7sPj14iockSL9IL0qHihy3krxZPsc,5513
22
+ ebk/db/models.py,sha256=PGcq9knATE06_MStiCpg7zNvO2nDhU592xDt0vhNCxg,19967
23
+ ebk/db/session.py,sha256=3Oc7Xo1gb7B430JtKRtF4JFyFi8hdIKz1GQq-Bjjz_o,3549
24
+ ebk/exports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
+ ebk/exports/base_exporter.py,sha256=OQFmuyP9vxB5SVErTacOGonjD2Kyc1j1_1G9VcpdlDY,6783
26
+ ebk/exports/html_library.py,sha256=L8b__nU_c_4nZYym-W_M90Eb-GRVZfb3OqLm0nE-0Lk,54107
27
+ ebk/exports/html_utils.py,sha256=ft4PLHBKjx6OFD3bxCEI9tpaEUEqWzhLNqwKc196ulo,3899
28
+ ebk/exports/hugo.py,sha256=0rKNCegtm_Xd2g7IU77DmzlNiHZkoHTd2Wyr_y2gLRk,2209
29
+ ebk/exports/jinja_export.py,sha256=wWy_eQIp16yuZ0i-Ssj8PaGHSNiSdjOwjt5OpKuv-q0,10996
30
+ ebk/exports/multi_facet_export.py,sha256=xgy5Vjkow49ESVUXcjmLcTbMa-nSyaAlYW8HH-SeB4U,6364
31
+ ebk/exports/symlink_dag.py,sha256=ChTSp9qA5Ibui4eIFfbWNDeYBfYh--RtOI4RvP54-Vs,20499
32
+ ebk/exports/zip.py,sha256=erteF1NcymUbka5m_pnXWaRZmkUihALXe4A_cYg8hXs,779
33
+ ebk/plugins/__init__.py,sha256=-YhhLSqVI6lZSCoW-8fJfib7qL12AO3h4IGOFGcs_qU,765
34
+ ebk/plugins/base.py,sha256=x523_EFtHiFgBBTJqzgArIF8D8KtmD20jJUTqew29eM,14121
35
+ ebk/plugins/hooks.py,sha256=ym7fkmqqirc17Q5GYtOO6ahfyYMJHUX1QwRUkwaZs20,14442
36
+ ebk/plugins/registry.py,sha256=xCmKMc1ioSpXM8nBPcep5RxUy-BZQMKYCInDp2NqzWY,15497
37
+ ebk/repl/__init__.py,sha256=zSAp76GfAEtxYsXe8W2m-PtDFeHX668y6jnpzz2yeWA,252
38
+ ebk/repl/find.py,sha256=Yl-4qv_uKl8dJefHzCyNpjJxngtB_fxj4MqXbMGEkKc,3799
39
+ ebk/repl/grep.py,sha256=VIFdcMrFqSHBB2rmLllf9PDW4UjBelf60znUPfBh-AA,4802
40
+ ebk/repl/shell.py,sha256=emzjK1aXiGnYdEAwUybntnaXS62tQhtQZkDeQ3sDIGg,60896
41
+ ebk/repl/text_utils.py,sha256=nSCE-qPL3xHyXjt_5ltjYnOCBXpyPqJiyaKztrfAKuQ,8382
42
+ ebk/services/__init__.py,sha256=j6Gqdqp7MRRWIuSjJH_3TxL7JA5nID4kcNGbWXKXcDs,199
43
+ ebk/services/import_service.py,sha256=WThx2sEuooFQwgFmjyuOZPlIo8FQ-08g-CzVr5r6QaQ,16478
44
+ ebk/services/tag_service.py,sha256=KhuYqSUalOmQc1ikfrfEZ1__wf9f-VVEoWPpFSSGMi0,7982
45
+ ebk/services/text_extraction.py,sha256=jUPBolw45w5cKuL0m92K1i2Qo0FrqCwoX8p7GuBxvzU,10525
46
+ ebk/similarity/__init__.py,sha256=ySprbU_qfMuOEFyUqnBHe-4HSyvGyZHjRUZ4y6Z20-A,1942
47
+ ebk/similarity/base.py,sha256=uWY1whl1jdjvntnuOaB9Mal3f3yiLt7HqjHSBZyM324,4473
48
+ ebk/similarity/core.py,sha256=ohXY61L3xUOjcbtIkfqLwRKiiEST_oTRt8TFLdXIReQ,13340
49
+ ebk/similarity/extractors.py,sha256=UqxnIWM2sBdY_d6mse_NYTuOxdT2f7rtPDfe9b8QR0k,4220
50
+ ebk/similarity/metrics.py,sha256=2rbX10xXauebBSH7fKN9fCSMgSOuxRHKwUyJhd_hFfU,10419
51
+ ebk/vfs/__init__.py,sha256=5d2jCVRNboJlD576IrFWKIUeOVVVv3zVdyMbSMZ0zEY,3129
52
+ ebk/vfs/base.py,sha256=NkOBkefa6I4O1_GY0SmBFHMHhQbt7edyNAR8ncZeVWM,8231
53
+ ebk/vfs/library_vfs.py,sha256=Lkrn1Wkx2f5qnegnmLGUKiHUfOOUoqnQOADq80xaJNg,3353
54
+ ebk/vfs/resolver.py,sha256=D_pvOzmeErl8oMxw73g4X4nKBzxc_OgoAmHDDP_fJA4,6228
55
+ ebk/vfs/nodes/__init__.py,sha256=W6MCMNUh8129Uw2xtNoWi00rrMHNreTXb26KS7IrXrA,1360
56
+ ebk/vfs/nodes/authors.py,sha256=Dqr8Leb2NM3W_qKwvEz8r3-C3uNFQVjNGxf56-qhm9c,5731
57
+ ebk/vfs/nodes/books.py,sha256=5YzFXFKrK1aUCzD7uAYPmuGCTVh_j4vj9VNfVFywo9o,15461
58
+ ebk/vfs/nodes/files.py,sha256=49kCsPFe5zBstcUcC44E-KEUHkoHTKT6AzQOXhirnDA,4673
59
+ ebk/vfs/nodes/metadata.py,sha256=wo2TDV2JshuWAzdjGEVOWM5-9S3XwqoRoqRIgSX3o1g,11392
60
+ ebk/vfs/nodes/root.py,sha256=4WkU3GnNcnnqg6dpkjUXI0pnWX4oAR6UJDNpEqIexLs,3030
61
+ ebk/vfs/nodes/similar.py,sha256=LJeFZigiu7dDyduGi9SJrxNJNP_qix661o8pyGsARFs,4866
62
+ ebk/vfs/nodes/subjects.py,sha256=7U9Raj0b2PXb6LfOo3zlHAqTD0lW3IQk7m5u0nU8WPU,5327
63
+ ebk/vfs/nodes/tags.py,sha256=uFJ5lWQYXD0dDmRP6v9NpfQDieTZJfGtSgaz2zzHVZo,11119
64
+ ebk-0.3.2.dist-info/licenses/LICENSE,sha256=1eh_aOAZz71hpva42M9f8Vqj_FtSUqzE-1EimifmM_8,1068
65
+ ebk-0.3.2.dist-info/METADATA,sha256=P7E6L37hRP087ibZqpd4EV4szzlIHLh_cgAGxkYHOk4,21712
66
+ ebk-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
67
+ ebk-0.3.2.dist-info/entry_points.txt,sha256=M24WNtCeBq-nmIDPU-3i0DtM9VP3bM_ul2nQIC1r_RA,36
68
+ ebk-0.3.2.dist-info/top_level.txt,sha256=OLATFvDsJQh-6TqJCili349OH47DoH-6dMih88BWcyg,4
69
+ ebk-0.3.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Alex Towell
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
ebk/imports/__init__.py DELETED
File without changes
ebk/imports/calibre.py DELETED
@@ -1,144 +0,0 @@
1
- import os
2
- import shutil
3
- import json
4
- from slugify import slugify
5
- from typing import Dict
6
- import logging
7
- from ..extract_metadata import extract_metadata
8
- from ..ident import add_unique_id
9
- from ..utils import get_unique_filename
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- ebook_exts = (".pdf", ".epub", ".mobi", ".azw3", ".txt", ".docx", ".odt",
14
- ".html", ".rtf", ".md", ".fb2", ".cbz", ".cbr", ".djvu",
15
- ".xps", ".ibooks", ".azw", ".lit", ".pdb", ".prc", ".lrf",
16
- ".pdb", ".pml", ".rb", ".snb", ".tcr", ".txtz", ".azw1")
17
-
18
- def import_calibre(calibre_dir: str,
19
- output_dir: str,
20
- ebook_exts: tuple = ebook_exts):
21
- if not os.path.exists(output_dir):
22
- os.makedirs(output_dir)
23
-
24
- metadata_list = []
25
-
26
- for root, _, files in os.walk(calibre_dir):
27
- # Look for OPF
28
- opf_file_path = os.path.join(root, "metadata.opf")
29
-
30
- # Gather valid ebook files
31
- ebook_files = [f for f in files if f.lower().endswith(ebook_exts)]
32
-
33
- if not ebook_files:
34
- logger.debug(f"No recognized ebook files found in {root}. Skipping.")
35
- continue # skip if no recognized ebook files
36
-
37
- # Pick the "primary" ebook file. This is arbitrary and can be changed.
38
- primary_ebook_file = ebook_files[0]
39
- ebook_full_path = os.path.join(root, primary_ebook_file)
40
-
41
- # Extract metadata
42
- if os.path.exists(opf_file_path):
43
- logger.debug(f"Found metadata.opf in {root}. Extracting metadata from OPF.")
44
- metadata = extract_metadata(ebook_full_path, opf_file_path)
45
- else:
46
- logger.warning(f"No metadata.opf found in {root}. Inferring metadata from ebook files.")
47
- metadata = extract_metadata(ebook_full_path) # Only ebook file path is provided
48
-
49
- # Extract metadata (OPF + ebook)
50
- metadata = extract_metadata(ebook_full_path, opf_file_path)
51
- metadata["root"] = root
52
- metadata["source_folder"] = calibre_dir
53
- metadata["output_folder"] = output_dir
54
- metadata["imported_from"] = "calibre"
55
- metadata["virtual_libs"] = [slugify(output_dir)]
56
-
57
- # Generate base name
58
- title_slug = slugify(metadata.get("title", "unknown_title"))
59
- creator_slug = slugify(
60
- metadata["creators"][0]) if metadata.get("creators") else "unknown_creator"
61
-
62
- base_name = f"{title_slug}__{creator_slug}"
63
-
64
- # Copy ebooks
65
- file_paths = []
66
- for ebook_file in ebook_files:
67
- _, ext = os.path.splitext(ebook_file)
68
- src = os.path.join(root, ebook_file)
69
- dst = os.path.join(output_dir, f"{base_name}{ext}")
70
- dst = get_unique_filename(dst)
71
- shutil.copy(src, dst)
72
- file_paths.append(os.path.relpath(dst, output_dir))
73
-
74
- # Optionally handle cover.jpg
75
- if "cover.jpg" in files:
76
- cover_src = os.path.join(root, "cover.jpg")
77
- cover_dst = os.path.join(output_dir, f"{base_name}_cover.jpg")
78
- shutil.copy(cover_src, cover_dst)
79
- metadata["cover_path"] = os.path.relpath(cover_dst, output_dir)
80
-
81
- # Store relative paths in metadata
82
- metadata["file_paths"] = file_paths
83
- metadata_list.append(metadata)
84
-
85
- for entry in metadata_list:
86
- add_unique_id(entry)
87
-
88
- # Write out metadata.json
89
- output_json = os.path.join(output_dir, "metadata.json")
90
- with open(output_json, "w", encoding="utf-8") as f:
91
- json.dump(metadata_list, f, indent=2, ensure_ascii=False)
92
-
93
-
94
- def ensure_metadata_completeness(metadata: Dict) -> Dict:
95
- """
96
- Ensure that all required metadata fields are present.
97
- If a field is missing or empty, attempt to infer or set default values.
98
-
99
- Args:
100
- metadata (Dict): The metadata dictionary extracted from OPF or inferred.
101
-
102
- Returns:
103
- Dict: The updated metadata dictionary with all necessary fields.
104
- """
105
- required_fields = ["title", "creators",
106
- "subjects", "description",
107
- "language", "date", "identifiers",
108
- "file_paths", "cover_path", "unique_id",
109
- "source_folder", "output_folder",
110
- "imported_from", "virtual_libs"]
111
- for field in required_fields:
112
- if field not in metadata:
113
- if field == "creators":
114
- metadata[field] = ["Unknown Author"]
115
- logger.debug(f"Set default value for '{field}'.")
116
- elif field == "subjects":
117
- metadata[field] = []
118
- logger.debug(f"Set default value for '{field}'.")
119
- elif field == "description":
120
- metadata[field] = "No description available."
121
- logger.debug(f"Set default value for '{field}'.")
122
- elif field == "language":
123
- metadata[field] = "en" # Default to English
124
- logger.debug(f"Set default value for '{field}'.")
125
- elif field == "date":
126
- metadata[field] = None # Unknown date
127
- logger.debug(f"Set default value for '{field}'.")
128
- elif field == "title":
129
- metadata[field] = "Unknown Title"
130
- logger.debug(f"Set default value for '{field}'.")
131
- elif field == "identifiers":
132
- metadata[field] = {}
133
- logger.debug(f"Set default value for '{field}'.")
134
- elif field == "file_paths":
135
- metadata[field] = []
136
- logger.debug(f"Set default value for '{field}'.")
137
- elif field == "cover_path":
138
- metadata[field] = None
139
- logger.debug(f"Set default value for '{field}'.")
140
- elif field == "unique_id":
141
- metadata[field] = None
142
- logger.debug(f"Set default value for '{field}'.")
143
-
144
- return metadata
ebk/imports/ebooks.py DELETED
@@ -1,116 +0,0 @@
1
- import os
2
- import json
3
- import shutil
4
-
5
- from pathlib import Path
6
-
7
- import fitz
8
- from PIL import Image
9
- from io import BytesIO
10
-
11
- from rich.console import Console
12
-
13
- from typing import Dict
14
- from slugify import slugify
15
- from ..extract_metadata import extract_metadata_from_pdf
16
- from ..ident import add_unique_id
17
- from ..utils import get_unique_filename
18
-
19
- import logging
20
-
21
- def import_ebooks(ebooks_dir, output_dir, output_formats):
22
- """
23
- Import ebooks from a directory into the library.
24
-
25
- Args:
26
- ebooks_dir (str): Path to the directory containing the ebooks
27
- output_dir (str): Path to the output directory
28
- output_formats (list): List of output formats to convert the ebooks to
29
- """
30
-
31
- logger = logging.getLogger(__name__)
32
-
33
- if os.path.exists(output_dir):
34
- logger.error(f"Output directory already exists: {output_dir}")
35
- return
36
- os.makedirs(output_dir)
37
-
38
- metadata_list = []
39
- for root, _, files in os.walk(ebooks_dir):
40
- for file in files:
41
- try:
42
- # create the dictionary item for file
43
- item = {
44
- "title": file
45
- }
46
- path = Path(root) / Path(file)
47
-
48
- # infer the format of the file
49
- _, ext = os.path.splitext(file)
50
- ext = ext.lower().strip(".")
51
- if ext not in output_formats:
52
- continue
53
-
54
- cover_image = None
55
- if ext == "pdf":
56
- metadata = extract_metadata_from_pdf(path)
57
- cover_image = extract_cover_from_pdf(path)
58
- else:
59
- continue
60
-
61
- logger.debug(f"Importing ebook {file} in {root}")
62
- metadata = {key: item.get(key) or metadata.get(key) or value for key, value in metadata.items()}
63
-
64
- item["root"] = root
65
- item["source_folder"] = ebooks_dir
66
- item["output_folder"] = output_dir
67
- item["imported_from"] = "ebooks"
68
- item["virtual_libs"] = [slugify(output_dir)]
69
-
70
- title_slug = slugify(item.get("title", "unknown_title"))
71
- creator_slug = slugify(item.get("creators", ["unknown_creator"])[0])
72
- base_name = f"{title_slug}__{creator_slug}"
73
-
74
- _, ext = os.path.splitext(file)
75
- src = os.path.join(root, file)
76
- dst = os.path.join(output_dir, f"{base_name}{ext}")
77
- dst = get_unique_filename(dst)
78
- shutil.copy(src, dst)
79
- file_paths = [ os.path.relpath(dst, output_dir) ]
80
- item["file_paths"] = file_paths
81
-
82
- if cover_image:
83
- cover_image_file = os.path.join(output_dir, f"{base_name}_cover.jpg")
84
- with open(cover_image_file, "wb") as cover:
85
- cover.write(cover_image)
86
-
87
- item["cover_path"] = os.path.relpath(cover_image_file, output_dir)
88
- metadata_list.append(item)
89
-
90
- except Exception as e:
91
- logger.error(f"Error processing file {file} in {root}: {e}")
92
-
93
- for entry in metadata_list:
94
- add_unique_id(entry)
95
-
96
- metadata_file = os.path.join(output_dir, "metadata.json")
97
- with open(metadata_file, "w") as f:
98
- json.dump(metadata_list, f, indent=2)
99
-
100
-
101
- def extract_cover_from_pdf(pdf_path):
102
- # Open the PDF file
103
- pdf_document = fitz.open(pdf_path)
104
- first_page = pdf_document[0]
105
-
106
- # Render the first page as a PNG image
107
- pix = first_page.get_pixmap()
108
- image = Image.open(BytesIO(pix.tobytes(output="png")))
109
-
110
- # Create a thumbnail
111
- image.thumbnail((256, 256))
112
-
113
- # Convert the image to JPEG bytes
114
- image_bytes = BytesIO()
115
- image.save(image_bytes, format="JPEG")
116
- return image_bytes.getvalue()
ebk/llm.py DELETED
@@ -1,58 +0,0 @@
1
- import os
2
- import requests
3
- from string import Template
4
- from .config import load_ebkrc_config
5
-
6
-
7
- def query_llm(lib_dir, prompt):
8
- """
9
- Queries an OpenAI-compatible LLM endpoint with the given prompt.
10
-
11
- :param prompt: The user query or conversation prompt text.
12
- :param model: The OpenAI model name to use, defaults to gpt-3.5-turbo.
13
- :param temperature: Sampling temperature, defaults to 0.7.
14
- :return: The JSON response from the endpoint.
15
- """
16
-
17
- return {}
18
-
19
- # endpoint, api_key, model = load_ebkrc_config()
20
-
21
- # headers = {
22
- # "Content-Type": "application/json",
23
- # "Authorization": f"Bearer {api_key}"
24
- # }
25
-
26
- # # let's prefix the prompt with the contents of the file `llm-instructions.md`
27
- # # however, since this is a ypi package, we need to find the path to the file
28
- # # we can use the `__file__` variable to get the path to this file, and then
29
- # # construct the path to the `llm-instructions.md` file
30
- # file_instr_path = os.path.join(os.path.dirname(__file__), "llm-instructions.md")
31
-
32
- # # Read the markdown file
33
- # with open(file_instr_path, "r") as f:
34
- # template = Template(f.read())
35
-
36
- # data = {
37
- # "lib_dir": lib_dir
38
- # }
39
-
40
- # instructions = template.safe_substitute(data)
41
- # prompt = instructions + "\n\Natural language query: " + prompt
42
-
43
- # data = {
44
- # "model": model,
45
- # "prompt": prompt,
46
- # "stream": False,
47
- # "format": "json"
48
- # }
49
-
50
- # try:
51
- # response = requests.post(endpoint, headers=headers, json=data)
52
- # response.raise_for_status()
53
- # except requests.RequestException as e:
54
- # raise SystemError(f"Error calling LLM endpoint: {e}")
55
- # except Exception as e:
56
- # raise SystemError(f"Unknown Error: {e}")
57
-
58
- # return response.json()
ebk/manager.py DELETED
@@ -1,44 +0,0 @@
1
- import json
2
-
3
- class LibraryManager:
4
- def __init__(self, json_file):
5
- self.json_file = json_file
6
- self._load_library()
7
-
8
- def _load_library(self):
9
- """Load the JSON library into memory."""
10
- with open(self.json_file, "r") as f:
11
- self.library = json.load(f)
12
-
13
- def save_library(self):
14
- """Save the in-memory library back to the JSON file."""
15
- with open(self.json_file, "w") as f:
16
- json.dump(self.library, f, indent=4)
17
-
18
- def list_books(self):
19
- """List all books in the library."""
20
- return self.library
21
-
22
- def search_books(self, query):
23
- """Search for books by title, author, or tags."""
24
- return [
25
- book for book in self.library
26
- if query.lower() in (book["Title"].lower() + book["Author"].lower() + book["Tags"].lower())
27
- ]
28
-
29
- def add_book(self, book_metadata):
30
- """Add a new book to the library."""
31
- self.library.append(book_metadata)
32
- self.save_library()
33
-
34
- def delete_book(self, title):
35
- """Delete a book by title."""
36
- self.library = [book for book in self.library if book["Title"] != title]
37
- self.save_library()
38
-
39
- def update_book(self, title, new_metadata):
40
- """Update metadata for a specific book."""
41
- for book in self.library:
42
- if book["Title"] == title:
43
- book.update(new_metadata)
44
- self.save_library()