epubchapterize 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {epubchapterize-0.2.0/epubchapterize.egg-info → epubchapterize-0.2.1}/PKG-INFO +1 -1
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/README.md +2 -0
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/epub_chapterize/chapterize.py +6 -3
- {epubchapterize-0.2.0 → epubchapterize-0.2.1/epubchapterize.egg-info}/PKG-INFO +1 -1
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/pyproject.toml +1 -1
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/LICENSE +0 -0
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/README_PyPi.md +0 -0
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/epub_chapterize/__init__.py +0 -0
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/epubchapterize.egg-info/SOURCES.txt +0 -0
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/epubchapterize.egg-info/dependency_links.txt +0 -0
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/epubchapterize.egg-info/entry_points.txt +0 -0
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/epubchapterize.egg-info/requires.txt +0 -0
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/epubchapterize.egg-info/top_level.txt +0 -0
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/setup.cfg +0 -0
- {epubchapterize-0.2.0 → epubchapterize-0.2.1}/test/test_import.py +0 -0
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# EpubChapterize
|
|
2
2
|
### A tool to split out chapters from ePub documents. Initially just for Project Gutenberg ePub3s.
|
|
3
3
|
|
|
4
|
+
[epubchapterize on PyPI](https://pypi.org/project/epubchapterize/)
|
|
5
|
+
|
|
4
6
|
## Setup
|
|
5
7
|
|
|
6
8
|
To set up the project, follow these steps:
|
|
@@ -244,8 +244,11 @@ def chapterize(file_path):
|
|
|
244
244
|
for matched_header in matched_candidate_headers:
|
|
245
245
|
print(f"Matched Header: {matched_header.header_text}, XPath: {matched_header.header_xpath}, Nav Label: {matched_header.nav_item.nav_label}")
|
|
246
246
|
|
|
247
|
-
for
|
|
248
|
-
|
|
247
|
+
spine_ids = [item_id for item_id, _ in book.spine]
|
|
248
|
+
spine_items = [book.get_item_with_id(item_id) for item_id in spine_ids]
|
|
249
|
+
all_items = spine_items + [item for item in book.get_items() if item not in spine_items]
|
|
250
|
+
for item in all_items:
|
|
251
|
+
if item and item.get_type() == ebooklib.ITEM_DOCUMENT:
|
|
249
252
|
soup = BeautifulSoup(item.get_body_content(), 'html.parser')
|
|
250
253
|
|
|
251
254
|
current_document_all_headers = []
|
|
@@ -309,7 +312,7 @@ if __name__ == "__main__":
|
|
|
309
312
|
books_directory = "books/to_import"
|
|
310
313
|
|
|
311
314
|
all_books = glob(os.path.join(books_directory, "**", "*.epub"), recursive=True)
|
|
312
|
-
individual_book = ["/Users/matthewgrant/Source/EpubChapterize/epub_chapterize/books/to_import/
|
|
315
|
+
individual_book = ["/Users/matthewgrant/Source/EpubChapterize/epub_chapterize/books/to_import/german/Remarque.epub"]
|
|
313
316
|
for file_path in individual_book:
|
|
314
317
|
if "archive" in file_path: # Include only files in the archive folder
|
|
315
318
|
continue
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|