PyPI - epubchapterize - Versions diffs - 0.2.0__tar.gz → 0.2.1__tar.gz - Mend

epubchapterize 0.2.0.tar.gz → 0.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

{epubchapterize-0.2.0/epubchapterize.egg-info → epubchapterize-0.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: epubchapterize
-Version: 0.2.0
+Version: 0.2.1
 Summary: A Python package for parsing chapters from EPUBs.
 Author-email: Matthew Grant <nzmattgrant@gmail.com>
 License: MIT

{epubchapterize-0.2.0 → epubchapterize-0.2.1}/README.md RENAMED Viewed

@@ -1,6 +1,8 @@
 # EpubChapterize
 ### A tool to split out chapters from ePub documents. Initially just for Project Gutenberg ePub3s.
+[![PyPI version](https://img.shields.io/pypi/v/epubchapterize.svg)](https://pypi.org/project/epubchapterize/)
 ## Setup
 To set up the project, follow these steps:

{epubchapterize-0.2.0 → epubchapterize-0.2.1}/epub_chapterize/chapterize.py RENAMED Viewed

@@ -244,8 +244,11 @@ def chapterize(file_path):
     for matched_header in matched_candidate_headers:
         print(f"Matched Header: {matched_header.header_text}, XPath: {matched_header.header_xpath}, Nav Label: {matched_header.nav_item.nav_label}")
-    for item in book.get_items():
-        if item.get_type() == ebooklib.ITEM_DOCUMENT:
+    spine_ids = [item_id for item_id, _ in book.spine]
+    spine_items = [book.get_item_with_id(item_id) for item_id in spine_ids]
+    all_items = spine_items + [item for item in book.get_items() if item not in spine_items]
+    for item in all_items:
+        if item and item.get_type() == ebooklib.ITEM_DOCUMENT:
             soup = BeautifulSoup(item.get_body_content(), 'html.parser')
             current_document_all_headers = []
@@ -309,7 +312,7 @@ if __name__ == "__main__":
     books_directory = "books/to_import"
     all_books = glob(os.path.join(books_directory, "**", "*.epub"), recursive=True)
-    individual_book = ["/Users/matthewgrant/Source/EpubChapterize/epub_chapterize/books/to_import/english/h-p-lovecraft_at-the-mountains-of-madness.epub"]
+    individual_book = ["/Users/matthewgrant/Source/EpubChapterize/epub_chapterize/books/to_import/german/Remarque.epub"]
     for file_path in individual_book:
         if "archive" in file_path:  # Include only files in the archive folder
             continue

{epubchapterize-0.2.0 → epubchapterize-0.2.1/epubchapterize.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: epubchapterize
-Version: 0.2.0
+Version: 0.2.1
 Summary: A Python package for parsing chapters from EPUBs.
 Author-email: Matthew Grant <nzmattgrant@gmail.com>
 License: MIT

{epubchapterize-0.2.0 → epubchapterize-0.2.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "epubchapterize"
-version = "0.2.0"
+version = "0.2.1"
 description = "A Python package for parsing chapters from EPUBs."
 authors = [
     { name = "Matthew Grant", email = "nzmattgrant@gmail.com" }