epubchapterize 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: epubchapterize
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: A Python package for parsing chapters from EPUBs.
5
5
  Author-email: Matthew Grant <nzmattgrant@gmail.com>
6
6
  License: MIT
@@ -1,6 +1,8 @@
1
1
  # EpubChapterize
2
2
  ### A tool to split out chapters from ePub documents. Initially just for Project Gutenberg ePub3s.
3
3
 
4
+ [![PyPI version](https://img.shields.io/pypi/v/epubchapterize.svg)](https://pypi.org/project/epubchapterize/)
5
+
4
6
  ## Setup
5
7
 
6
8
  To set up the project, follow these steps:
@@ -244,8 +244,11 @@ def chapterize(file_path):
244
244
  for matched_header in matched_candidate_headers:
245
245
  print(f"Matched Header: {matched_header.header_text}, XPath: {matched_header.header_xpath}, Nav Label: {matched_header.nav_item.nav_label}")
246
246
 
247
- for item in book.get_items():
248
- if item.get_type() == ebooklib.ITEM_DOCUMENT:
247
+ spine_ids = [item_id for item_id, _ in book.spine]
248
+ spine_items = [book.get_item_with_id(item_id) for item_id in spine_ids]
249
+ all_items = spine_items + [item for item in book.get_items() if item not in spine_items]
250
+ for item in all_items:
251
+ if item and item.get_type() == ebooklib.ITEM_DOCUMENT:
249
252
  soup = BeautifulSoup(item.get_body_content(), 'html.parser')
250
253
 
251
254
  current_document_all_headers = []
@@ -309,7 +312,7 @@ if __name__ == "__main__":
309
312
  books_directory = "books/to_import"
310
313
 
311
314
  all_books = glob(os.path.join(books_directory, "**", "*.epub"), recursive=True)
312
- individual_book = ["/Users/matthewgrant/Source/EpubChapterize/epub_chapterize/books/to_import/english/h-p-lovecraft_at-the-mountains-of-madness.epub"]
315
+ individual_book = ["/Users/matthewgrant/Source/EpubChapterize/epub_chapterize/books/to_import/german/Remarque.epub"]
313
316
  for file_path in individual_book:
314
317
  if "archive" in file_path: # Include only files in the archive folder
315
318
  continue
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: epubchapterize
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: A Python package for parsing chapters from EPUBs.
5
5
  Author-email: Matthew Grant <nzmattgrant@gmail.com>
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "epubchapterize"
7
- version = "0.2.0"
7
+ version = "0.2.1"
8
8
  description = "A Python package for parsing chapters from EPUBs."
9
9
  authors = [
10
10
  { name = "Matthew Grant", email = "nzmattgrant@gmail.com" }
File without changes
File without changes