PyPI - fb2-tools - Versions diffs - 0.1.0__py3-none-any.whl - Mend

fb2-tools 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

fb2_tools/fb2_edgetts.py +224 -0
fb2_tools/fb2_inline.py +67 -0
fb2_tools/fb2_inline_plaintext.py +120 -0
fb2_tools/fb2_split.py +98 -0
fb2_tools-0.1.0.dist-info/METADATA +15 -0
fb2_tools-0.1.0.dist-info/RECORD +8 -0
fb2_tools-0.1.0.dist-info/WHEEL +4 -0
fb2_tools-0.1.0.dist-info/entry_points.txt +6 -0

fb2_tools/fb2_edgetts.py ADDED Viewed

@@ -0,0 +1,224 @@
+import argparse
+import os
+import re
+import subprocess
+import time
+import zipfile
+from collections import deque
+from pathlib import Path
+from typing import Literal
+from selenium import webdriver
+from splinter import Browser
+def init_browser(download_dir: Path):
+    options = webdriver.firefox.options.Options()
+    options.binary_location = "/usr/bin/librewolf"
+    options.set_preference("browser.download.folderList", 2)  # use custom download path
+    options.set_preference("browser.download.dir", str(download_dir.absolute()))
+    options.set_preference("browser.download.useDownloadDir", True)
+    options.set_preference("browser.download.manager.showWhenStarting", False)
+    options.set_preference("browser.helperApps.neverAsk.saveToDisk", "audio/mpeg")
+    browser = Browser("firefox", options=options)
+    return browser
+def set_value(id_: str, value, browser: Browser):
+    browser.execute_script(f"document.getElementById('{id_}').value = {value}")
+    browser.execute_script(
+        f"document.getElementById('{id_}').dispatchEvent(new Event('input'))"
+    )
+def set_expanding(id_: str, value, browser: Browser):
+    if not browser.find_by_id(id_).visible:
+        browser.find_by_id("dop-settings-label").click()
+    set_value(id_, value, browser)
+def display_block(id_: str, browser: Browser):
+    browser.evaluate_script(f"document.getElementById('{id_}').style.display = 'block'")
+def set_up_browser(
+    browser: Browser,
+    pointstype: Literal["V1", "V2", "V3"],
+    rate: int,
+    pitch: int,
+    max_threads: int,
+    mergefiles: int,
+):
+    browser.visit("https://edgetts.github.io")
+    while browser.find_by_id("pointstype").value != pointstype:
+        browser.find_by_id("pointstype").click()
+    set_value("rate", rate, browser)
+    set_expanding("pitch", pitch, browser)
+    set_expanding("max-threads", max_threads, browser)
+    set_expanding("mergefiles", mergefiles, browser)
+    display_block('file-input', browser)
+    browser.evaluate_script("document.getElementById('file-input').name = 'file-input'")
+    display_block("stat-area", browser)
+pieces_regex = re.compile(r"(\d+) / (\d+)")
+def get_stats(index: int, browser: Browser):
+    assert index == 1 or index == 2
+    return pieces_regex.search(browser.find_by_id("stat-str").value).group(index)
+def finished_downloading(path: Path):
+    part_path = path.with_name(path.name + ".part")
+    return path.exists() and not part_path.exists()
+def zip_dir(dir_path: Path, target: Path):
+    with zipfile.ZipFile(target, "w", zipfile.ZIP_STORED) as zipf:
+        for file_path in dir_path.rglob("*"):
+            if file_path.is_file():
+                arcname = file_path.relative_to(dir_path)
+                zipf.write(file_path, arcname)
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("source", nargs="*", default=[Path(".")], type=Path)
+    parser.add_argument(
+        "-o", "--output", default=Path("~/Music/tmp").expanduser(), type=Path
+    )
+    parser.add_argument("--pointstype", choices=["V1", "V2", "V3"], default="V3")
+    parser.add_argument("--rate", type=int, choices=range(-50, 100 + 1), default=75)
+    parser.add_argument("--pitch", type=int, choices=range(-50, 50 + 1), default=0)
+    parser.add_argument("--max-threads", type=int, choices=range(1, 30 + 1), default=20)
+    parser.add_argument(
+        "--mergefiles",
+        type=int,
+        choices=range(1, 100 + 1),
+        default=100,
+        help="100 means merge all pieces",
+    )
+    parser.add_argument("--skip-downrate", action="store_true")
+    parser.add_argument("--skip-archive", action="store_true")
+    args = parser.parse_args()
+    if len(args.source) == 1:
+        source = args.source[0]
+        if source.is_dir():
+            out_name = source.name
+        else:
+            out_name = source.stem.removesuffix(".fb2")
+        output_dir = args.output / out_name
+    else:
+        output_dir = args.output
+    if not output_dir.exists():
+        os.makedirs(output_dir)
+    elif not output_dir.is_dir():
+        print(f"Output must be a directory: '{output_dir}'")
+        exit(1)
+    elif len(list(output_dir.iterdir())) != 0 and not args.skip_downrate:
+        print(
+            f"Output directory '{output_dir}' is not empty."
+            f" Running downrate.sh on it may cause unwanted files changed and messed up statistics."
+        )
+        if input("Continue? (y/N): ").lower() != "y":
+            exit(0)
+    browser = init_browser(output_dir)
+    set_up_browser(
+        browser,
+        args.pointstype,
+        args.rate,
+        args.pitch,
+        args.max_threads,
+        args.mergefiles,
+    )
+    to_process: deque[Path] = deque(args.source)
+    outputs = set()
+    while to_process:
+        path = to_process.popleft()
+        if path.is_dir():
+            to_process.extend(path.iterdir())
+        else:
+            if path.exists():
+                browser.attach_file("file-input", str(path))
+                outputs.add(output_dir / path.with_suffix(".mp3").name)
+    if browser.is_text_present("Открыты"):
+        overall_pieces = get_stats(2, browser)
+        browser.find_by_id("savebutton").click()
+        processed_pieces = get_stats(1, browser)
+        while any(
+            status in browser.find_by_id("stat-area").value
+            for status in ("Открыта", "Запущена", "Обработка", "ПЕРЕЗАПУСК")
+        ):
+            print(
+                f"Processing {processed_pieces}/{overall_pieces}",
+                end="\r",
+                flush=True,
+            )
+            time.sleep(0.5)
+            processed_pieces = get_stats(1, browser)
+        print(
+            f"Processed {get_stats(1, browser)}/{overall_pieces}   "
+        )  # spaces to flush remainings of previous line contents
+        i = 0
+        while outputs:
+            print(f"Downloading{'.' * i}", end="\r", flush=True)
+            for path in outputs.copy():
+                if finished_downloading(path):
+                    outputs.remove(path)
+                    print(f"Produced '{path}'")
+            time.sleep(0.5)
+            i += 1
+    browser.quit()
+    if not args.skip_downrate:
+        print("Downrating")
+        try:
+            res = subprocess.run(
+                ["downrate.sh"],
+                capture_output=True,
+                check=True,
+                cwd=output_dir,
+                text=True,
+            )
+            print(res.stdout.strip())
+        except subprocess.CalledProcessError as e:
+            print("Failed to downrate output directory:")
+            print(e.stderr)
+    if not args.skip_archive:
+        zip_path = Path(output_dir.with_name(output_dir.name + ".zip"))
+        if zip_path.exists():
+            print(f"'{zip_path}' already exists. Do you want to recreate it? (y/N): ")
+            if input().lower() == "y":
+                zip_dir(output_dir, zip_path)
+        else:
+            zip_dir(output_dir, zip_path)
+        print(f"Compressed to '{zip_path}")
+if __name__ == "__main__":
+    main()

fb2_tools/fb2_inline.py ADDED Viewed

@@ -0,0 +1,67 @@
+import argparse
+import copy
+import re
+import traceback
+from bs4 import BeautifulSoup
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("filename")
+    args = parser.parse_args()
+    with open(args.filename, "r") as fp:
+        soup = BeautifulSoup(fp, "xml")
+    for attr in soup.FictionBook.attrs.items():
+        if attr[0].startswith("xmlns:") and attr[1].endswith("xlink"):
+            xlink_ns = attr[0].removeprefix("xmlns:")
+            href_attr = xlink_ns + ":href"
+    try:
+        description = soup.FictionBook.description
+        lang = description.lang
+        annotation = (copy.copy(child) for child in description.annotation.contents)
+        section = soup.new_tag("section")
+        title_str = "Аннотация" if lang == "ru" else "Annotation"
+        title = soup.new_tag("title", string=title_str)
+        section.append(title)
+        section.extend(annotation)
+        body = soup.FictionBook.body
+        body.insert(0, section)
+    except Exception:
+        print("No annotation in the book")
+    to_remove = set()
+    for link in soup.find_all("a"):
+        try:
+            href = link.attrs[href_attr]
+            id_ = href.removeprefix("#")
+            target = soup.find("section", id=id_)
+            link.replace_with(" [[ " + target.get_text() + " ]] ")
+            to_remove.add(target)
+        except Exception:
+            print("-" * 10)
+            print(f"Unable to process {link}:")
+            print(traceback.format_exc())
+            print("-" * 10)
+    for section in to_remove:
+        section.decompose()
+    for body in soup.find_all("body", attrs={"name": re.compile("^(notes|comments)$")}):
+        if body.find("section") is None:
+            body.decompose()
+    with open(args.filename.replace(".fb2", "_inlined.fb2"), "w") as fp:
+        fp.write(soup.prettify())
+if __name__ == "__main__":
+    main()

fb2_tools/fb2_inline_plaintext.py ADDED Viewed

@@ -0,0 +1,120 @@
+import argparse
+import os
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--file-name", default="fb2_inline_plaintext.py")
+    args = parser.parse_args()
+    if os.path.isfile(args.file_name):
+        print(
+            f"{args.file_name} already exists in current directory. Run it like './{args.file_name} or remove if want to start from scratch"
+        )
+        exit(1)
+    with open(args.file_name, "w") as f:
+        f.write(
+            r"""\
+#!/usr/bin/env python3
+import os
+import re
+import sys
+import zipfile
+def is_fb2(x: str):
+    return x.endswith(".fb2")
+def process_file(contents: str) -> str:
+    while (link := re.search(r"\[(\d+)\]", contents)) is not None:
+        num = link.group(1)
+        target = re.search(r"<p>" + str(num) + r". (.+?)<\/p>", contents)
+        if target is None:
+            print(f"Dangling link [{num}]")
+            exit(1)
+        text = target.group(1)
+        contents = (
+            contents[: link.start()] + f" [[{num}: {text}]] " + contents[link.end() :]
+        )
+    return contents
+if len(sys.argv) != 2:
+    print("Supply book file as the only argument")
+    exit(1)
+filename = sys.argv[1]
+print(
+    f"Make shure the script has correct regexes for {filename}. Process (y/N):",
+    end=" ",
+)
+if input()[:1].lower() != "y":
+    exit(0)
+if filename.endswith(".zip"):
+    new_contents: dict[str, str] = {}
+    with zipfile.ZipFile(filename, "r") as zip_ref:
+        files = zip_ref.namelist()
+        for file in files:
+            if file.endswith(".fb2"):
+                print(f"Processing {file}")
+                with zip_ref.open(file, "r") as file_ref:
+                    contents = file_ref.read().decode()
+                new_contents[file] = process_file(contents)
+        base_name = os.path.splitext(filename)[0]
+        match len(new_contents.values()):
+            case 0:
+                print("No fb2 book found")
+                exit(1)
+            case 1:
+                new_file = (
+                    os.path.join(
+                        os.path.dirname(base_name),
+                        os.path.splitext(list(new_contents.keys())[0])[0],
+                    )
+                    + "_notes.fb2"
+                )
+                with open(new_file, "w") as f:
+                    f.write(list(new_contents.values())[0])
+            case _:
+                os.makedirs(base_name, exist_ok=True)
+                for file, contents in new_contents.items():
+                    new_file = os.path.join(
+                        base_name, os.path.splitext(file)[0] + "_notes.fb2"
+                    )
+                    with open(new_file, "w") as f:
+                        f.write(contents)
+                pass
+elif filename.endswith(".fb2"):
+    with open(filename, "r") as f:
+        contents = f.read()
+    new_content = process_file(contents)
+    with open(os.path.splitext(filename)[0] + "_notes.fb2", "w") as f:
+        f.write(new_content)
+else:
+    print("Unsupported file type")
+    exit(1)
+"""
+        )
+    print(
+        f"./{args.file_name} was created. Edit it to properly handle specific book. Then run like an ordinary executable `./{args.file_name}`"
+    )
+if __name__ == "__main__":
+    main()

fb2_tools/fb2_split.py ADDED Viewed

@@ -0,0 +1,98 @@
+import argparse
+import os
+import re
+import shutil
+from bs4 import BeautifulSoup
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("filename")
+    parser.add_argument("-i", "--preserve-images", action="store_true")
+    args = parser.parse_args()
+    filename = args.filename
+    out_dir = filename.removesuffix(".fb2")
+    if os.path.exists(out_dir):
+        if input("Output path exists. Remove it? (y/N): ").lower() == "y":
+            shutil.rmtree(out_dir)
+        else:
+            exit(0)
+    os.mkdir(out_dir)
+    with open(filename, "r") as fp:
+        soup = BeautifulSoup(fp, "xml")
+    if args.preserve_images:
+        for attr in soup.FictionBook.attrs.items():
+            if attr[0].startswith("xmlns:") and attr[1].endswith("xlink"):
+                xlink_ns = attr[0].removeprefix("xmlns:")
+                href_attr = xlink_ns + ":href"
+    else:
+        coverpage = soup.FictionBook.description.find("coverpage")
+        if coverpage is not None:
+            coverpage.decompose()
+    title_info = soup.description.find("title-info")
+    book_title = title_info.find("book-title").string
+    body = soup.body
+    if args.preserve_images:
+        binaries = {
+            binary["id"]: binary.extract() for binary in body.find_all_next("binary")
+        }
+    else:
+        for binary in body.find_all_next("binary"):
+            binary.decompose()
+        for image in body.find_all("image"):
+            image.decompose()
+    sections = [
+        section.extract() for section in body.find_all("section", recursive=False)
+    ]
+    for i, section in enumerate(sections):
+        try:
+            body.append(section)
+            title = section.title.text
+            title = title.replace("\n", " ")
+            title = re.sub(r"\[\[.*?\]\]", "", title)
+            title = title.strip()
+            if args.preserve_images:
+                for image in soup.find_all("image"):
+                    href = image[href_attr]
+                    id_ = href.removeprefix("#")
+                    binary = binaries[id_]
+                    soup.FictionBook.append(binary)
+            title_info.find("book-title").string = f"{title} from {book_title}"
+            with open(os.path.join(out_dir, f"{i + 1:02}.{title}.fb2"), "w") as fp:
+                fp.write(soup.prettify())
+            body.clear(
+                decompose=True
+            )  # necessary to run after the first run to remove titles and epigraphs
+            section.decompose()
+            if args.preserve_images:
+                for binary in body.find_all_next("binary"):
+                    binary.extract()
+        except Exception as e:
+            terminal_width = shutil.get_terminal_size(fallback=(80, 1))[0]
+            print("-" * terminal_width)
+            print(f"Error while processing {i + 1:02}")
+            print(repr(e))
+            print(section)
+            print("-" * terminal_width)
+if __name__ == "__main__":
+    main()

fb2_tools-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,15 @@
+Metadata-Version: 2.4
+Name: fb2-tools
+Version: 0.1.0
+Summary:
+Author: dm1sh
+Author-email: me@dmitriy.icu
+Requires-Python: >=3.14
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.14
+Requires-Dist: beautifulsoup4 (>=4.14.2,<5.0.0)
+Requires-Dist: lxml (>=6.0.2,<7.0.0)
+Requires-Dist: splinter[selenium] (>=0.21.0,<0.22.0)
+Description-Content-Type: text/markdown

fb2_tools-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,8 @@
+fb2_tools/fb2_edgetts.py,sha256=khuoBw_NaLJDOC0JnBJTMGBnJ3aT9RNtrK5UTtfssOU,7064
+fb2_tools/fb2_inline.py,sha256=ZpOTGq6NSmzdU3qeuQRW5RH6IfTjlNOo6SLW2GTsoI8,1904
+fb2_tools/fb2_inline_plaintext.py,sha256=UAeREmL4GS0VqoQ5qjPYjC0yEI7CHgJBueywE9CD8Os,3250
+fb2_tools/fb2_split.py,sha256=x7Hswd4olMB7HOTcpqH5gWFs2vi52ngQ_M-ix0JGtCw,2978
+fb2_tools-0.1.0.dist-info/METADATA,sha256=fUAqlU5Jlb-krFkJMzDCQMBaj01kj9vesFJ8UMH_E40,409
+fb2_tools-0.1.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+fb2_tools-0.1.0.dist-info/entry_points.txt,sha256=e-7ojQy7U8e-4WNKKTmcH9g_lSZF5Fr4ClVhYcdPCyY,187
+fb2_tools-0.1.0.dist-info/RECORD,,

fb2_tools-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: poetry-core 2.2.1
+Root-Is-Purelib: true
+Tag: py3-none-any

fb2_tools-0.1.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,6 @@
+[console_scripts]
+fb2_edgetts=fb2_tools.fb2_edgetts:main
+fb2_inline=fb2_tools.fb2_inline:main
+fb2_inline_plaintext=fb2_tools.fb2_inline_plaintext:main
+fb2_split=fb2_tools.fb2_split:main