fb2-tools 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.4
2
+ Name: fb2-tools
3
+ Version: 0.1.0
4
+ Summary:
5
+ Author: dm1sh
6
+ Author-email: me@dmitriy.icu
7
+ Requires-Python: >=3.14
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.14
10
+ Requires-Dist: beautifulsoup4 (>=4.14.2,<5.0.0)
11
+ Requires-Dist: lxml (>=6.0.2,<7.0.0)
12
+ Requires-Dist: splinter[selenium] (>=0.21.0,<0.22.0)
13
+ Description-Content-Type: text/markdown
14
+
15
+
File without changes
@@ -0,0 +1,224 @@
1
+ import argparse
2
+ import os
3
+ import re
4
+ import subprocess
5
+ import time
6
+ import zipfile
7
+ from collections import deque
8
+ from pathlib import Path
9
+ from typing import Literal
10
+
11
+ from selenium import webdriver
12
+ from splinter import Browser
13
+
14
+
15
def init_browser(download_dir: Path, binary_location: str = "/usr/bin/librewolf"):
    """Start a Firefox-driven splinter browser that saves MP3 downloads silently.

    Args:
        download_dir: Directory the browser is told to download files into.
        binary_location: Path of the Firefox-compatible executable to launch.
            Defaults to the LibreWolf location this tool was written against.

    Returns:
        The splinter ``Browser`` created with the configured Firefox options.
    """
    options = webdriver.firefox.options.Options()
    options.binary_location = binary_location

    preferences = {
        "browser.download.folderList": 2,  # use custom download path
        "browser.download.dir": str(download_dir.absolute()),
        "browser.download.useDownloadDir": True,
        "browser.download.manager.showWhenStarting": False,
        # Save audio/mpeg responses without asking the user
        "browser.helperApps.neverAsk.saveToDisk": "audio/mpeg",
    }
    for name, value in preferences.items():
        options.set_preference(name, value)

    return Browser("firefox", options=options)
28
+
29
+
30
def set_value(id_: str, value, browser: Browser):
    """Assign ``value`` to a page element and notify the page with an ``input`` event.

    Both the element id and the value are encoded as JavaScript literals with
    ``json.dumps``, so numbers are passed through unchanged while strings,
    booleans and ``None`` become valid JavaScript instead of a syntax error.

    Args:
        id_: DOM id of the element to modify.
        value: JSON-serialisable value to assign to the element's ``value``.
        browser: Browser whose current page contains the element.
    """
    import json  # local import: the module does not import json at file level

    element = f"document.getElementById({json.dumps(id_)})"
    browser.execute_script(f"{element}.value = {json.dumps(value)}")
    # The assignment alone fires no event, so dispatch one explicitly
    browser.execute_script(f"{element}.dispatchEvent(new Event('input'))")
35
+
36
+
37
def set_expanding(id_: str, value, browser: Browser):
    """Set the value of an element, clicking the settings label first if it is hidden.

    Args:
        id_: DOM id of the element to modify.
        value: Value forwarded to ``set_value``.
        browser: Browser whose current page contains the element.
    """
    field_hidden = not browser.find_by_id(id_).visible
    if field_hidden:
        # presumably this label expands the additional-settings panel — confirm against the page
        toggle = browser.find_by_id("dop-settings-label")
        toggle.click()

    set_value(id_, value, browser)
41
+
42
+
43
def display_block(id_: str, browser: Browser):
    """Force the element with the given id to be rendered with ``display: block``.

    Args:
        id_: DOM id of the element to show.
        browser: Browser whose current page contains the element.
    """
    script = f"document.getElementById('{id_}').style.display = 'block'"
    browser.evaluate_script(script)
45
+
46
+
47
def set_up_browser(
    browser: Browser,
    pointstype: Literal["V1", "V2", "V3"],
    rate: int,
    pitch: int,
    max_threads: int,
    mergefiles: int,
):
    """Open the Edge TTS page and apply the requested conversion settings.

    Args:
        browser: Browser to drive.
        pointstype: Value the ``pointstype`` control must show.
        rate: Value written to the ``rate`` control.
        pitch: Value written to the ``pitch`` control.
        max_threads: Value written to the ``max-threads`` control.
        mergefiles: Value written to the ``mergefiles`` control.
    """
    browser.visit("https://edgetts.github.io")

    # Keep clicking the control until it shows the requested value
    while True:
        if browser.find_by_id("pointstype").value == pointstype:
            break
        browser.find_by_id("pointstype").click()

    set_value("rate", rate, browser)

    expanding_settings = (
        ("pitch", pitch),
        ("max-threads", max_threads),
        ("mergefiles", mergefiles),
    )
    for element_id, setting in expanding_settings:
        set_expanding(element_id, setting, browser)

    # Unhide the file input and give it a name
    # (presumably so it can later be addressed by name when attaching files)
    display_block("file-input", browser)
    browser.evaluate_script("document.getElementById('file-input').name = 'file-input'")

    display_block("stat-area", browser)
70
+
71
# Matches the "<first> / <second>" pair of integers shown in the page's stat string
pieces_regex = re.compile(r"(\d+) / (\d+)")


def get_stats(index: int, browser: Browser):
    """Return one number from the "N / M" pair in the page's ``stat-str`` element.

    Args:
        index: ``1`` for the number before the slash, ``2`` for the one after it.
        browser: Browser whose current page contains the ``stat-str`` element.

    Returns:
        The requested number as a string of digits.

    Raises:
        ValueError: If ``index`` is not 1 or 2, or the element's value holds no
            "N / M" pair.
    """
    # Explicit check instead of ``assert`` so validation survives ``python -O``
    if index not in (1, 2):
        raise ValueError(f"index must be 1 or 2, got {index!r}")

    stat_text = browser.find_by_id("stat-str").value
    found = pieces_regex.search(stat_text or "")
    if found is None:
        raise ValueError(f"No 'N / M' counter found in stat string: {stat_text!r}")

    return found.group(index)
78
+
79
+
80
def finished_downloading(path: Path):
    """Tell whether ``path`` exists and has no sibling ``<name>.part`` file.

    Args:
        path: Expected location of the downloaded file.

    Returns:
        ``True`` when ``path`` exists and ``<path>.part`` does not, else ``False``.
    """
    partial = path.with_name(f"{path.name}.part")

    if not path.exists():
        return False
    return not partial.exists()
84
+
85
+
86
def zip_dir(dir_path: Path, target: Path):
    """Write every regular file under ``dir_path`` into an uncompressed zip archive.

    Args:
        dir_path: Directory whose files (searched recursively) are archived.
        target: Path of the zip file to create or overwrite.
    """
    regular_files = (entry for entry in dir_path.rglob("*") if entry.is_file())

    with zipfile.ZipFile(target, mode="w", compression=zipfile.ZIP_STORED) as archive:
        for entry in regular_files:
            # Store names relative to dir_path so the archive has no leading directories
            archive.write(entry, entry.relative_to(dir_path))
92
+
93
+
94
def main():
    """Convert books to MP3 through the edgetts.github.io page, then post-process.

    Steps, in order:
      1. Parse the command line and pick/create the output directory.
      2. Start the browser, apply the voice settings and attach every source
         file (directories are walked recursively).
      3. Start the conversion, poll the page until no piece is pending, then
         wait for the expected ``.mp3`` files to finish downloading.
      4. Unless ``--skip-downrate`` is given, run ``downrate.sh`` inside the
         output directory.
      5. Unless ``--skip-archive`` is given, zip the output directory next to it.

    Raises:
        SystemExit: With status 1 if the output path is not a directory, or
            status 0 if the user declines to reuse a non-empty directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("source", nargs="*", default=[Path(".")], type=Path)
    parser.add_argument(
        "-o", "--output", default=Path("~/Music/tmp").expanduser(), type=Path
    )

    parser.add_argument("--pointstype", choices=["V1", "V2", "V3"], default="V3")
    parser.add_argument("--rate", type=int, choices=range(-50, 100 + 1), default=75)
    parser.add_argument("--pitch", type=int, choices=range(-50, 50 + 1), default=0)
    parser.add_argument("--max-threads", type=int, choices=range(1, 30 + 1), default=20)
    parser.add_argument(
        "--mergefiles",
        type=int,
        choices=range(1, 100 + 1),
        default=100,
        help="100 means merge all pieces",
    )

    parser.add_argument("--skip-downrate", action="store_true")
    parser.add_argument("--skip-archive", action="store_true")
    args = parser.parse_args()

    # A single source gets its own sub-directory named after it
    if len(args.source) == 1:
        source = args.source[0]
        if source.is_dir():
            out_name = source.name
        else:
            out_name = source.stem.removesuffix(".fb2")

        output_dir = args.output / out_name
    else:
        output_dir = args.output

    if not output_dir.exists():
        os.makedirs(output_dir)
    elif not output_dir.is_dir():
        print(f"Output must be a directory: '{output_dir}'")
        raise SystemExit(1)
    elif any(output_dir.iterdir()) and not args.skip_downrate:
        print(
            f"Output directory '{output_dir}' is not empty."
            f" Running downrate.sh on it may cause unwanted files changed and messed up statistics."
        )
        if input("Continue? (y/N): ").lower() != "y":
            raise SystemExit(0)

    browser = init_browser(output_dir)
    try:
        set_up_browser(
            browser,
            args.pointstype,
            args.rate,
            args.pitch,
            args.max_threads,
            args.mergefiles,
        )

        # Breadth-first walk: directories are expanded, files are attached
        to_process: deque[Path] = deque(args.source)
        outputs = set()
        while to_process:
            path = to_process.popleft()

            if path.is_dir():
                to_process.extend(path.iterdir())
            elif path.exists():
                browser.attach_file("file-input", str(path))
                outputs.add(output_dir / path.with_suffix(".mp3").name)
            else:
                print(f"Skipping '{path}': no such file or directory")

        if browser.is_text_present("Открыты"):
            overall_pieces = get_stats(2, browser)
            browser.find_by_id("savebutton").click()

            processed_pieces = get_stats(1, browser)
            # Poll while any piece still reports one of these (Russian) status words
            while any(
                status in browser.find_by_id("stat-area").value
                for status in ("Открыта", "Запущена", "Обработка", "ПЕРЕЗАПУСК")
            ):
                print(
                    f"Processing {processed_pieces}/{overall_pieces}",
                    end="\r",
                    flush=True,
                )
                time.sleep(0.5)
                processed_pieces = get_stats(1, browser)
            print(
                f"Processed {get_stats(1, browser)}/{overall_pieces}     "
            )  # spaces to flush remainings of previous line contents

        i = 0
        while outputs:
            print(f"Downloading{'.' * i}", end="\r", flush=True)
            for path in outputs.copy():
                if finished_downloading(path):
                    outputs.remove(path)
                    print(f"Produced '{path}'")

            time.sleep(0.5)
            i += 1
    finally:
        # Always close the browser, even if the page interaction failed
        browser.quit()

    if not args.skip_downrate:
        print("Downrating")
        try:
            res = subprocess.run(
                ["downrate.sh"],
                capture_output=True,
                check=True,
                cwd=output_dir,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            print("Failed to downrate output directory:")
            print(e.stderr)
        except OSError as e:
            # e.g. downrate.sh is not on PATH or is not executable
            print("Failed to downrate output directory:")
            print(e)
        else:
            print(res.stdout.strip())

    if not args.skip_archive:
        zip_path = output_dir.with_name(output_dir.name + ".zip")
        write_archive = True
        if zip_path.exists():
            print(f"'{zip_path}' already exists. Do you want to recreate it? (y/N): ")
            write_archive = input().lower() == "y"

        # Report compression only when an archive was actually written
        if write_archive:
            zip_dir(output_dir, zip_path)
            print(f"Compressed to '{zip_path}'")


if __name__ == "__main__":
    main()
@@ -0,0 +1,67 @@
1
+ import argparse
2
+ import copy
3
+ import re
4
+ import traceback
5
+
6
+ from bs4 import BeautifulSoup
7
+
8
+
9
def main():
    """Inline footnotes of an FB2 book and write ``<name>_inlined.fb2``.

    The book's annotation (if any) is copied into a new first section of the
    body. Every ``<a>`` whose xlink ``href`` points at a ``<section id=...>``
    is replaced by that section's text wrapped in ``[[ ... ]]``; the referenced
    sections are then removed, as are ``notes``/``comments`` bodies left
    without sections.

    Raises:
        SystemExit: With status 1 if the file has no ``<FictionBook>`` element.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("filename")
    args = parser.parse_args()

    # Binary mode: let the parser decode using the encoding declared in the
    # XML prolog instead of the locale's default text encoding.
    with open(args.filename, "rb") as fp:
        soup = BeautifulSoup(fp, "xml")

    root = soup.FictionBook
    if root is None:
        print(f"'{args.filename}' has no <FictionBook> element")
        raise SystemExit(1)

    # The xlink prefix is book-specific; derive "<prefix>:href" from the root's xmlns declarations
    href_attr = None
    for name, value in root.attrs.items():
        if name.startswith("xmlns:") and value.endswith("xlink"):
            href_attr = name.removeprefix("xmlns:") + ":href"

    description = root.description
    annotation = description.find("annotation") if description is not None else None
    body = root.body
    if annotation is None:
        print("No annotation in the book")
    elif body is None:
        print("No body in the book, annotation was not inlined")
    else:
        # Compare the tag's text, not the tag object, with the language code
        lang_tag = description.find("lang")
        lang = lang_tag.get_text(strip=True).lower() if lang_tag is not None else ""

        section = soup.new_tag("section")
        title_str = "Аннотация" if lang.split("-")[0] == "ru" else "Annotation"
        section.append(soup.new_tag("title", string=title_str))
        # Copies keep the original annotation inside <description> intact
        section.extend([copy.copy(child) for child in annotation.contents])
        body.insert(0, section)

    # Keyed by identity so two distinct sections with equal content are both removed
    to_remove = {}

    if href_attr is None:
        print("No xlink namespace declared in the book, links were left untouched")
    else:
        for link in soup.find_all("a"):
            try:
                href = link.attrs.get(href_attr)
                if href is None:
                    print(f"Unable to process {link}: no '{href_attr}' attribute")
                    continue

                id_ = href.removeprefix("#")
                target = soup.find("section", id=id_)
                if target is None:
                    print(f"Unable to process {link}: no section with id '{id_}'")
                    continue

                link.replace_with(" [[ " + target.get_text() + " ]] ")
                to_remove[id(target)] = target
            except Exception:
                # Best effort: report the link and keep processing the others
                print("-" * 10)
                print(f"Unable to process {link}:")
                print(traceback.format_exc())
                print("-" * 10)

    for section in to_remove.values():
        section.decompose()

    for body in soup.find_all("body", attrs={"name": re.compile("^(notes|comments)$")}):
        if body.find("section") is None:
            body.decompose()

    # Only strip a trailing ".fb2" so other parts of the path stay untouched and
    # a file without that suffix is never overwritten.
    out_name = args.filename.removesuffix(".fb2") + "_inlined.fb2"
    # bs4's default output encoding is UTF-8, so write the text out as UTF-8
    with open(out_name, "w", encoding="utf-8") as fp:
        fp.write(soup.prettify())


if __name__ == "__main__":
    main()
@@ -0,0 +1,120 @@
1
+ import argparse
2
+ import os
3
+
4
+
5
+ def main():
6
+ parser = argparse.ArgumentParser()
7
+ parser.add_argument("--file-name", default="fb2_inline_plaintext.py")
8
+ args = parser.parse_args()
9
+
10
+ if os.path.isfile(args.file_name):
11
+ print(
12
+ f"{args.file_name} already exists in current directory. Run it like './{args.file_name} or remove if want to start from scratch"
13
+ )
14
+ exit(1)
15
+
16
+ with open(args.file_name, "w") as f:
17
+ f.write(
18
+ r"""\
19
+ #!/usr/bin/env python3
20
+
21
+ import os
22
+ import re
23
+ import sys
24
+ import zipfile
25
+
26
+
27
+ def is_fb2(x: str):
28
+ return x.endswith(".fb2")
29
+
30
+
31
+ def process_file(contents: str) -> str:
32
+ while (link := re.search(r"\[(\d+)\]", contents)) is not None:
33
+ num = link.group(1)
34
+ target = re.search(r"<p>" + str(num) + r". (.+?)<\/p>", contents)
35
+
36
+ if target is None:
37
+ print(f"Dangling link [{num}]")
38
+ exit(1)
39
+
40
+ text = target.group(1)
41
+
42
+ contents = (
43
+ contents[: link.start()] + f" [[{num}: {text}]] " + contents[link.end() :]
44
+ )
45
+
46
+ return contents
47
+
48
+
49
+ if len(sys.argv) != 2:
50
+ print("Supply book file as the only argument")
51
+ exit(1)
52
+
53
+ filename = sys.argv[1]
54
+
55
+ print(
56
+ f"Make shure the script has correct regexes for {filename}. Process (y/N):",
57
+ end=" ",
58
+ )
59
+ if input()[:1].lower() != "y":
60
+ exit(0)
61
+
62
+ if filename.endswith(".zip"):
63
+ new_contents: dict[str, str] = {}
64
+
65
+ with zipfile.ZipFile(filename, "r") as zip_ref:
66
+ files = zip_ref.namelist()
67
+ for file in files:
68
+ if file.endswith(".fb2"):
69
+ print(f"Processing {file}")
70
+ with zip_ref.open(file, "r") as file_ref:
71
+ contents = file_ref.read().decode()
72
+
73
+ new_contents[file] = process_file(contents)
74
+
75
+ base_name = os.path.splitext(filename)[0]
76
+
77
+ match len(new_contents.values()):
78
+ case 0:
79
+ print("No fb2 book found")
80
+ exit(1)
81
+ case 1:
82
+ new_file = (
83
+ os.path.join(
84
+ os.path.dirname(base_name),
85
+ os.path.splitext(list(new_contents.keys())[0])[0],
86
+ )
87
+ + "_notes.fb2"
88
+ )
89
+ with open(new_file, "w") as f:
90
+ f.write(list(new_contents.values())[0])
91
+ case _:
92
+ os.makedirs(base_name, exist_ok=True)
93
+ for file, contents in new_contents.items():
94
+ new_file = os.path.join(
95
+ base_name, os.path.splitext(file)[0] + "_notes.fb2"
96
+ )
97
+ with open(new_file, "w") as f:
98
+ f.write(contents)
99
+ pass
100
+ elif filename.endswith(".fb2"):
101
+ with open(filename, "r") as f:
102
+ contents = f.read()
103
+
104
+ new_content = process_file(contents)
105
+
106
+ with open(os.path.splitext(filename)[0] + "_notes.fb2", "w") as f:
107
+ f.write(new_content)
108
+ else:
109
+ print("Unsupported file type")
110
+ exit(1)
111
+ """
112
+ )
113
+
114
+ print(
115
+ f"./{args.file_name} was created. Edit it to properly handle specific book. Then run like an ordinary executable `./{args.file_name}`"
116
+ )
117
+
118
+
119
+ if __name__ == "__main__":
120
+ main()
@@ -0,0 +1,98 @@
1
+ import argparse
2
+ import os
3
+ import re
4
+ import shutil
5
+
6
+ from bs4 import BeautifulSoup
7
+
8
+
9
def main():
    """Split an FB2 book into one FB2 file per top-level body section.

    Each output file is the original document with only one top-level section
    in its body and with the book title rewritten to
    ``"<section title> from <book title>"``. Files are written to a directory
    named after the input file without its ``.fb2`` suffix. Images and their
    binaries are dropped unless ``--preserve-images`` is given.

    Raises:
        SystemExit: With status 0 if the output path exists and the user
            declines to remove it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("filename")
    parser.add_argument("-i", "--preserve-images", action="store_true")
    args = parser.parse_args()

    filename = args.filename
    out_dir = filename.removesuffix(".fb2")
    if out_dir == filename:
        # Without a ".fb2" suffix the directory name would collide with the input file
        out_dir = filename + "_split"

    # Parse before touching the output directory so an unreadable input
    # does not cost the user a previously generated result.
    # Binary mode lets the parser honour the encoding declared in the XML prolog.
    with open(filename, "rb") as fp:
        soup = BeautifulSoup(fp, "xml")

    if os.path.exists(out_dir):
        if input("Output path exists. Remove it? (y/N): ").lower() == "y":
            shutil.rmtree(out_dir)
        else:
            raise SystemExit(0)

    os.mkdir(out_dir)

    href_attr = None
    if args.preserve_images:
        # The xlink prefix is book-specific; derive "<prefix>:href" from the root's xmlns declarations
        for name, value in soup.FictionBook.attrs.items():
            if name.startswith("xmlns:") and value.endswith("xlink"):
                href_attr = name.removeprefix("xmlns:") + ":href"
        if href_attr is None:
            print("No xlink namespace declared in the book, images cannot be resolved")
    else:
        coverpage = soup.FictionBook.description.find("coverpage")
        if coverpage is not None:
            coverpage.decompose()

    title_info = soup.description.find("title-info")

    book_title = title_info.find("book-title").string

    body = soup.body

    if args.preserve_images:
        # Detach all binaries; only those referenced by a section are re-attached later
        binaries = {
            binary["id"]: binary.extract() for binary in body.find_all_next("binary")
        }
    else:
        for binary in body.find_all_next("binary"):
            binary.decompose()
        for image in body.find_all("image"):
            image.decompose()

    sections = [
        section.extract() for section in body.find_all("section", recursive=False)
    ]

    for i, section in enumerate(sections):
        try:
            body.append(section)

            title = section.title.text
            title = title.replace("\n", " ")
            title = re.sub(r"\[\[.*?\]\]", "", title)
            title = title.strip()

            if args.preserve_images and href_attr is not None:
                for image in soup.find_all("image"):
                    href = image.get(href_attr)
                    if href is None:
                        continue
                    id_ = href.removeprefix("#")

                    binary = binaries.get(id_)
                    if binary is None:
                        print(f"No binary with id '{id_}' for section {i + 1:02}")
                        continue
                    soup.FictionBook.append(binary)

            title_info.find("book-title").string = f"{title} from {book_title}"

            # Path separators and NUL in a title would break the file name
            safe_title = re.sub(r"[/\\\x00]", "_", title)
            out_path = os.path.join(out_dir, f"{i + 1:02}.{safe_title}.fb2")
            # bs4's default output encoding is UTF-8, so write the text out as UTF-8
            with open(out_path, "w", encoding="utf-8") as fp:
                fp.write(soup.prettify())
        except Exception as e:
            terminal_width = shutil.get_terminal_size(fallback=(80, 1))[0]
            print("-" * terminal_width)
            print(f"Error while processing {i + 1:02}")
            print(repr(e))
            print(section)
            print("-" * terminal_width)
        finally:
            # Runs on failure too, so a broken section never leaks into the next file
            body.clear(
                decompose=True
            )  # necessary to run after the first run to remove titles and epigraphs
            section.decompose()

            if args.preserve_images:
                # Detach re-attached binaries so they can be reused by later sections
                for binary in body.find_all_next("binary"):
                    binary.extract()


if __name__ == "__main__":
    main()
@@ -0,0 +1,18 @@
1
+ [project]
2
+ name = "fb2-tools"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = [{ name = "dm1sh", email = "me@dmitriy.icu" }]
6
+ readme = "README.md"
7
+ requires-python = ">=3.14"
8
+ dependencies = ["splinter[selenium] (>=0.21.0,<0.22.0)", "beautifulsoup4 (>=4.14.2,<5.0.0)", "lxml (>=6.0.2,<7.0.0)"]
9
+
10
+ [project.scripts]
11
+ fb2_edgetts = "fb2_tools.fb2_edgetts:main"
12
+ fb2_inline = "fb2_tools.fb2_inline:main"
13
+ fb2_inline_plaintext = "fb2_tools.fb2_inline_plaintext:main"
14
+ fb2_split = "fb2_tools.fb2_split:main"
15
+
16
+ [build-system]
17
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
18
+ build-backend = "poetry.core.masonry.api"