karaoke-lyrics-processor 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karaoke_lyrics_processor/karaoke_lyrics_processor.py +30 -25
- {karaoke_lyrics_processor-0.4.0.dist-info → karaoke_lyrics_processor-0.4.2.dist-info}/METADATA +3 -3
- karaoke_lyrics_processor-0.4.2.dist-info/RECORD +8 -0
- {karaoke_lyrics_processor-0.4.0.dist-info → karaoke_lyrics_processor-0.4.2.dist-info}/WHEEL +1 -1
- karaoke_lyrics_processor-0.4.0.dist-info/RECORD +0 -8
- {karaoke_lyrics_processor-0.4.0.dist-info → karaoke_lyrics_processor-0.4.2.dist-info}/LICENSE +0 -0
- {karaoke_lyrics_processor-0.4.0.dist-info → karaoke_lyrics_processor-0.4.2.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,6 @@
|
|
1
1
|
import re
|
2
2
|
import logging
|
3
3
|
import pyperclip
|
4
|
-
import unicodedata
|
5
4
|
import docx2txt
|
6
5
|
from striprtf.striprtf import rtf_to_text
|
7
6
|
import os
|
@@ -61,11 +60,8 @@ class KaraokeLyricsProcessor:
|
|
61
60
|
def read_txt_file(self):
|
62
61
|
with codecs.open(self.input_filename, "r", encoding="utf-8") as infile:
|
63
62
|
content = infile.read()
|
64
|
-
self.logger.debug(f"Raw content read from file: {repr(content)}")
|
65
63
|
lines = content.splitlines()
|
66
|
-
self.logger.debug(f"
|
67
|
-
for i, line in enumerate(lines):
|
68
|
-
self.logger.debug(f"Line {i}: {repr(line)}")
|
64
|
+
self.logger.debug(f"Read {len(lines)} lines from {self.input_filename}")
|
69
65
|
return self.clean_text(content).splitlines()
|
70
66
|
|
71
67
|
def read_doc_file(self):
|
@@ -79,16 +75,29 @@ class KaraokeLyricsProcessor:
|
|
79
75
|
return self.clean_text(plain_text).splitlines()
|
80
76
|
|
81
77
|
def clean_text(self, text):
|
82
|
-
|
83
|
-
|
78
|
+
# Remove any non-printable characters except newlines and U+2005
|
79
|
+
original_len = len(text)
|
84
80
|
cleaned = "".join(char for char in text if char.isprintable() or char in ["\n", "\u2005"])
|
85
|
-
|
81
|
+
if len(cleaned) != original_len:
|
82
|
+
self.logger.debug(f"Removed {original_len - len(cleaned)} non-printable characters")
|
83
|
+
|
86
84
|
# Replace multiple newlines with a single newline
|
85
|
+
newlines_before = cleaned.count("\n")
|
87
86
|
cleaned = re.sub(r"\n{2,}", "\n", cleaned)
|
88
|
-
|
87
|
+
newlines_after = cleaned.count("\n")
|
88
|
+
if newlines_before != newlines_after:
|
89
|
+
self.logger.debug(f"Consolidated {newlines_before - newlines_after} extra newlines")
|
90
|
+
|
89
91
|
# Remove leading/trailing whitespace from each line
|
90
|
-
|
91
|
-
|
92
|
+
lines_before = cleaned.splitlines()
|
93
|
+
cleaned = "\n".join(line.strip() for line in lines_before)
|
94
|
+
lines_after = cleaned.splitlines()
|
95
|
+
|
96
|
+
# Count lines that changed due to stripping
|
97
|
+
changed_lines = sum(1 for before, after in zip(lines_before, lines_after) if before != after)
|
98
|
+
if changed_lines > 0:
|
99
|
+
self.logger.debug(f"Stripped whitespace from {changed_lines} lines")
|
100
|
+
|
92
101
|
return cleaned
|
93
102
|
|
94
103
|
def find_best_split_point(self, line):
|
@@ -148,8 +157,6 @@ class KaraokeLyricsProcessor:
|
|
148
157
|
Replace non-printable space-like characters, tabs, and other whitespace with regular spaces,
|
149
158
|
excluding newline characters.
|
150
159
|
"""
|
151
|
-
self.logger.debug(f"Replacing non-printable spaces in: {repr(text)}")
|
152
|
-
|
153
160
|
# Log each character and its Unicode code point
|
154
161
|
# for i, char in enumerate(text):
|
155
162
|
# self.logger.debug(f"Character at position {i}: {repr(char)} (Unicode: U+{ord(char):04X})")
|
@@ -160,35 +167,29 @@ class KaraokeLyricsProcessor:
|
|
160
167
|
# Replace matched characters with a regular space
|
161
168
|
cleaned_text = re.sub(space_pattern, " ", text)
|
162
169
|
|
163
|
-
# Log the result of the replacement
|
164
|
-
self.logger.debug(f"Text after replacing non-printable spaces: {repr(cleaned_text)}")
|
165
|
-
|
166
170
|
# Remove leading/trailing spaces and collapse multiple spaces into one, preserving newlines
|
167
171
|
final_text = re.sub(r" +", " ", cleaned_text).strip()
|
168
172
|
|
169
|
-
# Log the final result
|
170
|
-
self.logger.debug(f"Final text after cleaning: {repr(final_text)}")
|
171
|
-
|
172
173
|
return final_text
|
173
174
|
|
174
175
|
def clean_punctuation_spacing(self, text):
|
175
176
|
"""
|
176
177
|
Remove unnecessary spaces before punctuation marks.
|
177
178
|
"""
|
178
|
-
self.logger.debug(f"Cleaning punctuation spacing
|
179
|
+
self.logger.debug(f"Cleaning punctuation spacing")
|
179
180
|
# Remove space before comma, period, exclamation mark, question mark, colon, and semicolon
|
180
181
|
cleaned_text = re.sub(r"\s+([,\.!?:;])", r"\1", text)
|
181
|
-
|
182
|
+
|
182
183
|
return cleaned_text
|
183
184
|
|
184
185
|
def fix_commas_inside_quotes(self, text):
|
185
186
|
"""
|
186
187
|
Move commas inside quotes to after the closing quote.
|
187
188
|
"""
|
188
|
-
self.logger.debug(f"Fixing commas inside quotes
|
189
|
+
self.logger.debug(f"Fixing commas inside quotes")
|
189
190
|
# Use regex to find patterns where a comma is inside quotes and move it outside
|
190
191
|
fixed_text = re.sub(r'(".*?)(,)(\s*")', r"\1\3\2", text)
|
191
|
-
|
192
|
+
|
192
193
|
return fixed_text
|
193
194
|
|
194
195
|
def process_line(self, line):
|
@@ -308,9 +309,13 @@ class KaraokeLyricsProcessor:
|
|
308
309
|
processed_lyrics_text = self.clean_punctuation_spacing(processed_lyrics_text)
|
309
310
|
|
310
311
|
self.processed_lyrics_text = processed_lyrics_text
|
311
|
-
pyperclip.copy(processed_lyrics_text)
|
312
312
|
|
313
|
-
|
313
|
+
# Try to copy to clipboard, but don't fail if it's not available
|
314
|
+
try:
|
315
|
+
pyperclip.copy(processed_lyrics_text)
|
316
|
+
self.logger.info("Processed lyrics copied to clipboard.")
|
317
|
+
except pyperclip.PyperclipException as e:
|
318
|
+
self.logger.warning(f"Could not copy to clipboard: {str(e)}")
|
314
319
|
|
315
320
|
return processed_lyrics_text
|
316
321
|
|
{karaoke_lyrics_processor-0.4.0.dist-info → karaoke_lyrics_processor-0.4.2.dist-info}/METADATA
RENAMED
@@ -1,8 +1,7 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.3
|
2
2
|
Name: karaoke-lyrics-processor
|
3
|
-
Version: 0.4.0
|
3
|
+
Version: 0.4.2
|
4
4
|
Summary: Process song lyrics to prepare them for karaoke video production, e.g. by splitting long lines
|
5
|
-
Home-page: https://github.com/karaokenerds/karaoke-lyrics-processor
|
6
5
|
License: MIT
|
7
6
|
Author: Andrew Beveridge
|
8
7
|
Author-email: andrew@beveridge.uk
|
@@ -18,6 +17,7 @@ Requires-Dist: pyperclip (>=1.8)
|
|
18
17
|
Requires-Dist: python-docx (>=1)
|
19
18
|
Requires-Dist: striprtf (>=0.0.27)
|
20
19
|
Project-URL: Documentation, https://github.com/karaokenerds/karaoke-lyrics-processor/blob/main/README.md
|
20
|
+
Project-URL: Homepage, https://github.com/karaokenerds/karaoke-lyrics-processor
|
21
21
|
Project-URL: Repository, https://github.com/karaokenerds/karaoke-lyrics-processor
|
22
22
|
Description-Content-Type: text/markdown
|
23
23
|
|
@@ -0,0 +1,8 @@
|
|
1
|
+
karaoke_lyrics_processor/__init__.py,sha256=rLRkJQi61qkRiNXdlTleE3ahJ1oBKcghYVkz64x7IIg,62
|
2
|
+
karaoke_lyrics_processor/cli.py,sha256=bdtseRI2jcChb1bMr92pc5mpSWpHXh4TSzA2tknbyjU,2522
|
3
|
+
karaoke_lyrics_processor/karaoke_lyrics_processor.py,sha256=Yyj9OSwwUPyqYPcMRcaVu-i0-f89Em-LnUqfAbFskOE,13468
|
4
|
+
karaoke_lyrics_processor-0.4.2.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
|
5
|
+
karaoke_lyrics_processor-0.4.2.dist-info/METADATA,sha256=T0DNBQ3lzcZh_kpYsos3Z631ShgXq5apSlJYJ3esBcc,4276
|
6
|
+
karaoke_lyrics_processor-0.4.2.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
7
|
+
karaoke_lyrics_processor-0.4.2.dist-info/entry_points.txt,sha256=hjFp6CUxl1p-1WJYfB6TbNcI_DHEnVzX3BXAs4y_0O8,78
|
8
|
+
karaoke_lyrics_processor-0.4.2.dist-info/RECORD,,
|
@@ -1,8 +0,0 @@
|
|
1
|
-
karaoke_lyrics_processor/__init__.py,sha256=rLRkJQi61qkRiNXdlTleE3ahJ1oBKcghYVkz64x7IIg,62
|
2
|
-
karaoke_lyrics_processor/cli.py,sha256=bdtseRI2jcChb1bMr92pc5mpSWpHXh4TSzA2tknbyjU,2522
|
3
|
-
karaoke_lyrics_processor/karaoke_lyrics_processor.py,sha256=3x5Ev9xzJ_FHarHDiYHtxftpXH0PaWZGJl9GjYeeIg0,13510
|
4
|
-
karaoke_lyrics_processor-0.4.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
|
5
|
-
karaoke_lyrics_processor-0.4.0.dist-info/METADATA,sha256=_tWFNbjNUj4WoA79ZqVCsRwH2J5KlMqXQ8j8ZthzoGI,4264
|
6
|
-
karaoke_lyrics_processor-0.4.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
7
|
-
karaoke_lyrics_processor-0.4.0.dist-info/entry_points.txt,sha256=hjFp6CUxl1p-1WJYfB6TbNcI_DHEnVzX3BXAs4y_0O8,78
|
8
|
-
karaoke_lyrics_processor-0.4.0.dist-info/RECORD,,
|
{karaoke_lyrics_processor-0.4.0.dist-info → karaoke_lyrics_processor-0.4.2.dist-info}/LICENSE
RENAMED
File without changes
|
File without changes
|