karaoke-lyrics-processor 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -61,7 +61,8 @@ def main():
61
61
  processor.process()
62
62
  processor.write_to_output_file()
63
63
 
64
- logger.info(f"Lyrics processing complete, lyrics written to output file: {output_filename}")
64
+ output_file = processor.output_filename
65
+ logger.info(f"Lyrics processing complete, lyrics written to output file: {output_file}")
65
66
 
66
67
 
67
68
  if __name__ == "__main__":
@@ -2,6 +2,9 @@ import re
2
2
  import logging
3
3
  import pyperclip
4
4
  import unicodedata
5
+ import docx2txt
6
+ from striprtf.striprtf import rtf_to_text
7
+ import os
5
8
 
6
9
 
7
10
  class KaraokeLyricsProcessor:
@@ -36,13 +39,44 @@ class KaraokeLyricsProcessor:
36
39
  if input_lyrics_text is not None and input_filename is None:
37
40
  self.input_lyrics_lines = input_lyrics_text.splitlines()
38
41
  elif input_filename is not None and input_lyrics_text is None:
39
- self.input_lyrics_lines = self.read_input_lyrics_file()
42
+ self.input_lyrics_lines = self.read_input_file()
40
43
  else:
41
44
  raise ValueError("Either input_lyrics or input_filename must be set, but not both.")
42
45
 
43
- def read_input_lyrics_file(self):
44
- with open(self.input_filename, "r") as infile:
45
- return infile.readlines()
46
+ def read_input_file(self):
47
+ file_extension = os.path.splitext(self.input_filename)[1].lower()
48
+
49
+ if file_extension == ".txt":
50
+ return self.read_txt_file()
51
+ elif file_extension in [".docx", ".doc"]:
52
+ return self.read_doc_file()
53
+ elif file_extension == ".rtf":
54
+ return self.read_rtf_file()
55
+ else:
56
+ raise ValueError(f"Unsupported file format: {file_extension}")
57
+
58
+ def read_txt_file(self):
59
+ with open(self.input_filename, "r", encoding="utf-8") as infile:
60
+ return self.clean_text(infile.read()).splitlines()
61
+
62
+ def read_doc_file(self):
63
+ text = docx2txt.process(self.input_filename)
64
+ return self.clean_text(text).splitlines()
65
+
66
+ def read_rtf_file(self):
67
+ with open(self.input_filename, "r", encoding="utf-8") as file:
68
+ rtf_text = file.read()
69
+ plain_text = rtf_to_text(rtf_text)
70
+ return self.clean_text(plain_text).splitlines()
71
+
72
+ def clean_text(self, text):
73
+ # Remove any non-printable characters except newlines
74
+ text = "".join(char for char in text if char.isprintable() or char == "\n")
75
+ # Replace multiple newlines with a single newline
76
+ text = re.sub(r"\n{2,}", "\n", text)
77
+ # Remove leading/trailing whitespace from each line
78
+ text = "\n".join(line.strip() for line in text.splitlines())
79
+ return text
46
80
 
47
81
  def find_best_split_point(self, line):
48
82
  """
@@ -82,11 +116,19 @@ class KaraokeLyricsProcessor:
82
116
  self.logger.debug(f"Splitting at middle word index: {mid_word_index}")
83
117
  return split_at_middle
84
118
 
85
- # If the line is still too long, forcibly split at the maximum length
86
- forced_split_point = self.max_line_length
87
- if len(line) > forced_split_point:
88
- self.logger.debug(f"Line is still too long, forcibly splitting at position {forced_split_point}")
89
- return forced_split_point
119
+ # If the line is still too long, find the last space before max_line_length
120
+ if len(line) > self.max_line_length:
121
+ last_space = line.rfind(" ", 0, self.max_line_length)
122
+ if last_space != -1:
123
+ self.logger.debug(f"Splitting at last space before max_line_length: {last_space}")
124
+ return last_space
125
+ else:
126
+ # If no space is found, split at max_line_length
127
+ self.logger.debug(f"No space found, forcibly splitting at max_line_length: {self.max_line_length}")
128
+ return self.max_line_length
129
+
130
+ # If the line is shorter than max_line_length, return its length
131
+ return len(line)
90
132
 
91
133
  def replace_non_printable_spaces(self, text):
92
134
  """
@@ -103,6 +145,16 @@ class KaraokeLyricsProcessor:
103
145
  self.logger.debug(f"Text after replacing non-printable spaces: {cleaned_text}")
104
146
  return cleaned_text
105
147
 
148
+ def clean_punctuation_spacing(self, text):
149
+ """
150
+ Remove unnecessary spaces before punctuation marks.
151
+ """
152
+ self.logger.debug(f"Cleaning punctuation spacing in: {text}")
153
+ # Remove space before comma, period, exclamation mark, question mark, colon, and semicolon
154
+ cleaned_text = re.sub(r"\s+([,\.!?:;])", r"\1", text)
155
+ self.logger.debug(f"Text after cleaning punctuation spacing: {cleaned_text}")
156
+ return cleaned_text
157
+
106
158
  def process_line(self, line):
107
159
  """
108
160
  Process a single line to ensure it's within the maximum length,
@@ -110,6 +162,8 @@ class KaraokeLyricsProcessor:
110
162
  """
111
163
  # Replace non-printable spaces at the beginning
112
164
  line = self.replace_non_printable_spaces(line)
165
+ # Clean up punctuation spacing
166
+ line = self.clean_punctuation_spacing(line)
113
167
 
114
168
  processed_lines = []
115
169
  iteration_count = 0
@@ -172,8 +226,9 @@ class KaraokeLyricsProcessor:
172
226
 
173
227
  processed_lyrics_text = "\n".join(lyrics_lines)
174
228
 
175
- # Final pass to replace any remaining non-printable spaces
229
+ # Final pass to replace any remaining non-printable spaces and clean punctuation
176
230
  processed_lyrics_text = self.replace_non_printable_spaces(processed_lyrics_text)
231
+ processed_lyrics_text = self.clean_punctuation_spacing(processed_lyrics_text)
177
232
 
178
233
  self.processed_lyrics_text = processed_lyrics_text
179
234
  pyperclip.copy(processed_lyrics_text)
@@ -183,8 +238,11 @@ class KaraokeLyricsProcessor:
183
238
  return processed_lyrics_text
184
239
 
185
240
  def write_to_output_file(self):
241
+ # Ensure the output filename has a .txt extension
242
+ base, _ = os.path.splitext(self.output_filename)
243
+ self.output_filename = f"{base}.txt"
186
244
 
187
- with open(self.output_filename, "w") as outfile:
245
+ with open(self.output_filename, "w", encoding="utf-8") as outfile:
188
246
  outfile.write(self.processed_lyrics_text)
189
247
 
190
248
  self.logger.info(f"Processed lyrics written to output file {self.output_filename}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: karaoke-lyrics-processor
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Process song lyrics to prepare them for karaoke video production, e.g. by splitting long lines
5
5
  Home-page: https://github.com/karaokenerds/karaoke-lyrics-processor
6
6
  License: MIT
@@ -13,7 +13,10 @@ Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Classifier: Programming Language :: Python :: 3.12
16
+ Requires-Dist: docx2txt (>=0.8)
16
17
  Requires-Dist: pyperclip (>=1.8)
18
+ Requires-Dist: python-docx (>=1)
19
+ Requires-Dist: striprtf (>=0.0.27)
17
20
  Project-URL: Documentation, https://github.com/karaokenerds/karaoke-lyrics-processor/blob/main/README.md
18
21
  Project-URL: Repository, https://github.com/karaokenerds/karaoke-lyrics-processor
19
22
  Description-Content-Type: text/markdown
@@ -0,0 +1,8 @@
1
+ karaoke_lyrics_processor/__init__.py,sha256=rLRkJQi61qkRiNXdlTleE3ahJ1oBKcghYVkz64x7IIg,62
2
+ karaoke_lyrics_processor/cli.py,sha256=bdtseRI2jcChb1bMr92pc5mpSWpHXh4TSzA2tknbyjU,2522
3
+ karaoke_lyrics_processor/karaoke_lyrics_processor.py,sha256=LmsciDtBS1-apCbvya2RBmYCSH4S-svrIDpOb8Ut0Gw,10387
4
+ karaoke_lyrics_processor-0.3.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
5
+ karaoke_lyrics_processor-0.3.0.dist-info/METADATA,sha256=JuGcHlIyUvoesSQbFxN1iu-JP-B4mmGQIIrgIVv72pE,4264
6
+ karaoke_lyrics_processor-0.3.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
7
+ karaoke_lyrics_processor-0.3.0.dist-info/entry_points.txt,sha256=hjFp6CUxl1p-1WJYfB6TbNcI_DHEnVzX3BXAs4y_0O8,78
8
+ karaoke_lyrics_processor-0.3.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: poetry-core 1.9.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,8 +0,0 @@
1
- karaoke_lyrics_processor/__init__.py,sha256=rLRkJQi61qkRiNXdlTleE3ahJ1oBKcghYVkz64x7IIg,62
2
- karaoke_lyrics_processor/cli.py,sha256=84utSfU-AZZU3okHS8tBFSucJ9-59hXJugfMn48oAKQ,2482
3
- karaoke_lyrics_processor/karaoke_lyrics_processor.py,sha256=3UOEHFU61aZveDMbPDYcIhAwGc6qKeHRLhwHdY9akLM,7813
4
- karaoke_lyrics_processor-0.2.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
5
- karaoke_lyrics_processor-0.2.0.dist-info/METADATA,sha256=6axf30tfSxbLylh6YF_kfVF2B6CytMoVPKYq2qhB8Tc,4164
6
- karaoke_lyrics_processor-0.2.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
7
- karaoke_lyrics_processor-0.2.0.dist-info/entry_points.txt,sha256=hjFp6CUxl1p-1WJYfB6TbNcI_DHEnVzX3BXAs4y_0O8,78
8
- karaoke_lyrics_processor-0.2.0.dist-info/RECORD,,