karaoke-lyrics-processor 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import unicodedata
5
5
  import docx2txt
6
6
  from striprtf.striprtf import rtf_to_text
7
7
  import os
8
+ import codecs
8
9
 
9
10
 
10
11
  class KaraokeLyricsProcessor:
@@ -46,6 +47,8 @@ class KaraokeLyricsProcessor:
46
47
  def read_input_file(self):
47
48
  file_extension = os.path.splitext(self.input_filename)[1].lower()
48
49
 
50
+ self.logger.debug(f"Reading input file: {self.input_filename}")
51
+
49
52
  if file_extension == ".txt":
50
53
  return self.read_txt_file()
51
54
  elif file_extension in [".docx", ".doc"]:
@@ -56,8 +59,14 @@ class KaraokeLyricsProcessor:
56
59
  raise ValueError(f"Unsupported file format: {file_extension}")
57
60
 
58
61
  def read_txt_file(self):
59
- with open(self.input_filename, "r", encoding="utf-8") as infile:
60
- return self.clean_text(infile.read()).splitlines()
62
+ with codecs.open(self.input_filename, "r", encoding="utf-8") as infile:
63
+ content = infile.read()
64
+ self.logger.debug(f"Raw content read from file: {repr(content)}")
65
+ lines = content.splitlines()
66
+ self.logger.debug(f"Number of lines read: {len(lines)}")
67
+ for i, line in enumerate(lines):
68
+ self.logger.debug(f"Line {i}: {repr(line)}")
69
+ return self.clean_text(content).splitlines()
61
70
 
62
71
  def read_doc_file(self):
63
72
  text = docx2txt.process(self.input_filename)
@@ -70,13 +79,17 @@ class KaraokeLyricsProcessor:
70
79
  return self.clean_text(plain_text).splitlines()
71
80
 
72
81
  def clean_text(self, text):
73
- # Remove any non-printable characters except newlines
74
- text = "".join(char for char in text if char.isprintable() or char == "\n")
82
+ self.logger.debug(f"Cleaning text: {repr(text)}")
83
+ # Remove any non-printable characters except newlines and U+2005 (four-per-em space)
84
+ cleaned = "".join(char for char in text if char.isprintable() or char in ["\n", "\u2005"])
85
+ self.logger.debug(f"Text after removing non-printable characters: {repr(cleaned)}")
75
86
  # Replace multiple newlines with a single newline
76
- text = re.sub(r"\n{2,}", "\n", text)
87
+ cleaned = re.sub(r"\n{2,}", "\n", cleaned)
88
+ self.logger.debug(f"Text after replacing multiple newlines: {repr(cleaned)}")
77
89
  # Remove leading/trailing whitespace from each line
78
- text = "\n".join(line.strip() for line in text.splitlines())
79
- return text
90
+ cleaned = "\n".join(line.strip() for line in cleaned.splitlines())
91
+ self.logger.debug(f"Final cleaned text: {repr(cleaned)}")
92
+ return cleaned
80
93
 
81
94
  def find_best_split_point(self, line):
82
95
  """
@@ -135,15 +148,28 @@ class KaraokeLyricsProcessor:
135
148
  Replace non-printable space-like characters, tabs, and other whitespace with regular spaces,
136
149
  excluding newline characters.
137
150
  """
138
- self.logger.debug(f"Replacing non-printable spaces in: {text}")
151
+ self.logger.debug(f"Replacing non-printable spaces in: {repr(text)}")
152
+
153
+ # Log each character and its Unicode code point
154
+ # for i, char in enumerate(text):
155
+ # self.logger.debug(f"Character at position {i}: {repr(char)} (Unicode: U+{ord(char):04X})")
156
+
139
157
  # Define a pattern for space-like characters, including tabs and other whitespace, but excluding newlines
140
158
  space_pattern = r"[^\S\n\r]|\u00A0|\u1680|\u2000-\u200A|\u202F|\u205F|\u3000"
159
+
141
160
  # Replace matched characters with a regular space
142
161
  cleaned_text = re.sub(space_pattern, " ", text)
162
+
163
+ # Log the result of the replacement
164
+ self.logger.debug(f"Text after replacing non-printable spaces: {repr(cleaned_text)}")
165
+
143
166
  # Remove leading/trailing spaces and collapse multiple spaces into one, preserving newlines
144
- cleaned_text = re.sub(r" +", " ", cleaned_text).strip()
145
- self.logger.debug(f"Text after replacing non-printable spaces: {cleaned_text}")
146
- return cleaned_text
167
+ final_text = re.sub(r" +", " ", cleaned_text).strip()
168
+
169
+ # Log the final result
170
+ self.logger.debug(f"Final text after cleaning: {repr(final_text)}")
171
+
172
+ return final_text
147
173
 
148
174
  def clean_punctuation_spacing(self, text):
149
175
  """
@@ -155,36 +181,52 @@ class KaraokeLyricsProcessor:
155
181
  self.logger.debug(f"Text after cleaning punctuation spacing: {cleaned_text}")
156
182
  return cleaned_text
157
183
 
184
+ def fix_commas_inside_quotes(self, text):
185
+ """
186
+ Move commas inside quotes to after the closing quote.
187
+ """
188
+ self.logger.debug(f"Fixing commas inside quotes in: {text}")
189
+ # Use regex to find patterns where a comma is inside quotes and move it outside
190
+ fixed_text = re.sub(r'(".*?)(,)(\s*")', r"\1\3\2", text)
191
+ self.logger.debug(f"Text after fixing commas inside quotes: {fixed_text}")
192
+ return fixed_text
193
+
158
194
  def process_line(self, line):
159
195
  """
160
196
  Process a single line to ensure it's within the maximum length,
161
197
  handle parentheses, and replace non-printable spaces.
162
198
  """
163
- # Replace non-printable spaces at the beginning
164
199
  line = self.replace_non_printable_spaces(line)
165
- # Clean up punctuation spacing
166
200
  line = self.clean_punctuation_spacing(line)
201
+ line = self.fix_commas_inside_quotes(line)
167
202
 
168
203
  processed_lines = []
169
204
  iteration_count = 0
170
205
  max_iterations = 100 # Failsafe limit
171
206
 
172
- while len(line) > self.max_line_length:
173
- if iteration_count > max_iterations:
174
- self.logger.error(f"Maximum iterations exceeded in process_line for line: {line}")
175
- break
176
-
207
+ while len(line) > self.max_line_length and iteration_count < max_iterations:
177
208
  # Check if the line contains parentheses
178
209
  if "(" in line and ")" in line:
179
210
  start_paren = line.find("(")
180
- end_paren = line.find(")") + 1
211
+ end_paren = self.find_matching_paren(line, start_paren)
181
212
  if end_paren < len(line) and line[end_paren] == ",":
182
213
  end_paren += 1
183
214
 
215
+ # Process text before parentheses if it exists
184
216
  if start_paren > 0:
185
- processed_lines.append(line[:start_paren].strip())
186
- processed_lines.append(line[start_paren:end_paren].strip())
187
- line = line[end_paren:].strip()
217
+ before_paren = line[:start_paren].strip()
218
+ processed_lines.extend(self.split_line(before_paren))
219
+
220
+ # Process text within parentheses
221
+ paren_content = line[start_paren : end_paren + 1].strip()
222
+ if len(paren_content) > self.max_line_length:
223
+ # Split the content within parentheses if it's too long
224
+ split_paren_content = self.split_line(paren_content)
225
+ processed_lines.extend(split_paren_content)
226
+ else:
227
+ processed_lines.append(paren_content)
228
+
229
+ line = line[end_paren + 1 :].strip()
188
230
  else:
189
231
  split_point = self.find_best_split_point(line)
190
232
  processed_lines.append(line[:split_point].strip())
@@ -192,11 +234,46 @@ class KaraokeLyricsProcessor:
192
234
 
193
235
  iteration_count += 1
194
236
 
195
- if line: # Add the remaining part if not empty
196
- processed_lines.append(line)
237
+ if line: # Add any remaining part
238
+ processed_lines.extend(self.split_line(line))
239
+
240
+ if iteration_count >= max_iterations:
241
+ self.logger.error(f"Maximum iterations exceeded in process_line for line: {line}")
197
242
 
198
243
  return processed_lines
199
244
 
245
+ def find_matching_paren(self, line, start_index):
246
+ """
247
+ Find the index of the matching closing parenthesis for the opening parenthesis at start_index.
248
+ """
249
+ stack = 0
250
+ for i in range(start_index, len(line)):
251
+ if line[i] == "(":
252
+ stack += 1
253
+ elif line[i] == ")":
254
+ stack -= 1
255
+ if stack == 0:
256
+ return i
257
+ return -1 # No matching parenthesis found
258
+
259
+ def split_line(self, line):
260
+ """
261
+ Split a line into multiple lines if it exceeds the maximum length.
262
+ """
263
+ if len(line) <= self.max_line_length:
264
+ return [line]
265
+
266
+ split_lines = []
267
+ while len(line) > self.max_line_length:
268
+ split_point = self.find_best_split_point(line)
269
+ split_lines.append(line[:split_point].strip())
270
+ line = line[split_point:].strip()
271
+
272
+ if line:
273
+ split_lines.append(line)
274
+
275
+ return split_lines
276
+
200
277
  def process(self):
201
278
  self.logger.info(f"Processing input lyrics from {self.input_filename}")
202
279
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: karaoke-lyrics-processor
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Process song lyrics to prepare them for karaoke video production, e.g. by splitting long lines
5
5
  Home-page: https://github.com/karaokenerds/karaoke-lyrics-processor
6
6
  License: MIT
@@ -0,0 +1,8 @@
1
+ karaoke_lyrics_processor/__init__.py,sha256=rLRkJQi61qkRiNXdlTleE3ahJ1oBKcghYVkz64x7IIg,62
2
+ karaoke_lyrics_processor/cli.py,sha256=bdtseRI2jcChb1bMr92pc5mpSWpHXh4TSzA2tknbyjU,2522
3
+ karaoke_lyrics_processor/karaoke_lyrics_processor.py,sha256=3x5Ev9xzJ_FHarHDiYHtxftpXH0PaWZGJl9GjYeeIg0,13510
4
+ karaoke_lyrics_processor-0.4.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
5
+ karaoke_lyrics_processor-0.4.0.dist-info/METADATA,sha256=_tWFNbjNUj4WoA79ZqVCsRwH2J5KlMqXQ8j8ZthzoGI,4264
6
+ karaoke_lyrics_processor-0.4.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
7
+ karaoke_lyrics_processor-0.4.0.dist-info/entry_points.txt,sha256=hjFp6CUxl1p-1WJYfB6TbNcI_DHEnVzX3BXAs4y_0O8,78
8
+ karaoke_lyrics_processor-0.4.0.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- karaoke_lyrics_processor/__init__.py,sha256=rLRkJQi61qkRiNXdlTleE3ahJ1oBKcghYVkz64x7IIg,62
2
- karaoke_lyrics_processor/cli.py,sha256=bdtseRI2jcChb1bMr92pc5mpSWpHXh4TSzA2tknbyjU,2522
3
- karaoke_lyrics_processor/karaoke_lyrics_processor.py,sha256=LmsciDtBS1-apCbvya2RBmYCSH4S-svrIDpOb8Ut0Gw,10387
4
- karaoke_lyrics_processor-0.3.0.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
5
- karaoke_lyrics_processor-0.3.0.dist-info/METADATA,sha256=JuGcHlIyUvoesSQbFxN1iu-JP-B4mmGQIIrgIVv72pE,4264
6
- karaoke_lyrics_processor-0.3.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
7
- karaoke_lyrics_processor-0.3.0.dist-info/entry_points.txt,sha256=hjFp6CUxl1p-1WJYfB6TbNcI_DHEnVzX3BXAs4y_0O8,78
8
- karaoke_lyrics_processor-0.3.0.dist-info/RECORD,,