karaoke-gen 0.86.7__py3-none-any.whl → 0.96.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +742 -0
  11. backend/api/routes/audio_search.py +903 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2076 -0
  14. backend/api/routes/health.py +344 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1610 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1014 -0
  20. backend/config.py +172 -0
  21. backend/main.py +133 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +405 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +842 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/local_encoding_service.py +590 -0
  56. backend/services/local_preview_encoding_service.py +407 -0
  57. backend/services/lyrics_cache_service.py +216 -0
  58. backend/services/metrics.py +413 -0
  59. backend/services/packaging_service.py +287 -0
  60. backend/services/rclone_service.py +106 -0
  61. backend/services/storage_service.py +209 -0
  62. backend/services/stripe_service.py +275 -0
  63. backend/services/structured_logging.py +254 -0
  64. backend/services/template_service.py +330 -0
  65. backend/services/theme_service.py +469 -0
  66. backend/services/tracing.py +543 -0
  67. backend/services/user_service.py +721 -0
  68. backend/services/worker_service.py +558 -0
  69. backend/services/youtube_service.py +112 -0
  70. backend/services/youtube_upload_service.py +445 -0
  71. backend/tests/__init__.py +4 -0
  72. backend/tests/conftest.py +224 -0
  73. backend/tests/emulator/__init__.py +7 -0
  74. backend/tests/emulator/conftest.py +88 -0
  75. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  76. backend/tests/emulator/test_emulator_integration.py +356 -0
  77. backend/tests/emulator/test_style_loading_direct.py +436 -0
  78. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  79. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  80. backend/tests/requirements-test.txt +10 -0
  81. backend/tests/requirements.txt +6 -0
  82. backend/tests/test_admin_email_endpoints.py +411 -0
  83. backend/tests/test_api_integration.py +460 -0
  84. backend/tests/test_api_routes.py +93 -0
  85. backend/tests/test_audio_analysis_service.py +294 -0
  86. backend/tests/test_audio_editing_service.py +386 -0
  87. backend/tests/test_audio_search.py +1398 -0
  88. backend/tests/test_audio_services.py +378 -0
  89. backend/tests/test_auth_firestore.py +231 -0
  90. backend/tests/test_config_extended.py +68 -0
  91. backend/tests/test_credential_manager.py +377 -0
  92. backend/tests/test_dependencies.py +54 -0
  93. backend/tests/test_discord_service.py +244 -0
  94. backend/tests/test_distribution_services.py +820 -0
  95. backend/tests/test_dropbox_service.py +472 -0
  96. backend/tests/test_email_service.py +492 -0
  97. backend/tests/test_emulator_integration.py +322 -0
  98. backend/tests/test_encoding_interface.py +412 -0
  99. backend/tests/test_file_upload.py +1739 -0
  100. backend/tests/test_flacfetch_client.py +632 -0
  101. backend/tests/test_gdrive_service.py +524 -0
  102. backend/tests/test_instrumental_api.py +431 -0
  103. backend/tests/test_internal_api.py +343 -0
  104. backend/tests/test_job_creation_regression.py +583 -0
  105. backend/tests/test_job_manager.py +339 -0
  106. backend/tests/test_job_manager_notifications.py +329 -0
  107. backend/tests/test_job_notification_service.py +443 -0
  108. backend/tests/test_jobs_api.py +273 -0
  109. backend/tests/test_local_encoding_service.py +423 -0
  110. backend/tests/test_local_preview_encoding_service.py +567 -0
  111. backend/tests/test_main.py +87 -0
  112. backend/tests/test_models.py +918 -0
  113. backend/tests/test_packaging_service.py +382 -0
  114. backend/tests/test_requests.py +201 -0
  115. backend/tests/test_routes_jobs.py +282 -0
  116. backend/tests/test_routes_review.py +337 -0
  117. backend/tests/test_services.py +556 -0
  118. backend/tests/test_services_extended.py +112 -0
  119. backend/tests/test_storage_service.py +448 -0
  120. backend/tests/test_style_upload.py +261 -0
  121. backend/tests/test_template_service.py +295 -0
  122. backend/tests/test_theme_service.py +516 -0
  123. backend/tests/test_unicode_sanitization.py +522 -0
  124. backend/tests/test_upload_api.py +256 -0
  125. backend/tests/test_validate.py +156 -0
  126. backend/tests/test_video_worker_orchestrator.py +847 -0
  127. backend/tests/test_worker_log_subcollection.py +509 -0
  128. backend/tests/test_worker_logging.py +365 -0
  129. backend/tests/test_workers.py +1116 -0
  130. backend/tests/test_workers_extended.py +178 -0
  131. backend/tests/test_youtube_service.py +247 -0
  132. backend/tests/test_youtube_upload_service.py +568 -0
  133. backend/validate.py +173 -0
  134. backend/version.py +27 -0
  135. backend/workers/README.md +597 -0
  136. backend/workers/__init__.py +11 -0
  137. backend/workers/audio_worker.py +618 -0
  138. backend/workers/lyrics_worker.py +683 -0
  139. backend/workers/render_video_worker.py +483 -0
  140. backend/workers/screens_worker.py +525 -0
  141. backend/workers/style_helper.py +198 -0
  142. backend/workers/video_worker.py +1277 -0
  143. backend/workers/video_worker_orchestrator.py +701 -0
  144. backend/workers/worker_logging.py +278 -0
  145. karaoke_gen/instrumental_review/static/index.html +7 -4
  146. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  147. karaoke_gen/style_loader.py +3 -1
  148. karaoke_gen/utils/__init__.py +163 -8
  149. karaoke_gen/video_background_processor.py +9 -4
  150. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/METADATA +2 -1
  151. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/RECORD +187 -42
  152. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  153. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +1 -51
  154. lyrics_transcriber/correction/corrector.py +192 -130
  155. lyrics_transcriber/correction/operations.py +24 -9
  156. lyrics_transcriber/frontend/package-lock.json +2 -2
  157. lyrics_transcriber/frontend/package.json +1 -1
  158. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  159. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  160. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  161. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  162. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  163. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  164. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  165. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  166. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  167. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  168. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  170. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  171. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  172. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  173. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  174. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  175. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  176. lyrics_transcriber/frontend/src/theme.ts +42 -15
  177. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  178. lyrics_transcriber/frontend/vite.config.js +5 -0
  179. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  180. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  181. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  182. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  183. lyrics_transcriber/output/generator.py +17 -3
  184. lyrics_transcriber/output/video.py +60 -95
  185. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  186. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/WHEEL +0 -0
  187. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/entry_points.txt +0 -0
  188. {karaoke_gen-0.86.7.dist-info → karaoke_gen-0.96.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,278 @@
1
+ """
2
+ Worker logging utilities.
3
+
4
+ Provides logging that sends to both standard output AND Firestore
5
+ for remote debugging via the CLI.
6
+
7
+ Two approaches are provided:
8
+ 1. JobLogger - A custom logger class for explicit logging in workers
9
+ 2. JobLogHandler - A logging.Handler that captures logs from any logger (including dependencies)
10
+
11
+ IMPORTANT: Uses contextvars to ensure log isolation between concurrent jobs.
12
+ When multiple jobs run in parallel on the same Cloud Run instance, each job's
13
+ worker logs are correctly filtered to only include logs from that job.
14
+ """
15
+ import contextvars
16
+ import logging
17
+ from contextlib import contextmanager
18
+ from typing import Optional, Set
19
+
20
+
21
# Per-context marker naming the job whose work is currently executing.
# Log handlers compare against it so that jobs running concurrently in the
# same process do not capture each other's log records.
_current_job_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
    'current_job_id', default=None
)


@contextmanager
def job_logging_context(job_id: str):
    """
    Mark ``job_id`` as the active job for the duration of a ``with`` block.

    The previous value of the context variable is restored on exit, even when
    the body raises, so contexts can be nested safely.

    Usage:
        with job_logging_context(job_id):
            # Logs emitted here are attributed to job_id
            process_job(job_id)

    Args:
        job_id: The job ID to expose as the current context
    """
    reset_token = _current_job_id.set(job_id)
    try:
        yield
    finally:
        # reset() (rather than set(None)) restores whatever an enclosing
        # context had installed.
        _current_job_id.reset(reset_token)
50
+
51
+
52
class JobLogHandler(logging.Handler):
    """
    A logging handler that forwards log records to a job's remote log.

    Each accepted record is passed to ``job_manager.append_worker_log`` so it
    can be inspected remotely. The handler can be attached to any logger,
    including loggers owned by dependencies.

    Usage:
        handler = JobLogHandler(job_id, "lyrics", job_manager)

        lyrics_logger = logging.getLogger("karaoke_gen.lyrics_processor")
        lyrics_logger.addHandler(handler)

        lt_logger = logging.getLogger("lyrics_transcriber")
        lt_logger.addHandler(handler)
    """

    # Dedup bookkeeping bounds: once more than _DEDUP_MAX keys are remembered,
    # only the newest _DEDUP_KEEP are retained.
    _DEDUP_MAX = 1000
    _DEDUP_KEEP = 500

    def __init__(self, job_id: str, worker_name: str, job_manager, level: int = logging.INFO):
        """
        Initialize the job log handler.

        Args:
            job_id: Job ID to log to
            worker_name: Worker name recorded on every log entry
            job_manager: Object exposing ``append_worker_log(job_id=, worker=,
                level=, message=)``
            level: Minimum log level to capture (default INFO)
        """
        super().__init__(level)
        self.job_id = job_id
        self.worker_name = worker_name
        self.job_manager = job_manager

        # Keys of records already forwarded (fast membership test) ...
        self._logged_messages: Set[str] = set()
        # ... and the same keys in arrival order. A set has no defined order,
        # so recency-based eviction needs this companion list.
        self._logged_order: list = []

    def emit(self, record: logging.LogRecord) -> None:
        """
        Forward one record to the job log unless it is foreign or a duplicate.

        A record is skipped when the active job context names a different job,
        or when an identical record (same timestamp, level and first 100
        characters of the formatted message) was already forwarded. Any
        exception is routed to ``handleError`` instead of propagating.
        """
        try:
            # Several jobs may attach handlers to the same global loggers;
            # without this check each handler would capture every job's logs.
            current_job = _current_job_id.get()
            if current_job is not None and current_job != self.job_id:
                return

            message = self.format(record)

            # The same handler instance can be reached through more than one
            # logger, so identical records are collapsed.
            dedup_key = f"{record.created}:{record.levelname}:{message[:100]}"
            if dedup_key in self._logged_messages:
                return
            self._logged_messages.add(dedup_key)
            self._logged_order.append(dedup_key)

            # Bound memory use by evicting the OLDEST keys and keeping the
            # most recent ones.
            if len(self._logged_order) > self._DEDUP_MAX:
                stale = self._logged_order[:-self._DEDUP_KEEP]
                del self._logged_order[:-self._DEDUP_KEEP]
                self._logged_messages.difference_update(stale)

            self.job_manager.append_worker_log(
                job_id=self.job_id,
                worker=self.worker_name,
                level=record.levelname,
                message=message,
            )
        except Exception:
            # Logging failures must never break the worker.
            self.handleError(record)
125
+
126
+
127
class JobLogger:
    """
    Logger that writes each message twice: to standard logging and to the
    job's remote log via ``job_manager.append_worker_log``.

    Usage:
        job_logger = JobLogger(job_id, "audio", job_manager)
        job_logger.info("Processing audio...")
        job_logger.error("Failed to process", exc_info=True)
    """

    def __init__(self, job_id: str, worker_name: str, job_manager):
        """
        Initialize job logger.

        Args:
            job_id: Job ID to log to
            worker_name: Worker name (audio, lyrics, screens, video, render)
            job_manager: Object exposing ``append_worker_log(job_id=, worker=,
                level=, message=)``
        """
        self.job_id = job_id
        self.worker_name = worker_name
        self.job_manager = job_manager
        self._logger = logging.getLogger(f"worker.{worker_name}.{job_id}")

    def _log(self, level: str, message: str, *args, **kwargs):
        """
        Emit ``message`` at ``level`` to both destinations.

        ``args`` are applied with %-formatting; if that fails the args tuple
        is appended to the raw message instead. Extra keyword arguments are
        accepted but not used.
        """
        text = message
        if args:
            try:
                text = message % args
            except (TypeError, ValueError):
                text = f"{message} {args}"

        # Standard logging first, via the method named after the level.
        emit_locally = getattr(self._logger, level.lower())
        emit_locally(text)

        # Then the remote job log; failures are reported but never raised so
        # they cannot interrupt worker processing.
        try:
            self.job_manager.append_worker_log(
                job_id=self.job_id,
                worker=self.worker_name,
                level=level.upper(),
                message=text,
            )
        except Exception as e:
            self._logger.warning(f"Failed to append job log: {e}")

    def debug(self, message: str, *args, **kwargs):
        """Log debug message."""
        self._log("DEBUG", message, *args, **kwargs)

    def info(self, message: str, *args, **kwargs):
        """Log info message."""
        self._log("INFO", message, *args, **kwargs)

    def warning(self, message: str, *args, **kwargs):
        """Log warning message."""
        self._log("WARNING", message, *args, **kwargs)

    def error(self, message: str, *args, exc_info: bool = False, **kwargs):
        """Log error message, followed by the active traceback if requested."""
        self._log("ERROR", message, *args, **kwargs)
        if not exc_info:
            return
        import traceback
        tb = traceback.format_exc()
        # format_exc() yields "NoneType: None\n" when no exception is active.
        if tb and tb != "NoneType: None\n":
            self._log("ERROR", f"Traceback:\n{tb}")

    def exception(self, message: str, *args, **kwargs):
        """Log exception with traceback."""
        self.error(message, *args, exc_info=True, **kwargs)
204
+
205
+
206
def create_job_logger(job_id: str, worker_name: str) -> JobLogger:
    """
    Build a JobLogger backed by a freshly constructed JobManager.

    Convenience factory for workers that do not already hold a JobManager.

    Args:
        job_id: Job ID
        worker_name: Worker name

    Returns:
        JobLogger instance
    """
    # Imported here rather than at module level, as in the original code.
    from backend.services.job_manager import JobManager

    return JobLogger(job_id, worker_name, JobManager())
222
+
223
+
224
def setup_job_logging(job_id: str, worker_name: str, *logger_names: str) -> JobLogHandler:
    """
    Set up job logging for a worker and its dependencies.

    Creates one JobLogHandler (INFO level, message-only format) and attaches
    it to every named logger, so records from those loggers are forwarded to
    the job's remote log.

    IMPORTANT: wrap the job processing code in ``job_logging_context(job_id)``
    so the handler only captures records emitted on behalf of this job when
    several jobs run concurrently in the same process.

    Args:
        job_id: Job ID
        worker_name: Worker name
        *logger_names: Names of loggers to capture
            (e.g., "lyrics_transcriber", "karaoke_gen")

    Returns:
        The JobLogHandler, so the caller can detach it when the job finishes

    Example:
        names = ("karaoke_gen.lyrics_processor", "lyrics_transcriber")
        handler = setup_job_logging(job_id, "lyrics", *names)

        with job_logging_context(job_id):
            ...  # do work

        # Detach when done so handlers do not accumulate on global loggers
        for name in names:
            logging.getLogger(name).removeHandler(handler)
    """
    from backend.services.job_manager import JobManager

    job_manager = JobManager()
    handler = JobLogHandler(job_id, worker_name, job_manager, level=logging.INFO)

    # Store only the message text; level and worker are recorded separately.
    handler.setFormatter(logging.Formatter('%(message)s'))

    for logger_name in logger_names:
        logger = logging.getLogger(logger_name)
        logger.addHandler(handler)
        # Lower the threshold only when INFO records would otherwise be
        # dropped. The effective level accounts for levels inherited from
        # ancestors, so a logger inheriting DEBUG keeps its DEBUG output
        # instead of being forced up to INFO.
        if logger.getEffectiveLevel() > logging.INFO:
            logger.setLevel(logging.INFO)

    return handler
@@ -6,21 +6,24 @@
6
6
  <title>Instrumental Review</title>
7
7
  <style>
8
8
  :root {
9
+ /* Nomad Karaoke brand colors - see docs/BRAND-STYLE-GUIDE.md */
9
10
  --bg: #0f0f0f;
10
11
  --card: #1a1a1a;
11
12
  --card-border: #2a2a2a;
12
13
  --waveform-bg: #0d1117;
13
14
  --text: #e5e5e5;
14
15
  --text-muted: #888;
15
- --primary: #3b82f6;
16
- --primary-hover: #2563eb;
16
+ --primary: #ff7acc;
17
+ --primary-hover: #ff5bb8;
17
18
  --secondary: #252525;
18
19
  --secondary-hover: #333;
19
20
  --success: #22c55e;
20
21
  --warning: #f59e0b;
21
22
  --danger: #ef4444;
22
- --pink: #ec4899;
23
- --blue: #60a5fa;
23
+ --brand-pink: #ff7acc;
24
+ --brand-gold: #ffdf6b;
25
+ --brand-purple: #8b5cf6;
26
+ --blue: #3b82f6;
24
27
  }
25
28
 
26
29
  * { box-sizing: border-box; margin: 0; padding: 0; }
@@ -20,6 +20,7 @@ from google.oauth2.credentials import Credentials
20
20
  import base64
21
21
  from email.mime.text import MIMEText
22
22
  from lyrics_transcriber.output.cdg import CDGGenerator
23
+ from ..utils import sanitize_filename
23
24
 
24
25
 
25
26
  class KaraokeFinalise:
@@ -1514,7 +1515,11 @@ class KaraokeFinalise:
1514
1515
 
1515
1516
  email_body = template.format(youtube_url=youtube_url, dropbox_url=dropbox_url)
1516
1517
 
1517
- subject = f"{self.brand_code}: {artist} - {title}"
1518
+ # Sanitize artist/title to handle Unicode characters (curly quotes, em dashes, etc.)
1519
+ # that cause email header encoding issues (MIME headers use latin-1)
1520
+ safe_artist = sanitize_filename(artist) if artist else "Unknown"
1521
+ safe_title = sanitize_filename(title) if title else "Unknown"
1522
+ subject = f"{self.brand_code}: {safe_artist} - {safe_title}"
1518
1523
 
1519
1524
  if self.dry_run:
1520
1525
  self.logger.info(f"DRY RUN: Would create email draft with subject: {subject}")
@@ -70,7 +70,9 @@ DEFAULT_KARAOKE_STYLE = {
70
70
  "background_color": "#000000",
71
71
  "background_image": None,
72
72
  # Font settings
73
- "font": "Arial",
73
+ # Using "Noto Sans" for full Unicode support (CJK, Arabic, symbols, etc.)
74
+ # Requires fonts-noto-* packages installed in Docker image
75
+ "font": "Noto Sans",
74
76
  "font_path": "", # Must be string, not None (for ASS generator)
75
77
  "ass_name": "Default",
76
78
  # Colors in "R, G, B, A" format (required by ASS)
@@ -1,7 +1,85 @@
1
1
  import re
2
2
 
3
- # Unicode character replacements for ASCII-safe filenames
4
- # These characters cause issues with HTTP headers (latin-1 encoding) and some filesystems
3
+ # =============================================================================
4
+ # Text Normalization
5
+ # =============================================================================
6
+ # These mappings normalize visually-similar Unicode characters to their standard
7
+ # ASCII equivalents. This ensures consistency in stored data and reliable search.
8
+
9
# Apostrophe-like characters -> standard apostrophe (U+0027)
APOSTROPHE_REPLACEMENTS = dict.fromkeys(
    (
        "\u2018",  # LEFT SINGLE QUOTATION MARK
        "\u2019",  # RIGHT SINGLE QUOTATION MARK - common from Word/macOS
        "\u201A",  # SINGLE LOW-9 QUOTATION MARK
        "\u201B",  # SINGLE HIGH-REVERSED-9 QUOTATION MARK
        "\u0060",  # GRAVE ACCENT - backtick
        "\u00B4",  # ACUTE ACCENT
        "\u2032",  # PRIME
        "\u02B9",  # MODIFIER LETTER PRIME
        "\u02BC",  # MODIFIER LETTER APOSTROPHE
        "\u02C8",  # MODIFIER LETTER VERTICAL LINE
        # COMBINING ACUTE ACCENT (standalone, rare)
        # NOTE(review): a plain character replace on this key also hits
        # accents attached to a base letter in decomposed (NFD) text -
        # confirm that is intended.
        "\u0301",
    ),
    "'",
)

# Double quote-like characters -> standard double quote (U+0022)
DOUBLE_QUOTE_REPLACEMENTS = dict.fromkeys(
    (
        "\u201C",  # LEFT DOUBLE QUOTATION MARK
        "\u201D",  # RIGHT DOUBLE QUOTATION MARK
        "\u201E",  # DOUBLE LOW-9 QUOTATION MARK
        "\u201F",  # DOUBLE HIGH-REVERSED-9 QUOTATION MARK
        "\u2033",  # DOUBLE PRIME
        "\u02DD",  # DOUBLE ACUTE ACCENT
        "\u3003",  # DITTO MARK - CJK
    ),
    '"',
)

# Dash-like characters -> standard hyphen-minus (U+002D)
DASH_REPLACEMENTS = dict.fromkeys(
    (
        "\u2013",  # EN DASH
        "\u2014",  # EM DASH
        "\u2015",  # HORIZONTAL BAR
        "\u2212",  # MINUS SIGN
        "\u2010",  # HYPHEN
        "\u2011",  # NON-BREAKING HYPHEN
        "\u2012",  # FIGURE DASH
        "\u00AD",  # SOFT HYPHEN (invisible, but normalize anyway)
    ),
    "-",
)

# Whitespace characters -> standard space (U+0020); the two zero-width
# characters are removed entirely instead.
WHITESPACE_REPLACEMENTS = {
    ch: ("" if ch in ("\u200B", "\uFEFF") else " ")
    for ch in (
        "\u00A0",  # NO-BREAK SPACE
        "\u2002",  # EN SPACE
        "\u2003",  # EM SPACE
        "\u2004",  # THREE-PER-EM SPACE
        "\u2005",  # FOUR-PER-EM SPACE
        "\u2006",  # SIX-PER-EM SPACE
        "\u2007",  # FIGURE SPACE
        "\u2008",  # PUNCTUATION SPACE
        "\u2009",  # THIN SPACE
        "\u200A",  # HAIR SPACE
        "\u200B",  # ZERO WIDTH SPACE (removed)
        "\u202F",  # NARROW NO-BREAK SPACE
        "\u205F",  # MEDIUM MATHEMATICAL SPACE
        "\u3000",  # IDEOGRAPHIC SPACE (CJK full-width space)
        "\uFEFF",  # ZERO WIDTH NO-BREAK SPACE / BOM (removed)
    )
}

# Other replacements
OTHER_REPLACEMENTS = dict.fromkeys(
    (
        "\u2026",  # HORIZONTAL ELLIPSIS
        "\u22EF",  # MIDLINE HORIZONTAL ELLIPSIS
    ),
    "...",
)

# Combined replacement dict for normalize_text()
TEXT_NORMALIZATIONS = {
    **APOSTROPHE_REPLACEMENTS,
    **DOUBLE_QUOTE_REPLACEMENTS,
    **DASH_REPLACEMENTS,
    **WHITESPACE_REPLACEMENTS,
    **OTHER_REPLACEMENTS,
}
80
+
81
+ # Legacy dict for backwards compatibility (used by sanitize_filename)
82
+ # This is a subset focused on HTTP header safety
5
83
  UNICODE_REPLACEMENTS = {
6
84
  # Curly/smart quotes -> straight quotes
7
85
  "\u2018": "'", # LEFT SINGLE QUOTATION MARK
@@ -20,25 +98,102 @@ UNICODE_REPLACEMENTS = {
20
98
  }
21
99
 
22
100
 
101
def normalize_text(text: str) -> str:
    r"""
    Normalize visually-similar Unicode characters to standard ASCII equivalents.

    Steps, in order:
    1. Compose the text to Unicode NFC, so an accent attached to a base letter
       (e.g. "e" + U+0301) becomes the single accented letter and is not
       mistaken for a standalone accent mark.
    2. Apply every mapping in TEXT_NORMALIZATIONS (curly quotes and backticks
       -> straight quotes, assorted dashes -> hyphen, assorted spaces ->
       regular space, ellipsis character -> three dots).
    3. Collapse runs of two or more spaces to a single space.
    4. Strip leading/trailing whitespace.

    Unlike sanitize_filename(), this does NOT:
    - Replace filesystem-unsafe characters (/, \, :, *, ?, ", <, >, |)
    - Collapse multiple underscores
    - Strip leading/trailing periods

    Intended for user-facing text such as artist names and song titles.

    Args:
        text: The text to normalize

    Returns:
        The normalized string. ``None`` is returned for ``None`` input, and
        any other non-string value is returned unchanged.

    Examples:
        >>> normalize_text("Don\u2019t Stop")    # curly apostrophe
        "Don't Stop"
        >>> normalize_text("Song \u2014 Title")  # em dash
        'Song - Title'
        >>> normalize_text("Hello\u00A0World")   # non-breaking space
        'Hello World'
    """
    # Covers None as well as any other non-string input.
    if not isinstance(text, str):
        return text

    import unicodedata

    # Compose first: otherwise the U+0301 mapping would turn decomposed
    # accented letters ("e" + U+0301) into "e'". Marks with no precomposed
    # form are still subject to the table below.
    text = unicodedata.normalize("NFC", text)

    for unicode_char, replacement in TEXT_NORMALIZATIONS.items():
        text = text.replace(unicode_char, replacement)

    # Only literal spaces are collapsed; tabs/newlines are left alone.
    text = re.sub(r' {2,}', ' ', text)

    return text.strip()


def sanitize_filename(filename):
    """
    Replace or remove characters that are unsafe for filenames.

    Applies normalize_text() first, then additionally:
    - Replaces each of the characters \\ / : * ? " < > | with an underscore
    - Strips leading/trailing periods and spaces
    - Collapses runs of underscores and runs of spaces to a single character

    NOTE(review): characters outside the normalization table (e.g. CJK text)
    pass through unchanged, so the result is not guaranteed to be ASCII or
    latin-1 encodable - confirm before relying on it for HTTP headers.

    Args:
        filename: The filename to sanitize

    Returns:
        The sanitized filename, or None if ``filename`` is None
    """
    if filename is None:
        return None

    filename = normalize_text(filename)

    # Plain str.replace per character keeps the original semantics exactly.
    for unsafe in '\\/:*?"<>|':
        filename = filename.replace(unsafe, "_")

    # Leading/trailing periods and spaces are removed in one step.
    filename = filename.strip(". ")

    filename = re.sub(r'_+', '_', filename)
    filename = re.sub(r' +', ' ', filename)

    return filename
@@ -156,15 +156,20 @@ class VideoBackgroundProcessor:
156
156
  """
157
157
  Escape a file path for use in ffmpeg filter expressions.
158
158
 
159
+ FFmpeg filter syntax uses single quotes to protect special characters
160
+ like spaces, colons, and semicolons. Single quotes within the path
161
+ are escaped using the '\\'' pattern (end quote, literal quote, start quote).
162
+
159
163
  Args:
160
164
  path: File path to escape
161
165
 
162
166
  Returns:
163
- str: Escaped path
167
+ str: Escaped path (wrapped in single quotes)
164
168
  """
165
- # Escape backslashes and colons for ffmpeg filter syntax
166
- escaped = path.replace("\\", "\\\\").replace(":", "\\:")
167
- return escaped
169
+ # Escape single quotes: ' becomes '\'' (end quote, \', start quote)
170
+ escaped = path.replace("'", "'\\''")
171
+ # Wrap entire path in single quotes
172
+ return f"'{escaped}'"
168
173
 
169
174
  def build_video_filter(self, ass_subtitles_path, darkness_percent, fonts_dir=None):
170
175
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: karaoke-gen
3
- Version: 0.86.7
3
+ Version: 0.96.0
4
4
  Summary: Generate karaoke videos with synchronized lyrics. Handles the entire process from downloading audio and lyrics to creating the final video with title screens.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -14,6 +14,7 @@ Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Provides-Extra: local-whisper
17
+ Requires-Dist: aiohttp (>=3.13.2,<4.0.0)
17
18
  Requires-Dist: argparse (>=1.4.0)
18
19
  Requires-Dist: attrs (>=24.2.0)
19
20
  Requires-Dist: audio-separator[cpu] (>=0.34.0)