karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. backend/.coveragerc +20 -0
  2. backend/.gitignore +37 -0
  3. backend/Dockerfile +43 -0
  4. backend/Dockerfile.base +74 -0
  5. backend/README.md +242 -0
  6. backend/__init__.py +0 -0
  7. backend/api/__init__.py +0 -0
  8. backend/api/dependencies.py +457 -0
  9. backend/api/routes/__init__.py +0 -0
  10. backend/api/routes/admin.py +835 -0
  11. backend/api/routes/audio_search.py +913 -0
  12. backend/api/routes/auth.py +348 -0
  13. backend/api/routes/file_upload.py +2112 -0
  14. backend/api/routes/health.py +409 -0
  15. backend/api/routes/internal.py +435 -0
  16. backend/api/routes/jobs.py +1629 -0
  17. backend/api/routes/review.py +652 -0
  18. backend/api/routes/themes.py +162 -0
  19. backend/api/routes/users.py +1513 -0
  20. backend/config.py +172 -0
  21. backend/main.py +157 -0
  22. backend/middleware/__init__.py +5 -0
  23. backend/middleware/audit_logging.py +124 -0
  24. backend/models/__init__.py +0 -0
  25. backend/models/job.py +519 -0
  26. backend/models/requests.py +123 -0
  27. backend/models/theme.py +153 -0
  28. backend/models/user.py +254 -0
  29. backend/models/worker_log.py +164 -0
  30. backend/pyproject.toml +29 -0
  31. backend/quick-check.sh +93 -0
  32. backend/requirements.txt +29 -0
  33. backend/run_tests.sh +60 -0
  34. backend/services/__init__.py +0 -0
  35. backend/services/audio_analysis_service.py +243 -0
  36. backend/services/audio_editing_service.py +278 -0
  37. backend/services/audio_search_service.py +702 -0
  38. backend/services/auth_service.py +630 -0
  39. backend/services/credential_manager.py +792 -0
  40. backend/services/discord_service.py +172 -0
  41. backend/services/dropbox_service.py +301 -0
  42. backend/services/email_service.py +1093 -0
  43. backend/services/encoding_interface.py +454 -0
  44. backend/services/encoding_service.py +502 -0
  45. backend/services/firestore_service.py +512 -0
  46. backend/services/flacfetch_client.py +573 -0
  47. backend/services/gce_encoding/README.md +72 -0
  48. backend/services/gce_encoding/__init__.py +22 -0
  49. backend/services/gce_encoding/main.py +589 -0
  50. backend/services/gce_encoding/requirements.txt +16 -0
  51. backend/services/gdrive_service.py +356 -0
  52. backend/services/job_logging.py +258 -0
  53. backend/services/job_manager.py +853 -0
  54. backend/services/job_notification_service.py +271 -0
  55. backend/services/langfuse_preloader.py +98 -0
  56. backend/services/local_encoding_service.py +590 -0
  57. backend/services/local_preview_encoding_service.py +407 -0
  58. backend/services/lyrics_cache_service.py +216 -0
  59. backend/services/metrics.py +413 -0
  60. backend/services/nltk_preloader.py +122 -0
  61. backend/services/packaging_service.py +287 -0
  62. backend/services/rclone_service.py +106 -0
  63. backend/services/spacy_preloader.py +65 -0
  64. backend/services/storage_service.py +209 -0
  65. backend/services/stripe_service.py +371 -0
  66. backend/services/structured_logging.py +254 -0
  67. backend/services/template_service.py +330 -0
  68. backend/services/theme_service.py +469 -0
  69. backend/services/tracing.py +543 -0
  70. backend/services/user_service.py +721 -0
  71. backend/services/worker_service.py +558 -0
  72. backend/services/youtube_service.py +112 -0
  73. backend/services/youtube_upload_service.py +445 -0
  74. backend/tests/__init__.py +4 -0
  75. backend/tests/conftest.py +224 -0
  76. backend/tests/emulator/__init__.py +7 -0
  77. backend/tests/emulator/conftest.py +109 -0
  78. backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
  79. backend/tests/emulator/test_emulator_integration.py +356 -0
  80. backend/tests/emulator/test_style_loading_direct.py +436 -0
  81. backend/tests/emulator/test_worker_logs_direct.py +229 -0
  82. backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
  83. backend/tests/requirements-test.txt +10 -0
  84. backend/tests/requirements.txt +6 -0
  85. backend/tests/test_admin_email_endpoints.py +411 -0
  86. backend/tests/test_api_integration.py +460 -0
  87. backend/tests/test_api_routes.py +93 -0
  88. backend/tests/test_audio_analysis_service.py +294 -0
  89. backend/tests/test_audio_editing_service.py +386 -0
  90. backend/tests/test_audio_search.py +1398 -0
  91. backend/tests/test_audio_services.py +378 -0
  92. backend/tests/test_auth_firestore.py +231 -0
  93. backend/tests/test_config_extended.py +68 -0
  94. backend/tests/test_credential_manager.py +377 -0
  95. backend/tests/test_dependencies.py +54 -0
  96. backend/tests/test_discord_service.py +244 -0
  97. backend/tests/test_distribution_services.py +820 -0
  98. backend/tests/test_dropbox_service.py +472 -0
  99. backend/tests/test_email_service.py +492 -0
  100. backend/tests/test_emulator_integration.py +322 -0
  101. backend/tests/test_encoding_interface.py +412 -0
  102. backend/tests/test_file_upload.py +1739 -0
  103. backend/tests/test_flacfetch_client.py +632 -0
  104. backend/tests/test_gdrive_service.py +524 -0
  105. backend/tests/test_instrumental_api.py +431 -0
  106. backend/tests/test_internal_api.py +343 -0
  107. backend/tests/test_job_creation_regression.py +583 -0
  108. backend/tests/test_job_manager.py +356 -0
  109. backend/tests/test_job_manager_notifications.py +329 -0
  110. backend/tests/test_job_notification_service.py +443 -0
  111. backend/tests/test_jobs_api.py +283 -0
  112. backend/tests/test_local_encoding_service.py +423 -0
  113. backend/tests/test_local_preview_encoding_service.py +567 -0
  114. backend/tests/test_main.py +87 -0
  115. backend/tests/test_models.py +918 -0
  116. backend/tests/test_packaging_service.py +382 -0
  117. backend/tests/test_requests.py +201 -0
  118. backend/tests/test_routes_jobs.py +282 -0
  119. backend/tests/test_routes_review.py +337 -0
  120. backend/tests/test_services.py +556 -0
  121. backend/tests/test_services_extended.py +112 -0
  122. backend/tests/test_spacy_preloader.py +119 -0
  123. backend/tests/test_storage_service.py +448 -0
  124. backend/tests/test_style_upload.py +261 -0
  125. backend/tests/test_template_service.py +295 -0
  126. backend/tests/test_theme_service.py +516 -0
  127. backend/tests/test_unicode_sanitization.py +522 -0
  128. backend/tests/test_upload_api.py +256 -0
  129. backend/tests/test_validate.py +156 -0
  130. backend/tests/test_video_worker_orchestrator.py +847 -0
  131. backend/tests/test_worker_log_subcollection.py +509 -0
  132. backend/tests/test_worker_logging.py +365 -0
  133. backend/tests/test_workers.py +1116 -0
  134. backend/tests/test_workers_extended.py +178 -0
  135. backend/tests/test_youtube_service.py +247 -0
  136. backend/tests/test_youtube_upload_service.py +568 -0
  137. backend/utils/test_data.py +27 -0
  138. backend/validate.py +173 -0
  139. backend/version.py +27 -0
  140. backend/workers/README.md +597 -0
  141. backend/workers/__init__.py +11 -0
  142. backend/workers/audio_worker.py +618 -0
  143. backend/workers/lyrics_worker.py +683 -0
  144. backend/workers/render_video_worker.py +483 -0
  145. backend/workers/screens_worker.py +535 -0
  146. backend/workers/style_helper.py +198 -0
  147. backend/workers/video_worker.py +1277 -0
  148. backend/workers/video_worker_orchestrator.py +701 -0
  149. backend/workers/worker_logging.py +278 -0
  150. karaoke_gen/instrumental_review/static/index.html +7 -4
  151. karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
  152. karaoke_gen/utils/__init__.py +163 -8
  153. karaoke_gen/video_background_processor.py +9 -4
  154. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
  155. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
  156. lyrics_transcriber/correction/agentic/agent.py +17 -6
  157. lyrics_transcriber/correction/agentic/providers/config.py +9 -5
  158. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
  159. lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
  160. lyrics_transcriber/correction/anchor_sequence.py +151 -37
  161. lyrics_transcriber/correction/corrector.py +192 -130
  162. lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
  163. lyrics_transcriber/correction/operations.py +24 -9
  164. lyrics_transcriber/correction/phrase_analyzer.py +18 -0
  165. lyrics_transcriber/frontend/package-lock.json +2 -2
  166. lyrics_transcriber/frontend/package.json +1 -1
  167. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
  168. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
  169. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
  170. lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
  171. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
  172. lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
  173. lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
  174. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
  175. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
  176. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
  177. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
  178. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
  179. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
  180. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
  181. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
  182. lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
  183. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
  184. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
  185. lyrics_transcriber/frontend/src/theme.ts +42 -15
  186. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  187. lyrics_transcriber/frontend/vite.config.js +5 -0
  188. lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
  189. lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/index.html +6 -2
  191. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
  192. lyrics_transcriber/output/generator.py +17 -3
  193. lyrics_transcriber/output/video.py +60 -95
  194. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
  195. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
  196. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
  197. {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,287 @@
1
+ """
2
+ Packaging Service.
3
+
4
+ Provides CDG and TXT package generation functionality, extracted from KaraokeFinalise
5
+ for use by both the cloud backend (video_worker) and local CLI.
6
+
7
+ This service handles:
8
+ - CDG (CD+G) file generation from LRC files
9
+ - TXT lyric file generation from LRC files
10
+ - ZIP packaging of CDG/MP3 and TXT/MP3 pairs
11
+ """
12
+
13
+ import logging
14
+ import os
15
+ import zipfile
16
+ from typing import Optional, Dict, Any, Tuple
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class PackagingService:
    """
    Service for creating CDG and TXT karaoke packages.

    CDG (CD+Graphics) is a format used by karaoke machines.
    TXT packages are used by software karaoke players.
    Both formats are packaged as ZIP files with an MP3 audio track.
    """

    def __init__(
        self,
        cdg_styles: Optional[Dict[str, Any]] = None,
        dry_run: bool = False,
        non_interactive: bool = False,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Initialize the packaging service.

        Args:
            cdg_styles: CDG style configuration for CDG generation
            dry_run: If True, log actions without performing them
            non_interactive: If True, skip interactive prompts
            logger: Optional logger instance
        """
        self.cdg_styles = cdg_styles
        self.dry_run = dry_run
        self.non_interactive = non_interactive
        self.logger = logger or logging.getLogger(__name__)

    @staticmethod
    def _default_txt_path(output_zip_path: str) -> str:
        """Return the TXT path that sits next to ``output_zip_path``.

        Only the final extension is swapped, so a ".zip" that appears in a
        directory name or in the middle of the file name is left alone, and a
        path without a ".zip" suffix still yields a distinct ".txt" path
        instead of colliding with the ZIP itself.
        """
        return os.path.splitext(output_zip_path)[0] + ".txt"

    def _log_existing_zip(self, label: str, output_zip_path: str) -> None:
        """Log that the target ZIP for ``label`` ("CDG"/"TXT") already exists."""
        if not os.path.isfile(output_zip_path):
            return
        if self.non_interactive:
            self.logger.info(
                f"{label} ZIP exists, will be overwritten: {output_zip_path}"
            )
        else:
            self.logger.info(f"{label} ZIP already exists: {output_zip_path}")

    def create_cdg_package(
        self,
        lrc_file: str,
        audio_file: str,
        output_zip_path: str,
        artist: str,
        title: str,
        output_mp3_path: Optional[str] = None,
        output_cdg_path: Optional[str] = None,
    ) -> Tuple[str, Optional[str], Optional[str]]:
        """
        Create a CDG package (ZIP containing CDG and MP3 files).

        Args:
            lrc_file: Path to the LRC lyrics file
            audio_file: Path to the instrumental audio file
            output_zip_path: Path for the output ZIP file
            artist: Artist name for metadata
            title: Song title for metadata
            output_mp3_path: Optional path for the extracted MP3 file
            output_cdg_path: Optional path for the extracted CDG file

        Returns:
            Tuple of (zip_path, mp3_path, cdg_path). After a real generation
            run the MP3/CDG entries are None unless a file exists at the
            requested path once the ZIP has been extracted.

        Raises:
            ValueError: If CDG styles are not configured
            FileNotFoundError: If input files are missing
            Exception: If CDG generation fails
        """
        self.logger.info(f"Creating CDG package for {artist} - {title}")

        # Validate inputs
        if not os.path.isfile(lrc_file):
            raise FileNotFoundError(f"LRC file not found: {lrc_file}")
        if not os.path.isfile(audio_file):
            raise FileNotFoundError(f"Audio file not found: {audio_file}")

        # Existing ZIPs are only reported; they get replaced further down.
        self._log_existing_zip("CDG", output_zip_path)

        # Check if individual files exist (allows skipping generation)
        if output_mp3_path and output_cdg_path:
            if os.path.isfile(output_mp3_path) and os.path.isfile(output_cdg_path):
                self.logger.info("Found existing MP3 and CDG files, creating ZIP directly")
                if not self.dry_run:
                    self._create_zip_from_files(
                        output_zip_path,
                        [
                            (output_mp3_path, os.path.basename(output_mp3_path)),
                            (output_cdg_path, os.path.basename(output_cdg_path)),
                        ],
                    )
                return output_zip_path, output_mp3_path, output_cdg_path

        if self.dry_run:
            self.logger.info(
                f"DRY RUN: Would generate CDG package: {output_zip_path}"
            )
            return output_zip_path, output_mp3_path, output_cdg_path

        # Generate CDG files
        if self.cdg_styles is None:
            raise ValueError(
                "CDG styles configuration is required for CDG generation"
            )

        self.logger.info("Generating CDG and MP3 files")
        # Imported lazily so the module can be loaded without the generator.
        from lyrics_transcriber.output.cdg import CDGGenerator

        output_dir = os.path.dirname(output_zip_path) or os.getcwd()
        generator = CDGGenerator(output_dir=output_dir, logger=self.logger)

        _cdg_file, _mp3_file, zip_file = generator.generate_cdg_from_lrc(
            lrc_file=lrc_file,
            audio_file=audio_file,
            title=title,
            artist=artist,
            cdg_styles=self.cdg_styles,
        )

        # Move the ZIP to the expected output path if different. os.replace
        # overwrites an existing destination on every platform, whereas
        # os.rename raises on Windows when the target already exists.
        if os.path.isfile(zip_file) and zip_file != output_zip_path:
            os.replace(zip_file, output_zip_path)
            self.logger.info(f"Renamed CDG ZIP: {zip_file} -> {output_zip_path}")

        if not os.path.isfile(output_zip_path):
            raise Exception(f"Failed to create CDG ZIP file: {output_zip_path}")

        # Extract the ZIP to get individual files if paths provided
        extracted_mp3 = None
        extracted_cdg = None
        if output_mp3_path or output_cdg_path:
            self.logger.info(f"Extracting CDG ZIP file: {output_zip_path}")
            with zipfile.ZipFile(output_zip_path, "r") as zip_ref:
                zip_ref.extractall(output_dir)

            # Member names come from the archive, so a requested path is only
            # reported when a file actually exists there after extraction.
            if output_mp3_path and os.path.isfile(output_mp3_path):
                extracted_mp3 = output_mp3_path
                self.logger.info(f"Extracted MP3: {extracted_mp3}")
            if output_cdg_path and os.path.isfile(output_cdg_path):
                extracted_cdg = output_cdg_path
                self.logger.info(f"Extracted CDG: {extracted_cdg}")

        self.logger.info(f"CDG package created: {output_zip_path}")
        return output_zip_path, extracted_mp3, extracted_cdg

    def create_txt_package(
        self,
        lrc_file: str,
        mp3_file: str,
        output_zip_path: str,
        output_txt_path: Optional[str] = None,
    ) -> Tuple[str, Optional[str]]:
        """
        Create a TXT package (ZIP containing TXT lyrics and MP3 files).

        Args:
            lrc_file: Path to the LRC lyrics file
            mp3_file: Path to the MP3 audio file
            output_zip_path: Path for the output ZIP file
            output_txt_path: Optional path for the generated TXT file; when
                omitted, the ZIP path with its extension swapped for ".txt"

        Returns:
            Tuple of (zip_path, txt_path). In dry-run mode txt_path is the
            value passed in (possibly None) because nothing is written.

        Raises:
            FileNotFoundError: If input files are missing
            Exception: If TXT generation fails
        """
        self.logger.info(f"Creating TXT package from {lrc_file}")

        # Validate inputs
        if not os.path.isfile(lrc_file):
            raise FileNotFoundError(f"LRC file not found: {lrc_file}")
        if not os.path.isfile(mp3_file):
            raise FileNotFoundError(f"MP3 file not found: {mp3_file}")

        # Existing ZIPs are only reported; the archive is rewritten below.
        self._log_existing_zip("TXT", output_zip_path)

        if self.dry_run:
            self.logger.info(
                f"DRY RUN: Would create TXT package: {output_zip_path}"
            )
            return output_zip_path, output_txt_path

        # Generate TXT from LRC
        self.logger.info(f"Converting LRC to TXT format: {lrc_file}")
        # Imported lazily so dry runs do not need the converter installed.
        from lyrics_converter import LyricsConverter

        txt_converter = LyricsConverter(output_format="txt", filepath=lrc_file)
        converted_txt = txt_converter.convert_file()

        # Write TXT file
        if output_txt_path is None:
            # Default to same name as ZIP but with .txt extension
            output_txt_path = self._default_txt_path(output_zip_path)

        # Explicit UTF-8 keeps non-ASCII lyrics writable regardless of the
        # platform's default locale encoding.
        with open(output_txt_path, "w", encoding="utf-8") as txt_file:
            txt_file.write(converted_txt)
        self.logger.info(f"TXT file written: {output_txt_path}")

        # Create ZIP containing MP3 and TXT
        self.logger.info(f"Creating TXT ZIP: {output_zip_path}")
        self._create_zip_from_files(
            output_zip_path,
            [
                (mp3_file, os.path.basename(mp3_file)),
                (output_txt_path, os.path.basename(output_txt_path)),
            ],
        )

        if not os.path.isfile(output_zip_path):
            raise Exception(f"Failed to create TXT ZIP file: {output_zip_path}")

        self.logger.info(f"TXT package created: {output_zip_path}")
        return output_zip_path, output_txt_path

    def _create_zip_from_files(
        self,
        zip_path: str,
        files: list,
    ) -> None:
        """
        Create a ZIP file from a list of files.

        Missing source files are skipped with a warning rather than raising,
        so the archive may contain fewer members than requested.

        Args:
            zip_path: Path for the output ZIP file
            files: List of (file_path, archive_name) tuples
        """
        with zipfile.ZipFile(zip_path, "w") as zipf:
            for file_path, archive_name in files:
                if os.path.isfile(file_path):
                    zipf.write(file_path, archive_name)
                    self.logger.debug(f"Added to ZIP: {archive_name}")
                else:
                    self.logger.warning(f"File not found for ZIP: {file_path}")
258
+
259
+
260
# Singleton instance and factory function (following existing service pattern)
_packaging_service: Optional[PackagingService] = None


def get_packaging_service(
    cdg_styles: Optional[Dict[str, Any]] = None,
    **kwargs
) -> PackagingService:
    """
    Get a packaging service instance.

    The cached instance is reused only when the call supplies no settings.
    Passing a truthy ``cdg_styles`` or any keyword argument builds a fresh
    instance, so options such as ``dry_run=True`` are never silently dropped
    because an earlier call already populated the cache.

    Args:
        cdg_styles: CDG style configuration. When falsy and a cached instance
            exists, that instance's styles are carried over to the new one.
        **kwargs: Additional arguments passed to PackagingService

    Returns:
        PackagingService instance
    """
    global _packaging_service

    # Create new instance if settings changed
    if _packaging_service is None or cdg_styles or kwargs:
        if not cdg_styles and _packaging_service is not None:
            # Only the non-style options changed; keep the configured styles.
            cdg_styles = _packaging_service.cdg_styles
        _packaging_service = PackagingService(
            cdg_styles=cdg_styles,
            **kwargs
        )

    return _packaging_service
@@ -0,0 +1,106 @@
1
+ """
2
+ Rclone configuration service for cloud storage integration.
3
+
4
+ This service manages the rclone configuration needed for Dropbox
5
+ and other cloud storage uploads from the backend workers.
6
+ """
7
+ import logging
8
+ import os
9
+ import tempfile
10
+ from typing import Optional
11
+
12
+ from backend.config import get_settings
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class RcloneService:
    """Service for managing rclone configuration."""

    # Secret Manager secret name for rclone config
    RCLONE_CONFIG_SECRET = "rclone-config"

    def __init__(self):
        """Fetch the application settings and start with no config loaded."""
        self.settings = get_settings()
        self._config_file: Optional[str] = None
        self._config_loaded = False

    def setup_rclone_config(self) -> bool:
        """
        Load the rclone config secret and expose it through the environment.

        The secret text is written to a temporary file whose path is then
        stored in the RCLONE_CONFIG environment variable.

        Returns:
            True if the config is (or already was) in place, False if the
            secret is empty/missing or any step raised.
        """
        if self._config_loaded:
            logger.debug("Rclone config already loaded")
            return True

        try:
            secret_text = self.settings.get_secret(self.RCLONE_CONFIG_SECRET)
            if not secret_text:
                logger.warning("Rclone config not found in Secret Manager")
                return False

            handle, temp_path = tempfile.mkstemp(prefix="rclone_", suffix=".conf")
            try:
                with os.fdopen(handle, 'w') as conf_out:
                    conf_out.write(secret_text)

                self._config_file = temp_path
                # rclone locates its config through this variable
                os.environ["RCLONE_CONFIG"] = temp_path
                logger.info(f"Rclone config loaded and written to {temp_path}")
                self._config_loaded = True
            except Exception:
                # The descriptor is owned by the file object created above, so
                # only the temp file itself needs removing before re-raising.
                if os.path.exists(temp_path):
                    os.unlink(temp_path)
                raise
        except Exception as err:
            logger.error(f"Failed to setup rclone config: {err}")
            return False

        return True

    def cleanup(self) -> None:
        """Delete the temporary config file and reset the service state."""
        stale_path = self._config_file

        if stale_path and os.path.exists(stale_path):
            try:
                os.unlink(stale_path)
                logger.debug(f"Cleaned up rclone config file: {stale_path}")
            except Exception as err:
                logger.warning(f"Failed to cleanup rclone config: {err}")

        # State and environment are reset even when the file was already gone.
        if stale_path is not None:
            os.environ.pop("RCLONE_CONFIG", None)
        self._config_file = None
        self._config_loaded = False

    @property
    def is_configured(self) -> bool:
        """Whether a config has been loaded and its file path is recorded."""
        has_file = self._config_file is not None
        return self._config_loaded and has_file
95
+
96
+
97
# Singleton instance
_rclone_service: Optional[RcloneService] = None


def get_rclone_service() -> RcloneService:
    """Return the shared RcloneService, creating it on the first call."""
    global _rclone_service
    if _rclone_service is not None:
        return _rclone_service

    _rclone_service = RcloneService()
    return _rclone_service
@@ -0,0 +1,65 @@
1
+ """SpaCy model preloader for container startup.
2
+
3
+ Loads SpaCy models at container startup to avoid slow loading during request processing.
4
+ Cloud Run filesystem I/O can cause 60+ second delays when loading SpaCy models lazily.
5
+ """
6
+
7
+ import logging
8
+ import time
9
+ from typing import Optional
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
# Singleton storage for preloaded models
_preloaded_models: dict = {}


def preload_spacy_model(model_name: str = "en_core_web_sm") -> None:
    """Load a SpaCy model once and keep it in the module-level cache.

    A model that is already cached is not loaded again. Load failures are
    logged and then re-raised to the caller.

    Args:
        model_name: The SpaCy model to load (default: en_core_web_sm)
    """
    if model_name in _preloaded_models:
        logger.info(f"SpaCy model '{model_name}' already preloaded")
        return

    logger.info(f"Preloading SpaCy model '{model_name}'...")
    started = time.time()

    try:
        # Imported here so the module itself loads without spacy installed.
        import spacy

        _preloaded_models[model_name] = spacy.load(model_name)

        duration = time.time() - started
        logger.info(f"SpaCy model '{model_name}' preloaded in {duration:.2f}s")
    except Exception as err:
        logger.error(f"Failed to preload SpaCy model '{model_name}': {err}")
        raise
43
+
44
+
45
def get_preloaded_model(model_name: str = "en_core_web_sm") -> Optional[object]:
    """Look up a model in the preload cache.

    Args:
        model_name: The SpaCy model name

    Returns:
        The cached object stored for ``model_name``, or None when nothing
        has been preloaded under that name.
    """
    try:
        return _preloaded_models[model_name]
    except KeyError:
        return None
55
+
56
+
57
def is_model_preloaded(model_name: str = "en_core_web_sm") -> bool:
    """Report whether ``model_name`` is present in the preload cache."""
    already_cached = model_name in _preloaded_models
    return already_cached
60
+
61
+
62
def clear_preloaded_models() -> None:
    """Empty the preload cache in place. Useful for testing."""
    # The dict is mutated rather than rebound, so no ``global`` is needed.
    _preloaded_models.clear()
@@ -0,0 +1,209 @@
1
+ """
2
+ Google Cloud Storage operations for file management.
3
+ """
4
+ import logging
5
+ import os
6
+ import json
7
+ from typing import Optional, BinaryIO, Any, Dict
8
+ from pathlib import Path
9
+ from google.cloud import storage
10
+ from datetime import timedelta
11
+
12
+ from backend.config import settings
13
+
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class StorageService:
    """Service for Google Cloud Storage operations."""

    def __init__(self):
        """Create the GCS client and bind the configured bucket."""
        self.client = storage.Client(project=settings.google_cloud_project)
        self.bucket = self.client.bucket(settings.gcs_bucket_name)

    def upload_file(self, local_path: str, destination_path: str) -> str:
        """Upload a local file to the bucket and return its destination path."""
        try:
            self.bucket.blob(destination_path).upload_from_filename(local_path)
            logger.info(f"Uploaded {local_path} to gs://{settings.gcs_bucket_name}/{destination_path}")
            return destination_path
        except Exception as err:
            logger.error(f"Error uploading file {local_path}: {err}")
            raise

    def upload_fileobj(self, file_obj: BinaryIO, destination_path: str, content_type: Optional[str] = None) -> str:
        """Upload an open binary file object, optionally tagging a content type."""
        try:
            target = self.bucket.blob(destination_path)
            if content_type:
                target.content_type = content_type
            # rewind=True asks the client to seek to the start before reading
            target.upload_from_file(file_obj, rewind=True)
            logger.info(f"Uploaded file object to gs://{settings.gcs_bucket_name}/{destination_path}")
            return destination_path
        except Exception as err:
            logger.error(f"Error uploading file object: {err}")
            raise

    def download_file(self, source_path: str, destination_path: str) -> str:
        """Download a bucket object to a local path and return that path."""
        try:
            self.bucket.blob(source_path).download_to_filename(destination_path)
            logger.info(f"Downloaded gs://{settings.gcs_bucket_name}/{source_path} to {destination_path}")
            return destination_path
        except Exception as err:
            logger.error(f"Error downloading file {source_path}: {err}")
            raise

    def generate_signed_url(self, blob_path: str, expiration_minutes: int = 60) -> str:
        """Generate a signed URL for downloading a file.

        In Cloud Run, this uses the IAM signBlob API since we don't have
        a private key available. Requires the service account to have
        roles/iam.serviceAccountTokenCreator on itself.
        """
        return self._generate_signed_url_internal(blob_path, "GET", expiration_minutes)

    def generate_signed_upload_url(self, blob_path: str, content_type: str = "application/octet-stream", expiration_minutes: int = 60) -> str:
        """Generate a signed URL for uploading a file directly to GCS.

        Clients can PUT the file content straight to GCS with this URL
        instead of sending it through the backend.

        Args:
            blob_path: The destination path in GCS
            content_type: The expected content type of the upload
            expiration_minutes: How long the URL is valid for

        Returns:
            A signed URL that accepts PUT requests with the file content
        """
        return self._generate_signed_url_internal(blob_path, "PUT", expiration_minutes, content_type)

    def _generate_signed_url_internal(self, blob_path: str, method: str, expiration_minutes: int = 60, content_type: Optional[str] = None) -> str:
        """Build a V4 signed URL for ``method`` ("GET" or "PUT") on ``blob_path``."""
        import google.auth
        from google.auth.transport import requests

        try:
            target = self.bucket.blob(blob_path)

            # Application default credentials; the project value is unused.
            credentials, _project = google.auth.default()

            signing_args = dict(
                version="v4",
                expiration=timedelta(minutes=expiration_minutes),
                method=method,
            )

            # PUT uploads carry the content type in the signed headers
            if method == "PUT" and content_type:
                signing_args["headers"] = {"Content-Type": content_type}

            # Credentials exposing a service account email (Cloud Run/GCE) are
            # refreshed so a current access token can be handed to the signer.
            if hasattr(credentials, 'service_account_email'):
                credentials.refresh(requests.Request())
                signing_args["service_account_email"] = credentials.service_account_email
                signing_args["access_token"] = credentials.token

            signed_url = target.generate_signed_url(**signing_args)

            logger.info(f"Generated signed {method} URL for {blob_path}")
            return signed_url
        except Exception as err:
            logger.error(f"Error generating signed {method} URL for {blob_path}: {err}")
            raise

    def delete_file(self, blob_path: str) -> None:
        """Delete a single object from the bucket."""
        try:
            self.bucket.blob(blob_path).delete()
            logger.info(f"Deleted gs://{settings.gcs_bucket_name}/{blob_path}")
        except Exception as err:
            logger.error(f"Error deleting file {blob_path}: {err}")
            raise

    def delete_folder(self, prefix: str) -> int:
        """
        Delete every object whose name starts with ``prefix``.

        Individual delete failures are logged and skipped; a failure of the
        listing itself is logged and reported as zero deletions.

        Args:
            prefix: The folder prefix to delete (e.g., "uploads/abc123/")

        Returns:
            Number of files deleted
        """
        try:
            # Materialize the listing before deleting anything.
            targets = list(self.bucket.list_blobs(prefix=prefix))
            removed = 0

            for entry in targets:
                try:
                    entry.delete()
                    removed += 1
                except Exception as err:
                    logger.warning(f"Error deleting blob {entry.name}: {err}")

            if removed > 0:
                logger.info(f"Deleted {removed} files from gs://{settings.gcs_bucket_name}/{prefix}")

            return removed
        except Exception as err:
            logger.error(f"Error deleting folder {prefix}: {err}")
            return 0  # Don't raise - folder deletion shouldn't break operations

    def list_files(self, prefix: str) -> list:
        """Return the names of all objects whose name starts with ``prefix``."""
        try:
            names = []
            for entry in self.bucket.list_blobs(prefix=prefix):
                names.append(entry.name)
            return names
        except Exception as err:
            logger.error(f"Error listing files with prefix {prefix}: {err}")
            raise

    def file_exists(self, blob_path: str) -> bool:
        """Return whether an object exists at ``blob_path``."""
        try:
            return self.bucket.blob(blob_path).exists()
        except Exception as err:
            logger.error(f"Error checking file existence {blob_path}: {err}")
            raise

    def upload_json(self, destination_path: str, data: Dict[str, Any]) -> str:
        """Serialize ``data`` as indented JSON and upload it to the bucket."""
        try:
            target = self.bucket.blob(destination_path)
            target.content_type = "application/json"
            payload = json.dumps(data, indent=2, ensure_ascii=False)
            target.upload_from_string(payload, content_type="application/json")
            logger.info(f"Uploaded JSON to gs://{settings.gcs_bucket_name}/{destination_path}")
            return destination_path
        except Exception as err:
            logger.error(f"Error uploading JSON to {destination_path}: {err}")
            raise

    def download_json(self, source_path: str) -> Dict[str, Any]:
        """Download an object as text and parse it as JSON."""
        try:
            raw_text = self.bucket.blob(source_path).download_as_text()
            parsed = json.loads(raw_text)
            logger.info(f"Downloaded JSON from gs://{settings.gcs_bucket_name}/{source_path}")
            return parsed
        except Exception as err:
            logger.error(f"Error downloading JSON from {source_path}: {err}")
            raise
209
+