karaoke-gen 0.90.1__py3-none-any.whl → 0.99.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- backend/.coveragerc +20 -0
- backend/.gitignore +37 -0
- backend/Dockerfile +43 -0
- backend/Dockerfile.base +74 -0
- backend/README.md +242 -0
- backend/__init__.py +0 -0
- backend/api/__init__.py +0 -0
- backend/api/dependencies.py +457 -0
- backend/api/routes/__init__.py +0 -0
- backend/api/routes/admin.py +835 -0
- backend/api/routes/audio_search.py +913 -0
- backend/api/routes/auth.py +348 -0
- backend/api/routes/file_upload.py +2112 -0
- backend/api/routes/health.py +409 -0
- backend/api/routes/internal.py +435 -0
- backend/api/routes/jobs.py +1629 -0
- backend/api/routes/review.py +652 -0
- backend/api/routes/themes.py +162 -0
- backend/api/routes/users.py +1513 -0
- backend/config.py +172 -0
- backend/main.py +157 -0
- backend/middleware/__init__.py +5 -0
- backend/middleware/audit_logging.py +124 -0
- backend/models/__init__.py +0 -0
- backend/models/job.py +519 -0
- backend/models/requests.py +123 -0
- backend/models/theme.py +153 -0
- backend/models/user.py +254 -0
- backend/models/worker_log.py +164 -0
- backend/pyproject.toml +29 -0
- backend/quick-check.sh +93 -0
- backend/requirements.txt +29 -0
- backend/run_tests.sh +60 -0
- backend/services/__init__.py +0 -0
- backend/services/audio_analysis_service.py +243 -0
- backend/services/audio_editing_service.py +278 -0
- backend/services/audio_search_service.py +702 -0
- backend/services/auth_service.py +630 -0
- backend/services/credential_manager.py +792 -0
- backend/services/discord_service.py +172 -0
- backend/services/dropbox_service.py +301 -0
- backend/services/email_service.py +1093 -0
- backend/services/encoding_interface.py +454 -0
- backend/services/encoding_service.py +502 -0
- backend/services/firestore_service.py +512 -0
- backend/services/flacfetch_client.py +573 -0
- backend/services/gce_encoding/README.md +72 -0
- backend/services/gce_encoding/__init__.py +22 -0
- backend/services/gce_encoding/main.py +589 -0
- backend/services/gce_encoding/requirements.txt +16 -0
- backend/services/gdrive_service.py +356 -0
- backend/services/job_logging.py +258 -0
- backend/services/job_manager.py +853 -0
- backend/services/job_notification_service.py +271 -0
- backend/services/langfuse_preloader.py +98 -0
- backend/services/local_encoding_service.py +590 -0
- backend/services/local_preview_encoding_service.py +407 -0
- backend/services/lyrics_cache_service.py +216 -0
- backend/services/metrics.py +413 -0
- backend/services/nltk_preloader.py +122 -0
- backend/services/packaging_service.py +287 -0
- backend/services/rclone_service.py +106 -0
- backend/services/spacy_preloader.py +65 -0
- backend/services/storage_service.py +209 -0
- backend/services/stripe_service.py +371 -0
- backend/services/structured_logging.py +254 -0
- backend/services/template_service.py +330 -0
- backend/services/theme_service.py +469 -0
- backend/services/tracing.py +543 -0
- backend/services/user_service.py +721 -0
- backend/services/worker_service.py +558 -0
- backend/services/youtube_service.py +112 -0
- backend/services/youtube_upload_service.py +445 -0
- backend/tests/__init__.py +4 -0
- backend/tests/conftest.py +224 -0
- backend/tests/emulator/__init__.py +7 -0
- backend/tests/emulator/conftest.py +109 -0
- backend/tests/emulator/test_e2e_cli_backend.py +1053 -0
- backend/tests/emulator/test_emulator_integration.py +356 -0
- backend/tests/emulator/test_style_loading_direct.py +436 -0
- backend/tests/emulator/test_worker_logs_direct.py +229 -0
- backend/tests/emulator/test_worker_logs_subcollection.py +443 -0
- backend/tests/requirements-test.txt +10 -0
- backend/tests/requirements.txt +6 -0
- backend/tests/test_admin_email_endpoints.py +411 -0
- backend/tests/test_api_integration.py +460 -0
- backend/tests/test_api_routes.py +93 -0
- backend/tests/test_audio_analysis_service.py +294 -0
- backend/tests/test_audio_editing_service.py +386 -0
- backend/tests/test_audio_search.py +1398 -0
- backend/tests/test_audio_services.py +378 -0
- backend/tests/test_auth_firestore.py +231 -0
- backend/tests/test_config_extended.py +68 -0
- backend/tests/test_credential_manager.py +377 -0
- backend/tests/test_dependencies.py +54 -0
- backend/tests/test_discord_service.py +244 -0
- backend/tests/test_distribution_services.py +820 -0
- backend/tests/test_dropbox_service.py +472 -0
- backend/tests/test_email_service.py +492 -0
- backend/tests/test_emulator_integration.py +322 -0
- backend/tests/test_encoding_interface.py +412 -0
- backend/tests/test_file_upload.py +1739 -0
- backend/tests/test_flacfetch_client.py +632 -0
- backend/tests/test_gdrive_service.py +524 -0
- backend/tests/test_instrumental_api.py +431 -0
- backend/tests/test_internal_api.py +343 -0
- backend/tests/test_job_creation_regression.py +583 -0
- backend/tests/test_job_manager.py +356 -0
- backend/tests/test_job_manager_notifications.py +329 -0
- backend/tests/test_job_notification_service.py +443 -0
- backend/tests/test_jobs_api.py +283 -0
- backend/tests/test_local_encoding_service.py +423 -0
- backend/tests/test_local_preview_encoding_service.py +567 -0
- backend/tests/test_main.py +87 -0
- backend/tests/test_models.py +918 -0
- backend/tests/test_packaging_service.py +382 -0
- backend/tests/test_requests.py +201 -0
- backend/tests/test_routes_jobs.py +282 -0
- backend/tests/test_routes_review.py +337 -0
- backend/tests/test_services.py +556 -0
- backend/tests/test_services_extended.py +112 -0
- backend/tests/test_spacy_preloader.py +119 -0
- backend/tests/test_storage_service.py +448 -0
- backend/tests/test_style_upload.py +261 -0
- backend/tests/test_template_service.py +295 -0
- backend/tests/test_theme_service.py +516 -0
- backend/tests/test_unicode_sanitization.py +522 -0
- backend/tests/test_upload_api.py +256 -0
- backend/tests/test_validate.py +156 -0
- backend/tests/test_video_worker_orchestrator.py +847 -0
- backend/tests/test_worker_log_subcollection.py +509 -0
- backend/tests/test_worker_logging.py +365 -0
- backend/tests/test_workers.py +1116 -0
- backend/tests/test_workers_extended.py +178 -0
- backend/tests/test_youtube_service.py +247 -0
- backend/tests/test_youtube_upload_service.py +568 -0
- backend/utils/test_data.py +27 -0
- backend/validate.py +173 -0
- backend/version.py +27 -0
- backend/workers/README.md +597 -0
- backend/workers/__init__.py +11 -0
- backend/workers/audio_worker.py +618 -0
- backend/workers/lyrics_worker.py +683 -0
- backend/workers/render_video_worker.py +483 -0
- backend/workers/screens_worker.py +535 -0
- backend/workers/style_helper.py +198 -0
- backend/workers/video_worker.py +1277 -0
- backend/workers/video_worker_orchestrator.py +701 -0
- backend/workers/worker_logging.py +278 -0
- karaoke_gen/instrumental_review/static/index.html +7 -4
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +6 -1
- karaoke_gen/utils/__init__.py +163 -8
- karaoke_gen/video_background_processor.py +9 -4
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/METADATA +1 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/RECORD +196 -46
- lyrics_transcriber/correction/agentic/agent.py +17 -6
- lyrics_transcriber/correction/agentic/providers/config.py +9 -5
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +96 -93
- lyrics_transcriber/correction/agentic/providers/model_factory.py +27 -6
- lyrics_transcriber/correction/anchor_sequence.py +151 -37
- lyrics_transcriber/correction/corrector.py +192 -130
- lyrics_transcriber/correction/handlers/syllables_match.py +44 -2
- lyrics_transcriber/correction/operations.py +24 -9
- lyrics_transcriber/correction/phrase_analyzer.py +18 -0
- lyrics_transcriber/frontend/package-lock.json +2 -2
- lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +1 -1
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +11 -7
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +31 -5
- lyrics_transcriber/frontend/src/components/EditModal.tsx +28 -10
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +123 -27
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +112 -60
- lyrics_transcriber/frontend/src/components/Header.tsx +90 -76
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +53 -31
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +44 -13
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +66 -50
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +124 -30
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +12 -5
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +3 -3
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +1 -1
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +11 -7
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +4 -2
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +103 -1
- lyrics_transcriber/frontend/src/theme.ts +42 -15
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/vite.config.js +5 -0
- lyrics_transcriber/frontend/web_assets/assets/{index-BECn1o8Q.js → index-BSMgOq4Z.js} +6959 -5782
- lyrics_transcriber/frontend/web_assets/assets/index-BSMgOq4Z.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/index.html +6 -2
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.svg +5 -0
- lyrics_transcriber/output/generator.py +17 -3
- lyrics_transcriber/output/video.py +60 -95
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +0 -1
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.90.1.dist-info → karaoke_gen-0.99.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Packaging Service.
|
|
3
|
+
|
|
4
|
+
Provides CDG and TXT package generation functionality, extracted from KaraokeFinalise
|
|
5
|
+
for use by both the cloud backend (video_worker) and local CLI.
|
|
6
|
+
|
|
7
|
+
This service handles:
|
|
8
|
+
- CDG (CD+G) file generation from LRC files
|
|
9
|
+
- TXT lyric file generation from LRC files
|
|
10
|
+
- ZIP packaging of CDG/MP3 and TXT/MP3 pairs
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import os
|
|
15
|
+
import zipfile
|
|
16
|
+
from typing import Optional, Dict, Any, Tuple
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PackagingService:
    """
    Service for creating CDG and TXT karaoke packages.

    CDG (CD+Graphics) is a format used by karaoke machines.
    TXT packages are used by software karaoke players.
    Both formats are packaged as ZIP files with an MP3 audio track.
    """

    def __init__(
        self,
        cdg_styles: Optional[Dict[str, Any]] = None,
        dry_run: bool = False,
        non_interactive: bool = False,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Initialize the packaging service.

        Args:
            cdg_styles: CDG style configuration for CDG generation
            dry_run: If True, log actions without performing them
            non_interactive: If True, skip interactive prompts
            logger: Optional logger instance
        """
        self.cdg_styles = cdg_styles
        self.dry_run = dry_run
        self.non_interactive = non_interactive
        self.logger = logger or logging.getLogger(__name__)

    def create_cdg_package(
        self,
        lrc_file: str,
        audio_file: str,
        output_zip_path: str,
        artist: str,
        title: str,
        output_mp3_path: Optional[str] = None,
        output_cdg_path: Optional[str] = None,
    ) -> Tuple[str, Optional[str], Optional[str]]:
        """
        Create a CDG package (ZIP containing CDG and MP3 files).

        Args:
            lrc_file: Path to the LRC lyrics file
            audio_file: Path to the instrumental audio file
            output_zip_path: Path for the output ZIP file
            artist: Artist name for metadata
            title: Song title for metadata
            output_mp3_path: Optional path for the extracted MP3 file
            output_cdg_path: Optional path for the extracted CDG file

        Returns:
            Tuple of (zip_path, mp3_path, cdg_path)

        Raises:
            ValueError: If CDG styles are not configured
            FileNotFoundError: If input files are missing
            Exception: If CDG generation fails
        """
        self.logger.info(f"Creating CDG package for {artist} - {title}")

        # Validate inputs before doing any work.
        if not os.path.isfile(lrc_file):
            raise FileNotFoundError(f"LRC file not found: {lrc_file}")
        if not os.path.isfile(audio_file):
            raise FileNotFoundError(f"Audio file not found: {audio_file}")

        # An existing ZIP is always regenerated; the branch only affects
        # what is logged (non-interactive runs announce the overwrite).
        if os.path.isfile(output_zip_path):
            if self.non_interactive:
                self.logger.info(
                    f"CDG ZIP exists, will be overwritten: {output_zip_path}"
                )
            else:
                self.logger.info(f"CDG ZIP already exists: {output_zip_path}")

        # Fast path: if both artifacts already exist, skip generation and
        # just zip them up.
        if output_mp3_path and output_cdg_path:
            if os.path.isfile(output_mp3_path) and os.path.isfile(output_cdg_path):
                self.logger.info("Found existing MP3 and CDG files, creating ZIP directly")
                if not self.dry_run:
                    self._create_zip_from_files(
                        output_zip_path,
                        [(output_mp3_path, os.path.basename(output_mp3_path)),
                         (output_cdg_path, os.path.basename(output_cdg_path))]
                    )
                return output_zip_path, output_mp3_path, output_cdg_path

        if self.dry_run:
            self.logger.info(
                f"DRY RUN: Would generate CDG package: {output_zip_path}"
            )
            return output_zip_path, output_mp3_path, output_cdg_path

        # Generation requires a style configuration.
        if self.cdg_styles is None:
            raise ValueError(
                "CDG styles configuration is required for CDG generation"
            )

        self.logger.info("Generating CDG and MP3 files")
        # Local import: keeps the heavy dependency out of module import time.
        from lyrics_transcriber.output.cdg import CDGGenerator

        output_dir = os.path.dirname(output_zip_path) or os.getcwd()
        generator = CDGGenerator(output_dir=output_dir, logger=self.logger)

        cdg_file, mp3_file, zip_file = generator.generate_cdg_from_lrc(
            lrc_file=lrc_file,
            audio_file=audio_file,
            title=title,
            artist=artist,
            cdg_styles=self.cdg_styles,
        )

        # The generator names the ZIP itself; move it to the requested path.
        if os.path.isfile(zip_file) and zip_file != output_zip_path:
            os.rename(zip_file, output_zip_path)
            self.logger.info(f"Renamed CDG ZIP: {zip_file} -> {output_zip_path}")

        if not os.path.isfile(output_zip_path):
            raise Exception(f"Failed to create CDG ZIP file: {output_zip_path}")

        # Extract the ZIP to get individual files if paths were requested.
        extracted_mp3 = None
        extracted_cdg = None
        if output_mp3_path or output_cdg_path:
            self.logger.info(f"Extracting CDG ZIP file: {output_zip_path}")
            with zipfile.ZipFile(output_zip_path, "r") as zip_ref:
                zip_ref.extractall(output_dir)

            # Report only the files that actually landed at the expected paths.
            if output_mp3_path and os.path.isfile(output_mp3_path):
                extracted_mp3 = output_mp3_path
                self.logger.info(f"Extracted MP3: {extracted_mp3}")
            if output_cdg_path and os.path.isfile(output_cdg_path):
                extracted_cdg = output_cdg_path
                self.logger.info(f"Extracted CDG: {extracted_cdg}")

        self.logger.info(f"CDG package created: {output_zip_path}")
        return output_zip_path, extracted_mp3, extracted_cdg

    def create_txt_package(
        self,
        lrc_file: str,
        mp3_file: str,
        output_zip_path: str,
        output_txt_path: Optional[str] = None,
    ) -> Tuple[str, Optional[str]]:
        """
        Create a TXT package (ZIP containing TXT lyrics and MP3 files).

        Args:
            lrc_file: Path to the LRC lyrics file
            mp3_file: Path to the MP3 audio file
            output_zip_path: Path for the output ZIP file
            output_txt_path: Optional path for the generated TXT file

        Returns:
            Tuple of (zip_path, txt_path)

        Raises:
            FileNotFoundError: If input files are missing
            Exception: If TXT generation fails
        """
        self.logger.info(f"Creating TXT package from {lrc_file}")

        # Validate inputs before doing any work.
        if not os.path.isfile(lrc_file):
            raise FileNotFoundError(f"LRC file not found: {lrc_file}")
        if not os.path.isfile(mp3_file):
            raise FileNotFoundError(f"MP3 file not found: {mp3_file}")

        # An existing ZIP is always regenerated; the branch only affects logging.
        if os.path.isfile(output_zip_path):
            if self.non_interactive:
                self.logger.info(
                    f"TXT ZIP exists, will be overwritten: {output_zip_path}"
                )
            else:
                self.logger.info(f"TXT ZIP already exists: {output_zip_path}")

        if self.dry_run:
            self.logger.info(
                f"DRY RUN: Would create TXT package: {output_zip_path}"
            )
            return output_zip_path, output_txt_path

        # Generate TXT from LRC.
        self.logger.info(f"Converting LRC to TXT format: {lrc_file}")
        # Local import: keeps the dependency out of module import time.
        from lyrics_converter import LyricsConverter

        txt_converter = LyricsConverter(output_format="txt", filepath=lrc_file)
        converted_txt = txt_converter.convert_file()

        if output_txt_path is None:
            # Default to same name as ZIP but with .txt extension.
            # os.path.splitext (rather than str.replace) ensures only the
            # final extension is swapped: a ".zip" occurring earlier in the
            # path is left alone, and a ZIP path without a ".zip" suffix
            # still produces a distinct .txt path instead of overwriting it.
            output_txt_path = os.path.splitext(output_zip_path)[0] + ".txt"

        # Lyrics frequently contain non-ASCII characters, so write UTF-8
        # explicitly rather than relying on the platform default encoding.
        with open(output_txt_path, "w", encoding="utf-8") as txt_file:
            txt_file.write(converted_txt)
        self.logger.info(f"TXT file written: {output_txt_path}")

        # Create ZIP containing MP3 and TXT.
        self.logger.info(f"Creating TXT ZIP: {output_zip_path}")
        self._create_zip_from_files(
            output_zip_path,
            [(mp3_file, os.path.basename(mp3_file)),
             (output_txt_path, os.path.basename(output_txt_path))]
        )

        if not os.path.isfile(output_zip_path):
            raise Exception(f"Failed to create TXT ZIP file: {output_zip_path}")

        self.logger.info(f"TXT package created: {output_zip_path}")
        return output_zip_path, output_txt_path

    def _create_zip_from_files(
        self,
        zip_path: str,
        files: list,
    ) -> None:
        """
        Create a ZIP file from a list of files.

        Missing source files are skipped with a warning rather than
        aborting the whole package.

        Args:
            zip_path: Path for the output ZIP file
            files: List of (file_path, archive_name) tuples
        """
        with zipfile.ZipFile(zip_path, "w") as zipf:
            for file_path, archive_name in files:
                if os.path.isfile(file_path):
                    zipf.write(file_path, archive_name)
                    self.logger.debug(f"Added to ZIP: {archive_name}")
                else:
                    self.logger.warning(f"File not found for ZIP: {file_path}")
259
|
+
|
|
260
|
+
# Singleton instance and factory function (following existing service pattern)
_packaging_service: Optional[PackagingService] = None


def get_packaging_service(
    cdg_styles: Optional[Dict[str, Any]] = None,
    **kwargs
) -> PackagingService:
    """
    Get a packaging service instance.

    Args:
        cdg_styles: CDG style configuration
        **kwargs: Additional arguments passed to PackagingService

    Returns:
        PackagingService instance
    """
    global _packaging_service

    # Recreate the instance whenever configuration is supplied.  Checking
    # "is not None" (rather than truthiness) means an explicitly-passed
    # empty style dict is honoured, and checking kwargs means options such
    # as dry_run are not silently ignored once a cached instance exists.
    if _packaging_service is None or cdg_styles is not None or kwargs:
        _packaging_service = PackagingService(
            cdg_styles=cdg_styles,
            **kwargs
        )

    return _packaging_service
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Rclone configuration service for cloud storage integration.
|
|
3
|
+
|
|
4
|
+
This service manages the rclone configuration needed for Dropbox
|
|
5
|
+
and other cloud storage uploads from the backend workers.
|
|
6
|
+
"""
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
import tempfile
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from backend.config import get_settings
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class RcloneService:
    """Service for managing rclone configuration."""

    # Secret Manager secret name for rclone config
    RCLONE_CONFIG_SECRET = "rclone-config"

    def __init__(self):
        self.settings = get_settings()
        # Path of the temp file holding the config, once written.
        self._config_file: Optional[str] = None
        self._config_loaded = False

    def setup_rclone_config(self) -> bool:
        """
        Load rclone config from Secret Manager and set up environment.

        Writes the config to a temp file and sets RCLONE_CONFIG env var.

        Returns:
            True if successful, False otherwise
        """
        if self._config_loaded:
            logger.debug("Rclone config already loaded")
            return True

        try:
            # Pull the config text from Secret Manager.
            secret_text = self.settings.get_secret(self.RCLONE_CONFIG_SECRET)
            if not secret_text:
                logger.warning("Rclone config not found in Secret Manager")
                return False

            handle, conf_path = tempfile.mkstemp(prefix="rclone_", suffix=".conf")
            try:
                with os.fdopen(handle, 'w') as out:
                    out.write(secret_text)

                self._config_file = conf_path
                # Point rclone at the config via its environment variable.
                os.environ["RCLONE_CONFIG"] = conf_path
                logger.info(f"Rclone config loaded and written to {conf_path}")
                self._config_loaded = True
                return True
            except Exception:
                # os.fdopen took ownership of the descriptor (already closed);
                # only the temp file itself may need removing before re-raising.
                if os.path.exists(conf_path):
                    os.unlink(conf_path)
                raise

        except Exception as e:
            logger.error(f"Failed to setup rclone config: {e}")
            return False

    def cleanup(self) -> None:
        """Remove the temporary config file."""
        path = self._config_file
        if path and os.path.exists(path):
            try:
                os.unlink(path)
                logger.debug(f"Cleaned up rclone config file: {path}")
            except Exception as e:
                logger.warning(f"Failed to cleanup rclone config: {e}")

        # Reset state and environment even when the file itself was missing.
        if path is not None:
            os.environ.pop("RCLONE_CONFIG", None)
            self._config_file = None
            self._config_loaded = False

    @property
    def is_configured(self) -> bool:
        """Check if rclone is configured and ready to use."""
        return self._config_file is not None and self._config_loaded
+
|
|
96
|
+
|
|
97
|
+
# Singleton instance
_rclone_service: Optional[RcloneService] = None


def get_rclone_service() -> RcloneService:
    """Get the singleton rclone service instance."""
    global _rclone_service
    if _rclone_service is not None:
        return _rclone_service
    # First call: build and cache the service.
    _rclone_service = RcloneService()
    return _rclone_service
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""SpaCy model preloader for container startup.
|
|
2
|
+
|
|
3
|
+
Loads SpaCy models at container startup to avoid slow loading during request processing.
|
|
4
|
+
Cloud Run filesystem I/O can cause 60+ second delays when loading SpaCy models lazily.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import time
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
# Singleton storage for preloaded models
|
|
14
|
+
_preloaded_models: dict = {}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def preload_spacy_model(model_name: str = "en_core_web_sm") -> None:
|
|
18
|
+
"""Preload a SpaCy model at startup.
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
model_name: The SpaCy model to load (default: en_core_web_sm)
|
|
22
|
+
"""
|
|
23
|
+
global _preloaded_models
|
|
24
|
+
|
|
25
|
+
if model_name in _preloaded_models:
|
|
26
|
+
logger.info(f"SpaCy model '{model_name}' already preloaded")
|
|
27
|
+
return
|
|
28
|
+
|
|
29
|
+
logger.info(f"Preloading SpaCy model '{model_name}'...")
|
|
30
|
+
start_time = time.time()
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
import spacy
|
|
34
|
+
|
|
35
|
+
nlp = spacy.load(model_name)
|
|
36
|
+
_preloaded_models[model_name] = nlp
|
|
37
|
+
|
|
38
|
+
elapsed = time.time() - start_time
|
|
39
|
+
logger.info(f"SpaCy model '{model_name}' preloaded in {elapsed:.2f}s")
|
|
40
|
+
except Exception as e:
|
|
41
|
+
logger.error(f"Failed to preload SpaCy model '{model_name}': {e}")
|
|
42
|
+
raise
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_preloaded_model(model_name: str = "en_core_web_sm") -> Optional[object]:
|
|
46
|
+
"""Get a preloaded SpaCy model if available.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
model_name: The SpaCy model name
|
|
50
|
+
|
|
51
|
+
Returns:
|
|
52
|
+
The preloaded SpaCy Language object, or None if not preloaded
|
|
53
|
+
"""
|
|
54
|
+
return _preloaded_models.get(model_name)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def is_model_preloaded(model_name: str = "en_core_web_sm") -> bool:
|
|
58
|
+
"""Check if a SpaCy model has been preloaded."""
|
|
59
|
+
return model_name in _preloaded_models
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def clear_preloaded_models() -> None:
|
|
63
|
+
"""Clear all preloaded models. Useful for testing."""
|
|
64
|
+
global _preloaded_models
|
|
65
|
+
_preloaded_models.clear()
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Google Cloud Storage operations for file management.
|
|
3
|
+
"""
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import json
|
|
7
|
+
from typing import Optional, BinaryIO, Any, Dict
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from google.cloud import storage
|
|
10
|
+
from datetime import timedelta
|
|
11
|
+
|
|
12
|
+
from backend.config import settings
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class StorageService:
|
|
19
|
+
"""Service for Google Cloud Storage operations."""
|
|
20
|
+
|
|
21
|
+
def __init__(self):
    """Initialize GCS client.

    Reads the target project and bucket name from backend settings.
    """
    # Client bound to the configured GCP project.
    self.client = storage.Client(project=settings.google_cloud_project)
    # Bucket handle that all operations in this service act on.
    self.bucket = self.client.bucket(settings.gcs_bucket_name)
|
+
|
|
26
|
+
def upload_file(self, local_path: str, destination_path: str) -> str:
    """Upload a single local file into the bucket at *destination_path*.

    Returns the destination path on success; logs and re-raises on failure.
    """
    try:
        self.bucket.blob(destination_path).upload_from_filename(local_path)
        logger.info(f"Uploaded {local_path} to gs://{settings.gcs_bucket_name}/{destination_path}")
        return destination_path
    except Exception as e:
        logger.error(f"Error uploading file {local_path}: {e}")
        raise
|
+
|
|
37
|
+
def upload_fileobj(self, file_obj: BinaryIO, destination_path: str, content_type: Optional[str] = None) -> str:
    """Stream a file-like object into the bucket at *destination_path*.

    Returns the destination path on success; logs and re-raises on failure.
    """
    try:
        target = self.bucket.blob(destination_path)
        if content_type:
            target.content_type = content_type
        # rewind=True seeks the object back to its start so a previously
        # read stream is uploaded in full.
        target.upload_from_file(file_obj, rewind=True)
        logger.info(f"Uploaded file object to gs://{settings.gcs_bucket_name}/{destination_path}")
        return destination_path
    except Exception as e:
        logger.error(f"Error uploading file object: {e}")
        raise
|
+
|
|
50
|
+
def download_file(self, source_path: str, destination_path: str) -> str:
    """Fetch a blob from the bucket and write it to a local path.

    Returns the local destination path on success; logs and re-raises on failure.
    """
    try:
        self.bucket.blob(source_path).download_to_filename(destination_path)
        logger.info(f"Downloaded gs://{settings.gcs_bucket_name}/{source_path} to {destination_path}")
        return destination_path
    except Exception as e:
        logger.error(f"Error downloading file {source_path}: {e}")
        raise
|
+
|
|
61
|
+
def generate_signed_url(self, blob_path: str, expiration_minutes: int = 60) -> str:
    """Return a time-limited signed download (GET) URL for *blob_path*.

    In Cloud Run there is no service-account private key on disk, so the
    signature is produced via the IAM signBlob API; the service account
    must hold roles/iam.serviceAccountTokenCreator on itself.
    """
    return self._generate_signed_url_internal(blob_path, "GET", expiration_minutes)
|
+
|
|
70
|
+
def generate_signed_upload_url(self, blob_path: str, content_type: str = "application/octet-stream", expiration_minutes: int = 60) -> str:
    """Return a time-limited signed PUT URL for uploading straight to GCS.

    Clients push the file content directly to the bucket over HTTP PUT,
    which sidesteps any request-body size limit on the backend itself.

    Args:
        blob_path: The destination path in GCS
        content_type: The expected content type of the upload
        expiration_minutes: How long the URL is valid for

    Returns:
        A signed URL that accepts PUT requests with the file content
    """
    return self._generate_signed_url_internal(blob_path, "PUT", expiration_minutes, content_type)
|
+
|
|
86
|
+
def _generate_signed_url_internal(self, blob_path: str, method: str, expiration_minutes: int = 60, content_type: Optional[str] = None) -> str:
    """Internal method to generate signed URLs for GET or PUT operations.

    Args:
        blob_path: Path of the blob within the configured bucket.
        method: HTTP method the URL authorizes ("GET" or "PUT").
        expiration_minutes: Validity window of the URL in minutes.
        content_type: Content type bound into the signature (PUT only).

    Returns:
        A v4 signed URL string.

    Raises:
        Exception: Any error from credential refresh or URL signing is
            logged and re-raised unchanged.
    """
    # Deferred imports: google.auth is only needed when signing.
    import google.auth
    from google.auth.transport import requests

    try:
        blob = self.bucket.blob(blob_path)

        # Get default credentials and refresh to ensure we have a valid token
        credentials, project = google.auth.default()

        # Common kwargs for signed URL generation
        kwargs = {
            "version": "v4",
            "expiration": timedelta(minutes=expiration_minutes),
            "method": method,
        }

        # For PUT requests, we need to specify the content type in headers
        # so the uploader must send the same Content-Type for the
        # signature to validate.
        if method == "PUT" and content_type:
            kwargs["headers"] = {"Content-Type": content_type}

        # Check if we're using compute credentials (Cloud Run/GCE)
        # These need to use IAM signBlob via service_account_email + access_token
        # (no private key is available in those environments).
        if hasattr(credentials, 'service_account_email'):
            # Refresh credentials to get a valid access token
            auth_request = requests.Request()
            credentials.refresh(auth_request)

            kwargs["service_account_email"] = credentials.service_account_email
            kwargs["access_token"] = credentials.token

        url = blob.generate_signed_url(**kwargs)

        logger.info(f"Generated signed {method} URL for {blob_path}")
        return url
    except Exception as e:
        logger.error(f"Error generating signed {method} URL for {blob_path}: {e}")
        raise
|
+
|
|
126
|
+
def delete_file(self, blob_path: str) -> None:
    """Remove a single object from the bucket.

    Raises:
        Exception: Any GCS client error is re-raised after being logged.
    """
    try:
        self.bucket.blob(blob_path).delete()
        logger.info(f"Deleted gs://{settings.gcs_bucket_name}/{blob_path}")
    except Exception as e:
        logger.error(f"Error deleting file {blob_path}: {e}")
        raise
def delete_folder(self, prefix: str) -> int:
    """
    Delete all files in GCS with a given prefix (folder).

    Best-effort: failures on individual blobs are logged and skipped,
    and a failure to list the prefix yields 0 rather than raising, so
    cleanup flows are never interrupted.

    Args:
        prefix: The folder prefix to delete (e.g., "uploads/abc123/")

    Returns:
        Number of files deleted
    """
    deleted_count = 0
    try:
        # Materialize the listing first so a listing error is reported
        # as a whole-folder failure (return 0) rather than a partial one.
        matching_blobs = list(self.bucket.list_blobs(prefix=prefix))

        for candidate in matching_blobs:
            try:
                candidate.delete()
            except Exception as e:
                logger.warning(f"Error deleting blob {candidate.name}: {e}")
            else:
                deleted_count += 1

        if deleted_count:
            logger.info(f"Deleted {deleted_count} files from gs://{settings.gcs_bucket_name}/{prefix}")

        return deleted_count
    except Exception as e:
        logger.error(f"Error deleting folder {prefix}: {e}")
        return 0  # Don't raise - folder deletion shouldn't break operations
def list_files(self, prefix: str) -> list:
    """Return the names of all objects whose paths start with *prefix*.

    Raises:
        Exception: Any GCS client error is re-raised after being logged.
    """
    try:
        return [entry.name for entry in self.bucket.list_blobs(prefix=prefix)]
    except Exception as e:
        logger.error(f"Error listing files with prefix {prefix}: {e}")
        raise
def file_exists(self, blob_path: str) -> bool:
    """Return True when an object exists at *blob_path* in the bucket.

    Raises:
        Exception: Any GCS client error is re-raised after being logged.
    """
    try:
        return self.bucket.blob(blob_path).exists()
    except Exception as e:
        logger.error(f"Error checking file existence {blob_path}: {e}")
        raise
def upload_json(self, destination_path: str, data: Dict[str, Any]) -> str:
    """Serialize *data* as pretty-printed JSON and upload it to GCS.

    Args:
        destination_path: Object path within the bucket to write to.
        data: JSON-serializable mapping to store.

    Returns:
        The destination path, for convenient chaining.

    Raises:
        Exception: Any serialization/GCS error is re-raised after logging.
    """
    try:
        blob = self.bucket.blob(destination_path)
        # content_type is passed directly to upload_from_string, which
        # sets it on the object; a separate blob.content_type assignment
        # beforehand is redundant.
        blob.upload_from_string(
            json.dumps(data, indent=2, ensure_ascii=False),
            content_type="application/json"
        )
        logger.info(f"Uploaded JSON to gs://{settings.gcs_bucket_name}/{destination_path}")
        return destination_path
    except Exception as e:
        logger.error(f"Error uploading JSON to {destination_path}: {e}")
        raise
def download_json(self, source_path: str) -> Dict[str, Any]:
    """Fetch an object from GCS and parse its contents as JSON.

    Raises:
        Exception: Any download/parse error is re-raised after being logged.
    """
    try:
        raw_text = self.bucket.blob(source_path).download_as_text()
        parsed = json.loads(raw_text)
        logger.info(f"Downloaded JSON from gs://{settings.gcs_bucket_name}/{source_path}")
        return parsed
    except Exception as e:
        logger.error(f"Error downloading JSON from {source_path}: {e}")
        raise