lyrics-transcriber 0.20.0__py3-none-any.whl → 0.30.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/__init__.py +2 -5
- lyrics_transcriber/cli/cli_main.py +206 -0
- lyrics_transcriber/core/__init__.py +0 -0
- lyrics_transcriber/core/controller.py +317 -0
- lyrics_transcriber/correction/base_strategy.py +29 -0
- lyrics_transcriber/correction/corrector.py +52 -0
- lyrics_transcriber/correction/strategy_diff.py +263 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +201 -0
- lyrics_transcriber/lyrics/genius.py +70 -0
- lyrics_transcriber/lyrics/spotify.py +82 -0
- lyrics_transcriber/output/__init__.py +0 -0
- lyrics_transcriber/output/generator.py +271 -0
- lyrics_transcriber/{utils → output}/subtitles.py +12 -12
- lyrics_transcriber/storage/__init__.py +0 -0
- lyrics_transcriber/storage/dropbox.py +225 -0
- lyrics_transcriber/transcribers/audioshake.py +216 -0
- lyrics_transcriber/transcribers/base_transcriber.py +186 -0
- lyrics_transcriber/transcribers/whisper.py +321 -0
- {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/METADATA +5 -16
- lyrics_transcriber-0.30.1.dist-info/RECORD +25 -0
- lyrics_transcriber-0.30.1.dist-info/entry_points.txt +3 -0
- lyrics_transcriber/audioshake_transcriber.py +0 -122
- lyrics_transcriber/corrector.py +0 -57
- lyrics_transcriber/llm_prompts/README.md +0 -10
- lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -55
- lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -36
- lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -19
- lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -61
- lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -48
- lyrics_transcriber/transcriber.py +0 -934
- lyrics_transcriber/utils/cli.py +0 -179
- lyrics_transcriber-0.20.0.dist-info/RECORD +0 -19
- lyrics_transcriber-0.20.0.dist-info/entry_points.txt +0 -3
- /lyrics_transcriber/{utils → cli}/__init__.py +0 -0
- /lyrics_transcriber/{utils → output}/ass.py +0 -0
- {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,271 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
import os
|
3
|
+
import logging
|
4
|
+
from typing import Dict, Any, List, Optional, Tuple
|
5
|
+
import subprocess
|
6
|
+
from datetime import timedelta
|
7
|
+
|
8
|
+
from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
|
9
|
+
from .subtitles import create_styled_subtitles, LyricsScreen, LyricsLine, LyricSegment
|
10
|
+
from ..correction.corrector import CorrectionResult
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass
class OutputGeneratorConfig:
    """Settings that control where and how output files are generated.

    Validation runs automatically after construction (see ``__post_init__``).
    """

    # Directory that generated files are written to (required, non-empty).
    output_dir: str
    # Cache directory (required, non-empty).
    cache_dir: str
    # Named resolution preset for rendered video.
    video_resolution: str = "360p"
    # Optional path to an image used as the video background.
    video_background_image: Optional[str] = None
    # Background colour used when no background image is configured.
    video_background_color: str = "black"

    def __post_init__(self):
        """Check the required directories and the optional background image.

        Raises:
            ValueError: If ``output_dir`` or ``cache_dir`` is empty.
            FileNotFoundError: If ``video_background_image`` is set but is not
                an existing file.
        """
        # Checked in declaration order so the first missing field is the one reported.
        for required_field in ("output_dir", "cache_dir"):
            if not getattr(self, required_field):
                raise ValueError(f"{required_field} must be provided")

        background = self.video_background_image
        if background and not os.path.isfile(background):
            raise FileNotFoundError(f"Video background image not found: {background}")
|
31
|
+
|
32
|
+
|
33
|
+
@dataclass
class OutputPaths:
    """Paths of the files produced by one output-generation run.

    Every field defaults to ``None``; a field is only filled in once the
    corresponding file has been generated.
    """

    # Path of the LRC lyrics file.
    lrc: Optional[str] = None
    # Path of the ASS subtitles file.
    ass: Optional[str] = None
    # Path of the rendered video file.
    video: Optional[str] = None
|
40
|
+
|
41
|
+
|
42
|
+
class OutputGenerator:
|
43
|
+
"""Handles generation of various lyrics output formats."""
|
44
|
+
|
45
|
+
def __init__(
    self,
    config: OutputGeneratorConfig,
    logger: Optional[logging.Logger] = None,
):
    """Store the configuration and derive the resolution-dependent settings.

    Args:
        config: OutputGeneratorConfig instance with required paths
        logger: Logger to report to; the module-level logger is used when
            none is given.

    Raises:
        ValueError: Propagated from ``_get_video_params`` when
            ``config.video_resolution`` is not a known preset.
    """
    self.config = config
    self.logger = logger if logger else logging.getLogger(__name__)

    # Record which directories this generator was configured with.
    self.logger.debug(f"Initialized OutputGenerator with output_dir: {self.config.output_dir}")
    self.logger.debug(f"Using cache_dir: {self.config.cache_dir}")

    # Translate the resolution preset into pixel size, font size and line height.
    video_params = self._get_video_params(self.config.video_resolution)
    self.video_resolution_num, self.font_size, self.line_height = video_params
|
66
|
+
|
67
|
+
def generate_outputs(
    self,
    transcription_corrected: CorrectionResult,
    lyrics_results: List[LyricsData],
    output_prefix: str,
    audio_filepath: str,
    render_video: bool = False,
) -> OutputPaths:
    """Write every requested output file and report where each one went.

    A plain-text lyrics file is written for each entry of ``lyrics_results``.
    When ``transcription_corrected`` is truthy, the corrected plain text, the
    LRC file and the ASS file are written as well, plus the video when
    ``render_video`` is set.

    Args:
        transcription_corrected: Corrected transcription to render.
        lyrics_results: Lyrics fetched from the individual providers.
        output_prefix: Base file name (without extension) for all outputs.
        audio_filepath: Audio file used as the video soundtrack.
        render_video: Whether to render the video.

    Returns:
        OutputPaths with the LRC / ASS / video paths that were produced.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    paths = OutputPaths()

    try:
        # One plain-text file per lyrics provider.
        for provider_lyrics in lyrics_results:
            source_label = provider_lyrics.metadata.source.title()
            self.write_plain_lyrics(provider_lyrics, f"{output_prefix} (Lyrics {source_label})")

        # Everything below needs a corrected transcription.
        if not transcription_corrected:
            return paths

        self.write_plain_lyrics_from_correction(transcription_corrected, f"{output_prefix} (Lyrics Corrected)")
        paths.lrc = self.generate_lrc(transcription_corrected, output_prefix)
        paths.ass = self.generate_ass(transcription_corrected, output_prefix)

        # The video is rendered from the ASS file produced just above.
        if render_video:
            paths.video = self.generate_video(paths.ass, audio_filepath, output_prefix)

    except Exception as exc:
        self.logger.error(f"Error generating outputs: {str(exc)}")
        raise

    return paths
|
103
|
+
|
104
|
+
def _get_output_path(self, output_prefix: str, extension: str) -> str:
|
105
|
+
"""Generate full output path for a file."""
|
106
|
+
return os.path.join(self.config.output_dir or self.config.cache_dir, f"{output_prefix}.{extension}")
|
107
|
+
|
108
|
+
def generate_lrc(self, transcription_data: CorrectionResult, output_prefix: str) -> str:
    """Write the segments of ``transcription_data`` as an LRC file.

    Args:
        transcription_data: Result whose ``segments`` are written out.
        output_prefix: Base file name; ``.lrc`` is appended.

    Returns:
        Path of the written LRC file.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    self.logger.info("Generating LRC format lyrics")
    lrc_path = self._get_output_path(output_prefix, "lrc")

    try:
        self._write_lrc_file(lrc_path, transcription_data.segments)
        self.logger.info(f"LRC file generated: {lrc_path}")
        return lrc_path

    except Exception as exc:
        self.logger.error(f"Failed to generate LRC file: {str(exc)}")
        raise
|
121
|
+
|
122
|
+
def _write_lrc_file(self, output_path: str, segments: list) -> None:
|
123
|
+
"""Write LRC file content."""
|
124
|
+
with open(output_path, "w", encoding="utf-8") as f:
|
125
|
+
for segment in segments:
|
126
|
+
start_time = self._format_lrc_timestamp(segment.start_time)
|
127
|
+
line = f"[{start_time}]{segment.text}\n"
|
128
|
+
f.write(line)
|
129
|
+
|
130
|
+
def generate_ass(self, transcription_data: CorrectionResult, output_prefix: str) -> str:
    """Write the segments of ``transcription_data`` as an ASS subtitles file.

    Args:
        transcription_data: Result whose ``segments`` are written out.
        output_prefix: Base file name; ``.ass`` is appended.

    Returns:
        Path of the written ASS file.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    self.logger.info("Generating ASS format subtitles")
    ass_path = self._get_output_path(output_prefix, "ass")

    try:
        self._write_ass_file(ass_path, transcription_data.segments)
        self.logger.info(f"ASS file generated: {ass_path}")
        return ass_path

    except Exception as exc:
        self.logger.error(f"Failed to generate ASS file: {str(exc)}")
        raise
|
143
|
+
|
144
|
+
def _write_ass_file(self, output_path: str, segments: list) -> None:
|
145
|
+
"""Write ASS file content."""
|
146
|
+
with open(output_path, "w", encoding="utf-8") as f:
|
147
|
+
f.write(self._get_ass_header())
|
148
|
+
for segment in segments:
|
149
|
+
# Change from ts/end_ts to start_time/end_time
|
150
|
+
start_time = self._format_ass_timestamp(segment.start_time)
|
151
|
+
end_time = self._format_ass_timestamp(segment.end_time)
|
152
|
+
line = f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{segment.text}\n"
|
153
|
+
f.write(line)
|
154
|
+
|
155
|
+
def generate_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
    """Render an MP4 combining the audio with the ASS subtitles.

    Args:
        ass_path: ASS subtitles file to overlay.
        audio_path: Audio file used as the soundtrack.
        output_prefix: Base file name; ``.mp4`` is appended.

    Returns:
        Path of the rendered video.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    self.logger.info("Generating video with lyrics overlay")
    video_path = self._get_output_path(output_prefix, "mp4")

    try:
        ffmpeg_args = self._build_ffmpeg_command(ass_path, audio_path, video_path)
        self._run_ffmpeg_command(ffmpeg_args)
        self.logger.info(f"Video generated: {video_path}")
        return video_path

    except Exception as exc:
        self.logger.error(f"Failed to generate video: {str(exc)}")
        raise
|
169
|
+
|
170
|
+
def _build_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> list:
|
171
|
+
"""Build FFmpeg command for video generation."""
|
172
|
+
width, height = self.video_resolution_num
|
173
|
+
cmd = ["ffmpeg", "-y"]
|
174
|
+
|
175
|
+
# Input source (background)
|
176
|
+
if self.config.video_background_image:
|
177
|
+
cmd.extend(["-i", self.config.video_background_image])
|
178
|
+
else:
|
179
|
+
cmd.extend(["-f", "lavfi", "-i", f"color=c={self.config.video_background_color}:s={width}x{height}"])
|
180
|
+
|
181
|
+
# Add audio and subtitle inputs
|
182
|
+
cmd.extend(["-i", audio_path, "-vf", f"ass={ass_path}", "-c:v", "libx264", "-c:a", "aac", "-shortest", output_path])
|
183
|
+
|
184
|
+
return cmd
|
185
|
+
|
186
|
+
def _run_ffmpeg_command(self, cmd: list) -> None:
|
187
|
+
"""Execute FFmpeg command."""
|
188
|
+
self.logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
|
189
|
+
try:
|
190
|
+
subprocess.run(cmd, check=True)
|
191
|
+
except subprocess.CalledProcessError as e:
|
192
|
+
self.logger.error(f"FFmpeg error: {str(e)}")
|
193
|
+
raise
|
194
|
+
|
195
|
+
def _get_video_params(self, resolution: str) -> tuple:
|
196
|
+
"""Get video parameters based on resolution setting."""
|
197
|
+
match resolution:
|
198
|
+
case "4k":
|
199
|
+
return (3840, 2160), 250, 250
|
200
|
+
case "1080p":
|
201
|
+
return (1920, 1080), 120, 120
|
202
|
+
case "720p":
|
203
|
+
return (1280, 720), 100, 100
|
204
|
+
case "360p":
|
205
|
+
return (640, 360), 50, 50
|
206
|
+
case _:
|
207
|
+
raise ValueError("Invalid video_resolution value. Must be one of: 4k, 1080p, 720p, 360p")
|
208
|
+
|
209
|
+
def _format_lrc_timestamp(self, seconds: float) -> str:
|
210
|
+
"""Format timestamp for LRC format."""
|
211
|
+
time = timedelta(seconds=seconds)
|
212
|
+
minutes = int(time.total_seconds() / 60)
|
213
|
+
seconds = time.total_seconds() % 60
|
214
|
+
return f"{minutes:02d}:{seconds:05.2f}"
|
215
|
+
|
216
|
+
def _format_ass_timestamp(self, seconds: float) -> str:
|
217
|
+
"""Format timestamp for ASS format."""
|
218
|
+
time = timedelta(seconds=seconds)
|
219
|
+
hours = int(time.total_seconds() / 3600)
|
220
|
+
minutes = int((time.total_seconds() % 3600) / 60)
|
221
|
+
seconds = time.total_seconds() % 60
|
222
|
+
centiseconds = int((seconds % 1) * 100)
|
223
|
+
seconds = int(seconds)
|
224
|
+
return f"{hours}:{minutes:02d}:{seconds:02d}.{centiseconds:02d}"
|
225
|
+
|
226
|
+
def _get_ass_header(self) -> str:
|
227
|
+
"""Get ASS format header with style definitions."""
|
228
|
+
width, height = self.video_resolution_num
|
229
|
+
return f"""[Script Info]
|
230
|
+
ScriptType: v4.00+
|
231
|
+
PlayResX: {width}
|
232
|
+
PlayResY: {height}
|
233
|
+
WrapStyle: 0
|
234
|
+
|
235
|
+
[V4+ Styles]
|
236
|
+
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
237
|
+
Style: Default,Arial,{self.font_size},&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1
|
238
|
+
|
239
|
+
[Events]
|
240
|
+
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
241
|
+
"""
|
242
|
+
|
243
|
+
def write_plain_lyrics(self, lyrics_data: LyricsData, output_prefix: str) -> str:
    """Write ``lyrics_data.lyrics`` to a UTF-8 ``.txt`` file.

    Args:
        lyrics_data: Provider lyrics whose ``lyrics`` text is written.
        output_prefix: Base file name; ``.txt`` is appended.

    Returns:
        Path of the written text file.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    self.logger.info("Writing plain lyrics file")
    txt_path = self._get_output_path(output_prefix, "txt")

    try:
        with open(txt_path, "w", encoding="utf-8") as txt_file:
            txt_file.write(lyrics_data.lyrics)
        self.logger.info(f"Plain lyrics file generated: {txt_path}")
        return txt_path

    except Exception as exc:
        self.logger.error(f"Failed to write plain lyrics file: {str(exc)}")
        raise
|
257
|
+
|
258
|
+
def write_plain_lyrics_from_correction(self, correction_result: CorrectionResult, output_prefix: str) -> str:
    """Write ``correction_result.text`` to a UTF-8 ``.txt`` file.

    Args:
        correction_result: Correction result whose ``text`` is written.
        output_prefix: Base file name; ``.txt`` is appended.

    Returns:
        Path of the written text file.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    self.logger.info("Writing corrected lyrics file")
    txt_path = self._get_output_path(output_prefix, "txt")

    try:
        with open(txt_path, "w", encoding="utf-8") as txt_file:
            txt_file.write(correction_result.text)
        self.logger.info(f"Corrected lyrics file generated: {txt_path}")
        return txt_path

    except Exception as exc:
        self.logger.error(f"Failed to write corrected lyrics file: {str(exc)}")
        raise
|
@@ -18,10 +18,10 @@ Functions for generating ASS subtitles from lyric data
|
|
18
18
|
class LyricSegmentIterator:
|
19
19
|
def __init__(self, lyrics_segments: List[str]):
|
20
20
|
self._segments = lyrics_segments
|
21
|
-
self._current_segment =
|
21
|
+
self._current_segment = 0
|
22
22
|
|
23
23
|
def __iter__(self):
|
24
|
-
self.
|
24
|
+
self._current_segment = 0
|
25
25
|
return self
|
26
26
|
|
27
27
|
def __next__(self):
|
@@ -49,17 +49,17 @@ class LyricSegment:
|
|
49
49
|
def to_ass(self) -> str:
|
50
50
|
"""Render this segment as part of an ASS event line"""
|
51
51
|
duration = (self.end_ts - self.ts).total_seconds() * 100
|
52
|
-
return
|
52
|
+
return rf"{{\kf{duration}}}{self.text}"
|
53
53
|
|
54
54
|
def to_dict(self) -> dict:
|
55
|
-
return {"text": self.text, "ts":
|
55
|
+
return {"text": self.text, "ts": self.ts.total_seconds(), "end_ts": self.end_ts.total_seconds() if self.end_ts else None}
|
56
56
|
|
57
57
|
@classmethod
|
58
58
|
def from_dict(cls, data: dict) -> "LyricSegment":
|
59
59
|
return cls(
|
60
60
|
text=data["text"],
|
61
|
-
ts=timedelta(seconds=
|
62
|
-
end_ts=timedelta(seconds=
|
61
|
+
ts=timedelta(seconds=data["ts"]),
|
62
|
+
end_ts=timedelta(seconds=data["end_ts"]) if data["end_ts"] is not None else None,
|
63
63
|
)
|
64
64
|
|
65
65
|
|
@@ -73,7 +73,7 @@ class LyricsLine:
|
|
73
73
|
|
74
74
|
@property
|
75
75
|
def end_ts(self) -> Optional[timedelta]:
|
76
|
-
return self.segments[-1].end_ts
|
76
|
+
return self.segments[-1].end_ts if self.segments else None
|
77
77
|
|
78
78
|
@ts.setter
|
79
79
|
def ts(self, value):
|
@@ -105,7 +105,7 @@ class LyricsLine:
|
|
105
105
|
"""Decorate line with karaoke tags"""
|
106
106
|
# Prefix the tag with centisecs prior to line in screen
|
107
107
|
start_time = (self.ts - screen_start_ts).total_seconds() * 100
|
108
|
-
line =
|
108
|
+
line = rf"{{\k{start_time}}}"
|
109
109
|
prev_end: Optional[timedelta] = None
|
110
110
|
for s in self.segments:
|
111
111
|
if prev_end is not None and prev_end < s.ts:
|
@@ -168,10 +168,10 @@ class LyricsScreen:
|
|
168
168
|
events = []
|
169
169
|
for i, line in enumerate(self.lines):
|
170
170
|
y_position = self.get_line_y(i)
|
171
|
-
|
171
|
+
|
172
172
|
# if self.logger:
|
173
173
|
# self.logger.debug(f"Creating ASS event for line {i + 1} at y-position: {y_position}")
|
174
|
-
|
174
|
+
|
175
175
|
event = line.as_ass_event(self.start_ts, self.end_ts, style, y_position)
|
176
176
|
events.append(event)
|
177
177
|
return events
|
@@ -188,12 +188,12 @@ class LyricsScreen:
|
|
188
188
|
return LyricsScreen(new_lines, start_ts)
|
189
189
|
|
190
190
|
def to_dict(self) -> dict:
|
191
|
-
return {"lines": [line.to_dict() for line in self.lines], "start_ts":
|
191
|
+
return {"lines": [line.to_dict() for line in self.lines], "start_ts": self.start_ts.total_seconds() if self.start_ts else None}
|
192
192
|
|
193
193
|
@classmethod
|
194
194
|
def from_dict(cls, data: dict) -> "LyricsScreen":
|
195
195
|
lines = [LyricsLine.from_dict(line_data) for line_data in data["lines"]]
|
196
|
-
start_ts = timedelta(seconds=
|
196
|
+
start_ts = timedelta(seconds=data["start_ts"]) if data["start_ts"] is not None else None
|
197
197
|
return cls(lines=lines, start_ts=start_ts)
|
198
198
|
|
199
199
|
|
File without changes
|
@@ -0,0 +1,225 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
from typing import Protocol, BinaryIO, Optional, List, Any
|
3
|
+
import os
|
4
|
+
import time
|
5
|
+
import logging
|
6
|
+
import requests
|
7
|
+
from dropbox import Dropbox
|
8
|
+
from dropbox.files import WriteMode, FileMetadata
|
9
|
+
from dropbox.sharing import RequestedVisibility, SharedLinkSettings
|
10
|
+
from dropbox.exceptions import AuthError, ApiError
|
11
|
+
|
12
|
+
logger = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
@dataclass
class DropboxConfig:
    """Credentials used to construct the Dropbox client.

    All fields default to ``None``; ``from_env`` fills them from the
    ``WHISPER_DROPBOX_*`` environment variables.
    """

    app_key: Optional[str] = None
    app_secret: Optional[str] = None
    refresh_token: Optional[str] = None

    @classmethod
    def from_env(cls) -> "DropboxConfig":
        """Build a config from the process environment.

        Variables that are not set leave the corresponding field as ``None``.
        """
        env_var_by_field = {
            "app_key": "WHISPER_DROPBOX_APP_KEY",
            "app_secret": "WHISPER_DROPBOX_APP_SECRET",
            "refresh_token": "WHISPER_DROPBOX_REFRESH_TOKEN",
        }
        values = {field_name: os.environ.get(env_var) for field_name, env_var in env_var_by_field.items()}
        return cls(**values)
|
31
|
+
|
32
|
+
|
33
|
+
class DropboxAPI(Protocol):
    """Structural type listing the Dropbox client methods this module calls.

    Any object providing these methods can be passed to ``DropboxHandler``
    as its ``client``.
    """

    def files_upload(self, f: bytes, path: str, mode: WriteMode) -> Any:
        """Upload the bytes ``f`` to ``path`` using the given write mode."""

    def files_list_folder(self, path: str, recursive: bool = False) -> Any:
        """Start listing the contents of the folder at ``path``."""

    def files_list_folder_continue(self, cursor: str) -> Any:
        """Fetch the next page of a folder listing from ``cursor``."""

    def files_download(self, path: str) -> tuple[Any, Any]:
        """Download the file at ``path``; returns a two-element tuple."""

    def files_download_to_file(self, download_path: str, path: str) -> None:
        """Download the file at ``path`` into the local file ``download_path``."""

    def files_get_metadata(self, path: str) -> Any:
        """Return the metadata for the entry at ``path``."""

    def sharing_create_shared_link_with_settings(self, path: str, settings: SharedLinkSettings) -> Any:
        """Create a shared link for ``path`` with the given settings."""

    def sharing_list_shared_links(self, path: str) -> Any:
        """List the shared links that exist for ``path``."""
|
44
|
+
|
45
|
+
|
46
|
+
class DropboxHandler:
|
47
|
+
"""Handles Dropbox storage operations with automatic token refresh."""
|
48
|
+
|
49
|
+
def __init__(
    self,
    config: Optional[DropboxConfig] = None,
    client: Optional[DropboxAPI] = None,
):
    """Set up the handler's configuration and Dropbox client.

    Args:
        config: Credentials to use; read from the environment via
            ``DropboxConfig.from_env()`` when omitted.
        client: Pre-built client to use instead of creating a ``Dropbox``
            instance from the credentials.

    Raises:
        ValueError: If the configuration is missing a required value. The
            configuration is validated even when ``client`` is supplied.
    """
    self.config = config if config else DropboxConfig.from_env()
    self._validate_config()

    if client:
        self.client = client
    else:
        self.client = Dropbox(
            app_key=self.config.app_key,
            app_secret=self.config.app_secret,
            oauth2_refresh_token=self.config.refresh_token,
        )
|
63
|
+
|
64
|
+
def _validate_config(self) -> None:
    """Check that every required Dropbox credential is present.

    Only whether each value is set is written to the debug log. No part of
    the key, secret or refresh token is logged, so credentials cannot leak
    through log files.

    Raises:
        ValueError: If ``app_key``, ``app_secret`` or ``refresh_token`` is
            missing or empty; the message lists every missing field.
    """
    required_fields = ("app_key", "app_secret", "refresh_token")

    logger.debug("Validating DropboxConfig with values:")
    for field_name in required_fields:
        state = "set" if getattr(self.config, field_name) else "None"
        logger.debug(f"{field_name}: {state}")

    missing = [field_name for field_name in required_fields if not getattr(self.config, field_name)]

    if missing:
        error_msg = f"Missing required Dropbox configuration: {', '.join(missing)}"
        logger.error(error_msg)
        raise ValueError(error_msg)
|
83
|
+
|
84
|
+
def upload_with_retry(self, file: BinaryIO, path: str, max_retries: int = 3) -> None:
    """Upload a seekable binary file to Dropbox, retrying on API errors.

    The file is rewound and read in full on every attempt and overwrites any
    existing file at ``path``. After a failed attempt the method sleeps for
    ``attempt`` seconds (1 s, 2 s, ...) before trying again.

    Args:
        file: Open, seekable binary file object.
        path: Destination path in Dropbox.
        max_retries: Total number of attempts; must be at least 1.

    Raises:
        ValueError: If ``max_retries`` is less than 1. Without this check the
            loop would not run and the call would return as if the upload
            had succeeded.
        ApiError: If the final attempt fails.
    """
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    for attempt in range(max_retries):
        try:
            logger.debug(f"Attempting file upload to {path} (attempt {attempt + 1}/{max_retries})")
            # Rewind so a retry re-sends the whole file, not just the unread tail.
            file.seek(0)
            self.client.files_upload(file.read(), path, mode=WriteMode.overwrite)
            logger.debug(f"Successfully uploaded file to {path}")
            return
        except ApiError as e:
            logger.warning(f"Upload attempt {attempt + 1} failed: {str(e)}")
            if attempt == max_retries - 1:
                logger.error(f"All upload attempts failed for {path}")
                raise
            # Linear back-off before the next attempt.
            time.sleep(1 * (attempt + 1))
|
99
|
+
|
100
|
+
def upload_string_with_retry(self, content: str, path: str, max_retries: int = 3) -> None:
    """Upload a string to Dropbox, retrying on API errors.

    The string is encoded with ``str.encode()`` defaults and overwrites any
    existing file at ``path``. After a failed attempt the method sleeps for
    ``attempt`` seconds (1 s, 2 s, ...) before trying again.

    Args:
        content: Text to upload.
        path: Destination path in Dropbox.
        max_retries: Total number of attempts; must be at least 1.

    Raises:
        ValueError: If ``max_retries`` is less than 1. Without this check the
            loop would not run and the call would return as if the upload
            had succeeded.
        ApiError: If the final attempt fails.
    """
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    for attempt in range(max_retries):
        try:
            logger.debug(f"Attempting string upload to {path} (attempt {attempt + 1}/{max_retries})")
            self.client.files_upload(content.encode(), path, mode=WriteMode.overwrite)
            logger.debug(f"Successfully uploaded string content to {path}")
            return
        except ApiError as e:
            logger.warning(f"Upload attempt {attempt + 1} failed: {str(e)}")
            if attempt == max_retries - 1:
                logger.error(f"All upload attempts failed for {path}")
                raise
            # Linear back-off before the next attempt.
            time.sleep(1 * (attempt + 1))
|
114
|
+
|
115
|
+
def list_folder_recursive(self, path: str = "") -> List[FileMetadata]:
    """Collect every entry under ``path``, following listing pagination.

    Args:
        path: Dropbox folder to list; defaults to the empty string.

    Returns:
        All entries from every page of the recursive listing.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        logger.debug(f"Listing files recursively from {path}")
        page = self.client.files_list_folder(path, recursive=True)
        collected = list(page.entries)

        # Keep requesting pages until the API reports there are no more.
        while page.has_more:
            page = self.client.files_list_folder_continue(page.cursor)
            collected.extend(page.entries)

        return collected
    except Exception as exc:
        logger.error(f"Error listing files: {str(exc)}", exc_info=True)
        raise
|
132
|
+
|
133
|
+
def download_file_content(self, path: str) -> bytes:
    """Download the file at ``path`` and return its body.

    Returns:
        The ``content`` attribute of the second element returned by the
        client's ``files_download``.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        logger.debug(f"Downloading file content from {path}")
        download_result = self.client.files_download(path)
        response = download_result[1]
        return response.content
    except Exception as exc:
        logger.error(f"Error downloading file: {str(exc)}", exc_info=True)
        raise
|
141
|
+
|
142
|
+
def download_folder(self, dropbox_path: str, local_path: str) -> None:
    """Mirror every file under ``dropbox_path`` into ``local_path``.

    Each file keeps its path relative to ``dropbox_path``; local parent
    directories are created as needed. Entries that are not ``FileMetadata``
    are skipped.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        logger.debug(f"Downloading folder {dropbox_path} to {local_path}")
        prefix_length = len(dropbox_path)

        for item in self.list_folder_recursive(dropbox_path):
            if not isinstance(item, FileMetadata):
                continue

            # Strip the folder prefix to get the path relative to the download root.
            relative = item.path_display[prefix_length:].lstrip("/")
            destination = os.path.join(local_path, relative)

            os.makedirs(os.path.dirname(destination), exist_ok=True)
            logger.debug(f"Downloading {item.path_display} to {destination}")
            self.client.files_download_to_file(destination, item.path_display)

        logger.debug(f"Successfully downloaded folder {dropbox_path}")
    except Exception as exc:
        logger.error(f"Error downloading folder: {str(exc)}", exc_info=True)
        raise
|
161
|
+
|
162
|
+
def upload_folder(self, local_path: str, dropbox_path: str) -> None:
    """Upload every file under ``local_path`` to ``dropbox_path``.

    The directory structure below ``local_path`` is reproduced under
    ``dropbox_path``. Each file is read fully into memory and overwrites any
    existing file at its destination.

    Args:
        local_path: Local directory to walk.
        dropbox_path: Destination folder in Dropbox; a trailing ``/`` is
            ignored.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        logger.debug(f"Uploading folder {local_path} to {dropbox_path}")
        # Avoid "//" in target paths when the caller passes "/folder/" or "/".
        remote_root = dropbox_path.rstrip("/")

        for root, _, files in os.walk(local_path):
            for filename in files:
                local_file_path = os.path.join(root, filename)
                rel_path = os.path.relpath(local_file_path, local_path)
                # Dropbox paths always use "/", whatever the local separator is.
                remote_rel_path = rel_path.replace(os.sep, "/")
                target_path = f"{remote_root}/{remote_rel_path}"

                logger.debug(f"Uploading {rel_path} to {target_path}")
                with open(local_file_path, "rb") as f:
                    self.client.files_upload(f.read(), target_path, mode=WriteMode.overwrite)

        logger.debug(f"Successfully uploaded folder {local_path}")
    except Exception as e:
        logger.error(f"Error uploading folder: {str(e)}", exc_info=True)
        raise
|
180
|
+
|
181
|
+
def create_shared_link(self, path: str) -> str:
    """Create a public shared link for ``path`` and return its download URL.

    Returns:
        The link URL with ``www.dropbox.com`` replaced by
        ``dl.dropboxusercontent.com``.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        logger.debug(f"Creating shared link for {path}")
        public_settings = SharedLinkSettings(requested_visibility=RequestedVisibility.public)
        link = self.client.sharing_create_shared_link_with_settings(path, settings=public_settings)
        return link.url.replace("www.dropbox.com", "dl.dropboxusercontent.com")
    except Exception as exc:
        logger.error(f"Error creating shared link: {str(exc)}", exc_info=True)
        raise
|
192
|
+
|
193
|
+
def get_existing_shared_link(self, path: str) -> Optional[str]:
    """Return the download URL of the first existing shared link for ``path``.

    Returns:
        The first link's URL with ``www.dropbox.com`` replaced by
        ``dl.dropboxusercontent.com``, or ``None`` when there is no link or
        the lookup fails (failures are logged, not raised).
    """
    try:
        logger.debug(f"Getting existing shared link for {path}")
        links = self.client.sharing_list_shared_links(path=path).links
        if not links:
            return None
        return links[0].url.replace("www.dropbox.com", "dl.dropboxusercontent.com")
    except Exception as exc:
        # Best effort: a failed lookup is reported as "no existing link".
        logger.error(f"Error getting existing shared link: {str(exc)}", exc_info=True)
        return None
|
204
|
+
|
205
|
+
def create_or_get_shared_link(self, path: str) -> str:
    """Return a shared-link URL for ``path``, reusing an existing link if any.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        link = self.get_existing_shared_link(path)
        if not link:
            logger.debug(f"Creating new shared link for {path}")
            return self.create_shared_link(path)

        logger.debug(f"Found existing shared link for {path}")
        return link
    except Exception as exc:
        logger.error(f"Error creating/getting shared link: {str(exc)}", exc_info=True)
        raise
|
218
|
+
|
219
|
+
def file_exists(self, path: str) -> bool:
    """Report whether metadata can be fetched for ``path``.

    Returns:
        ``True`` when the client's ``files_get_metadata`` succeeds, ``False``
        when it raises any ``Exception``. Failures unrelated to existence
        therefore also read as "does not exist".
    """
    try:
        self.client.files_get_metadata(path)
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must still propagate.
        return False
    return True
|