lyrics-transcriber 0.12.7__tar.gz → 0.12.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17) hide show
  1. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/PKG-INFO +2 -2
  2. lyrics_transcriber-0.12.9/lyrics_transcriber/llm_prompts/promptfooconfig.yaml +61 -0
  3. lyrics_transcriber-0.12.9/lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +48 -0
  4. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/lyrics_transcriber/transcriber.py +17 -12
  5. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/lyrics_transcriber/utils/subtitles.py +14 -57
  6. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/pyproject.toml +2 -2
  7. lyrics_transcriber-0.12.7/lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -39
  8. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/LICENSE +0 -0
  9. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/README.md +0 -0
  10. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/lyrics_transcriber/__init__.py +0 -0
  11. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/lyrics_transcriber/llm_prompts/README.md +0 -0
  12. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -0
  13. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -0
  14. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -0
  15. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/lyrics_transcriber/utils/__init__.py +0 -0
  16. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/lyrics_transcriber/utils/ass.py +0 -0
  17. {lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/lyrics_transcriber/utils/cli.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lyrics-transcriber
3
- Version: 0.12.7
3
+ Version: 0.12.9
4
4
  Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
5
5
  Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
6
6
  License: MIT
@@ -22,7 +22,7 @@ Requires-Dist: numpy (>=1,<2)
22
22
  Requires-Dist: onnx (>=1,<2)
23
23
  Requires-Dist: onnxruntime (>=1,<2)
24
24
  Requires-Dist: openai (>=1,<2)
25
- Requires-Dist: openai-whisper (==20231106)
25
+ Requires-Dist: openai-whisper (==20231117)
26
26
  Requires-Dist: python-slugify (>=8,<9)
27
27
  Requires-Dist: syrics (>=0,<1)
28
28
  Requires-Dist: torch (>1)
@@ -0,0 +1,61 @@
1
+ # This configuration runs each prompt through a series of example inputs and checks if they meet requirements.
2
+ # Learn more: https://promptfoo.dev/docs/configuration/guide
3
+
4
+ description: Song lyric corrector for a karaoke video studio, responsible for reading lyrics inputs, correcting them and generating JSON-based responses containing the corrected lyrics according to predefined criteria.
5
+ providers:
6
+ - id: openai:gpt-3.5-turbo-1106
7
+ config:
8
+ temperature: 0
9
+ # - id: openai:gpt-4-1106-preview
10
+ # config:
11
+ # temperature: 0
12
+ prompts:
13
+ - file://llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt
14
+
15
+ defaultTest:
16
+ assert:
17
+ - type: is-json
18
+ value:
19
+ required: [id, text, words]
20
+ type: object
21
+ properties:
22
+ id:
23
+ type: number
24
+ text:
25
+ type: string
26
+ words:
27
+ type: array
28
+ items:
29
+ type: object
30
+ properties:
31
+ text:
32
+ type: string
33
+ start:
34
+ type: number
35
+ end:
36
+ type: number
37
+ confidence:
38
+ type: number
39
+
40
+ tests:
41
+ - description: ABBA - Under Attack (segment 0)
42
+ vars:
43
+ reference_lyrics: file://test_data/ABBA-UnderAttack-Genius.txt
44
+ previous_two_corrected_lines:
45
+ upcoming_two_uncorrected_lines:
46
+ segment_input: |
47
+ {"id": 0, "start": 17.46, "end": 21.3, "confidence": 0.792, "text": " Don't know how to take it, don't know where to go", "words": [{"text": "Don't", "start": 17.46, "end": 18.2, "confidence": 0.278}, {"text": "know", "start": 18.2, "end": 18.42, "confidence": 0.965}, {"text": "how", "start": 18.42, "end": 18.66, "confidence": 0.865}, {"text": "to", "start": 18.66, "end": 18.88, "confidence": 0.994}, {"text": "take", "start": 18.88, "end": 19.2, "confidence": 0.992}, {"text": "it,", "start": 19.2, "end": 19.44, "confidence": 0.974}, {"text": "don't", "start": 19.56, "end": 19.8, "confidence": 0.917}, {"text": "know", "start": 19.8, "end": 20.02, "confidence": 0.989}, {"text": "where", "start": 20.02, "end": 20.46, "confidence": 0.963}, {"text": "to", "start": 20.46, "end": 20.76, "confidence": 0.983}, {"text": "go", "start": 20.76, "end": 21.3, "confidence": 0.982}]}
48
+ assert:
49
+ - type: contains
50
+ value: "Don't know how to take it, don't know where to go"
51
+
52
+ - description: ABBA - Under Attack (segment 1)
53
+ vars:
54
+ reference_lyrics: file://test_data/ABBA-UnderAttack-Genius.txt
55
+ previous_two_corrected_lines:
56
+ upcoming_two_uncorrected_lines:
57
+ segment_input: |
58
+ {"id": 1, "start": 22.04, "end": 27.84, "confidence": 0.763, "text": " My resistance running low And every day the hole is getting tighter", "words": [{"text": "My", "start": 22.04, "end": 22.32, "confidence": 0.535}, {"text": "resistance", "start": 22.32, "end": 22.94, "confidence": 0.936}, {"text": "running", "start": 22.94, "end": 23.66, "confidence": 0.89}, {"text": "low", "start": 23.66, "end": 24.36, "confidence": 0.999}, {"text": "And", "start": 24.36, "end": 25.14, "confidence": 0.485}, {"text": "every", "start": 25.14, "end": 25.56, "confidence": 0.568}, {"text": "day", "start": 25.56, "end": 25.88, "confidence": 0.997}, {"text": "the", "start": 25.88, "end": 26.1, "confidence": 0.959}, {"text": "hole", "start": 26.1, "end": 26.48, "confidence": 0.361}, {"text": "is", "start": 26.48, "end": 26.68, "confidence": 0.947}, {"text": "getting", "start": 26.68, "end": 27.08, "confidence": 0.996}, {"text": "tighter", "start": 27.08, "end": 27.84, "confidence": 0.975}]}
59
+ assert:
60
+ - type: contains
61
+ value: "My resistance running low And every day the hold is getting tighter"
@@ -0,0 +1,48 @@
1
+ Don't know how to take it, don't know where to go
2
+ My resistance running low
3
+ And every day the hold is getting tighter and it troubles me so
4
+ (You know that I'm nobody's fool)
5
+ I'm nobody's fool and yet it's clear to me
6
+ I don't have a strategy
7
+ It's just like taking candy from a baby and I think I must be
8
+
9
+ Under attack, I'm being taken
10
+ About to crack, defences breaking
11
+ Won't somebody please have a heart
12
+ Come and rescue me now 'cause I'm falling apart
13
+ Under attack, I'm taking cover
14
+ He's on my track, my chasing lover
15
+ Thinking nothing can stop him now
16
+ Should I want to, I'm not sure I would know how
17
+
18
+ This is getting crazy, I should tell him so
19
+ Really let my anger show
20
+ Persuade him that the answer to his questions is a definite no
21
+ (I'm kind of flattered I suppose)
22
+ Guess I'm kind of flattered but I'm scared as well
23
+ Something like a magic spell
24
+ I hardly dare to think of what would happen, where I'd be if I fell
25
+
26
+ Under attack, I'm being taken
27
+ About to crack, defences breaking
28
+ Won't somebody please have a heart
29
+ Come and rescue me now 'cause I'm falling apart
30
+ Under attack, I'm taking cover
31
+ He's on my track, my chasing lover
32
+ Thinking nothing's gonna stop him now
33
+ Should I want to, I'm not sure I won't know how
34
+
35
+ Under attack, I'm being taken
36
+ About to crack, defences breaking
37
+ Won't somebody see and save a heart
38
+ Come and rescue me now 'cause I'm falling apart
39
+ Under attack, I'm taking cover
40
+ He's on my track, my chasing lover
41
+ Thinking nothing can stop him now
42
+ Should I want to, I'm not sure I would know how
43
+
44
+ Under attack, I'm being taken
45
+ About to crack, defences breaking
46
+ Won't somebody please have a heart
47
+ Come and rescue me now 'cause I'm falling apart
48
+ Under attack, I'm taking cover
@@ -76,17 +76,21 @@ class LyricsTranscriber:
76
76
 
77
77
  match video_resolution:
78
78
  case "4k":
79
- self.video_resolution_num = ("3840", "2160")
79
+ self.video_resolution_num = (3840, 2160)
80
80
  self.font_size = 250
81
+ self.line_height = 250
81
82
  case "1080p":
82
- self.video_resolution_num = ("1920", "1080")
83
- self.font_size = 140
83
+ self.video_resolution_num = (1920, 1080)
84
+ self.font_size = 120
85
+ self.line_height = 120
84
86
  case "720p":
85
- self.video_resolution_num = ("1280", "720")
87
+ self.video_resolution_num = (1280, 720)
86
88
  self.font_size = 100
89
+ self.line_height = 100
87
90
  case "360p":
88
- self.video_resolution_num = ("640", "360")
91
+ self.video_resolution_num = (640, 360)
89
92
  self.font_size = 50
93
+ self.line_height = 50
90
94
  case _:
91
95
  raise ValueError("Invalid video_resolution value. Must be one of: 4k, 1080p, 720p, 360p")
92
96
 
@@ -299,14 +303,13 @@ class LyricsTranscriber:
299
303
  previous_two_corrected_lines = ""
300
304
  upcoming_two_uncorrected_lines = ""
301
305
 
302
- if segment["id"] > 2:
303
- for previous_segment in corrected_lyrics_dict["segments"]:
304
- if previous_segment["id"] in (segment["id"] - 2, segment["id"] - 1):
305
- previous_two_corrected_lines += previous_segment["text"].strip() + "\n"
306
+ for previous_segment in corrected_lyrics_dict["segments"]:
307
+ if previous_segment["id"] in (segment["id"] - 2, segment["id"] - 1):
308
+ previous_two_corrected_lines += previous_segment["text"].strip() + "\n"
306
309
 
307
- for next_segment in self.outputs["transcription_data_dict"]["segments"]:
308
- if next_segment["id"] in (segment["id"] + 1, segment["id"] + 2):
309
- upcoming_two_uncorrected_lines += next_segment["text"].strip() + "\n"
310
+ for next_segment in self.outputs["transcription_data_dict"]["segments"]:
311
+ if next_segment["id"] in (segment["id"] + 1, segment["id"] + 2):
312
+ upcoming_two_uncorrected_lines += next_segment["text"].strip() + "\n"
310
313
 
311
314
  llm_transcript_segment += f"--- Segment {segment['id']} / {total_segments} ---\n"
312
315
  llm_transcript_segment += f"Previous two corrected lines:\n\n{previous_two_corrected_lines}\nUpcoming two uncorrected lines:\n\n{upcoming_two_uncorrected_lines}\nData input:\n\n{segment_input}\n"
@@ -577,6 +580,8 @@ class LyricsTranscriber:
577
580
  if screen is None:
578
581
  self.logger.debug(f"screen is none, creating new LyricsScreen")
579
582
  screen = subtitles.LyricsScreen()
583
+ screen.video_size = self.video_resolution_num
584
+ screen.line_height = self.line_height
580
585
  if line is None:
581
586
  self.logger.debug(f"line is none, creating new LyricsLine")
582
587
  line = subtitles.LyricsLine()
@@ -1,6 +1,6 @@
1
1
  from dataclasses import dataclass, field
2
2
  from datetime import timedelta
3
- from typing import Dict, List, Optional
3
+ from typing import Dict, List, Optional, Tuple
4
4
  import json
5
5
  import itertools
6
6
  from pathlib import Path
@@ -13,14 +13,6 @@ from . import ass
13
13
  Functions for generating ASS subtitles from lyric data
14
14
  """
15
15
 
16
- VIDEO_SIZE = (400, 320)
17
- LINE_HEIGHT = 30
18
-
19
-
20
- class LyricMarker(IntEnum):
21
- SEGMENT_START = 1
22
- SEGMENT_END = 2
23
-
24
16
 
25
17
  class LyricSegmentIterator:
26
18
  def __init__(self, lyrics_segments: List[str]):
@@ -143,16 +135,17 @@ class LyricsLine:
143
135
  class LyricsScreen:
144
136
  lines: List[LyricsLine] = field(default_factory=list)
145
137
  start_ts: Optional[timedelta] = None
138
+ video_size: Tuple[int, int] = None
139
+ line_height: int = None
146
140
 
147
141
  @property
148
142
  def end_ts(self) -> timedelta:
149
143
  return self.lines[-1].end_ts
150
144
 
151
145
  def get_line_y(self, line_num: int) -> int:
152
- _, h = VIDEO_SIZE
146
+ _, h = self.video_size
153
147
  line_count = len(self.lines)
154
- line_height = LINE_HEIGHT
155
- return (h / 2) - (line_count * line_height / 2) + (line_num * line_height)
148
+ return (h / 2) - (line_count * self.line_height / 2) + (line_num * self.line_height)
156
149
 
157
150
  def as_ass_events(self, style: ass.ASS.Style) -> List[ass.ASS.Event]:
158
151
  return [line.as_ass_event(self.start_ts, self.end_ts, style, self.get_line_y(i)) for i, line in enumerate(self.lines)]
@@ -185,46 +178,6 @@ class LyricsObjectJSONEncoder(json.JSONEncoder):
185
178
  return super().default(o)
186
179
 
187
180
 
188
- def create_screens(logger, lyrics_segments, events_tuples):
189
- segments = iter(LyricSegmentIterator(lyrics_segments=lyrics_segments))
190
- events = iter(events_tuples)
191
- screens: List[LyricsScreen] = []
192
- prev_segment: Optional[LyricSegment] = None
193
- line: Optional[LyricsLine] = None
194
- screen: Optional[LyricsScreen] = None
195
-
196
- try:
197
- for event in events:
198
- ts = event[0]
199
- marker = event[1]
200
- if marker == LyricMarker.SEGMENT_START:
201
- segment_text: str = next(segments)
202
- segment = LyricSegment(segment_text, ts)
203
- if screen is None:
204
- screen = LyricsScreen()
205
- if line is None:
206
- line = LyricsLine()
207
- line.segments.append(segment)
208
- if segment_text.endswith("\n"):
209
- screen.lines.append(line)
210
- line = None
211
- if segment_text.endswith("\n\n"):
212
- screens.append(screen)
213
- screen = None
214
- prev_segment = segment
215
- elif marker == LyricMarker.SEGMENT_END:
216
- if prev_segment is not None:
217
- prev_segment.end_ts = ts
218
- if line is not None:
219
- screen.lines.append(line) # type: ignore[union-attr]
220
- if screen is not None and len(screen.lines) > 0:
221
- screens.append(screen) # type: ignore[arg-type]
222
- except StopIteration as si:
223
- logger.error(f"Reached end of segments before end of events. Events: {list(events)}, lyrics: {list(segments)}")
224
-
225
- return screens
226
-
227
-
228
181
  def set_segment_end_times(screens: List[LyricsScreen], song_duration_seconds: int) -> List[LyricsScreen]:
229
182
  """
230
183
  Infer end times of lines for screens where they are not already set.
@@ -254,10 +207,14 @@ def set_screen_start_times(screens: List[LyricsScreen]) -> List[LyricsScreen]:
254
207
  return screens
255
208
 
256
209
 
257
- def create_styled_subtitles(lyric_screens: List[LyricsScreen], resolution, fontsize) -> ass.ASS:
210
+ def create_styled_subtitles(
211
+ lyric_screens: List[LyricsScreen],
212
+ resolution,
213
+ fontsize,
214
+ ) -> ass.ASS:
258
215
  a = ass.ASS()
259
216
  a.set_resolution(resolution)
260
-
217
+
261
218
  a.styles_format = [
262
219
  "Name", # The name of the Style. Case sensitive. Cannot include commas.
263
220
  "Fontname", # The fontname as used by Windows. Case-sensitive.
@@ -278,9 +235,9 @@ def create_styled_subtitles(lyric_screens: List[LyricsScreen], resolution, fonts
278
235
  "Outline", # If BorderStyle is 1, then this specifies the width of the outline around the text, in pixels. Values may be 0, 1, 2, 3 or 4.
279
236
  "Shadow", # If BorderStyle is 1, then this specifies the depth of the drop shadow behind the text, in pixels. Values may be 0, 1, 2, 3 or 4. Drop shadow is always used in addition to an outline - SSA will force an outline of 1 pixel if no outline width is given.
280
237
  "Alignment", # This sets how text is "justified" within the Left/Right onscreen margins, and also the vertical placing. Values may be 1=Left, 2=Centered, 3=Right. Add 4 to the value for a "Toptitle". Add 8 to the value for a "Midtitle". eg. 5 = left-justified toptitle
281
- "MarginL", #
282
- "MarginR", #
283
- "MarginV", #
238
+ "MarginL", # This defines the Left Margin in pixels. It is the distance from the left-hand edge of the screen. The three onscreen margins (MarginL, MarginR, MarginV) define areas in which the subtitle text will be displayed.
239
+ "MarginR", # This defines the Right Margin in pixels. It is the distance from the right-hand edge of the screen.
240
+ "MarginV", # This defines the Vertical Margin in pixels. For a subtitle, it is the distance from the bottom of the screen. For a toptitle, it is the distance from the top of the screen. For a midtitle, the value is ignored - the text will be vertically centred.
284
241
  "Encoding", #
285
242
  ]
286
243
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "lyrics-transcriber"
3
- version = "0.12.7"
3
+ version = "0.12.9"
4
4
  description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
5
5
  authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
6
6
  license = "MIT"
@@ -26,7 +26,7 @@ lyricsgenius = "^3"
26
26
  python-slugify = "^8"
27
27
  syrics = "^0"
28
28
  openai = "^1"
29
- openai-whisper = "20231106"
29
+ openai-whisper = "20231117"
30
30
  whisper-timestamped = "^1"
31
31
  # Note: after adding openai-whisper and whisper-timestamped with poetry lock, I then removed all traces of triton
32
32
  # from poetry.lock before running poetry install, as triton doesn't support macOS but isn't actually needed for whisper.
@@ -1,39 +0,0 @@
1
- # This configuration runs each prompt through a series of example inputs and checks if they meet requirements.
2
- # Learn more: https://promptfoo.dev/docs/configuration/guide
3
-
4
- prompts:
5
- - file://llm_prompt_lyrics_correction_*.txt
6
- providers: [openai:gpt-3.5-turbo-0613, openai:gpt-4-1106-preview]
7
- tests:
8
- - description: First test case - automatic review
9
- vars:
10
- var1: first variable's value
11
- var2: another value
12
- var3: some other value
13
- # For more information on assertions, see https://promptfoo.dev/docs/configuration/expected-outputs
14
- assert:
15
- - type: equals
16
- value: expected LLM output goes here
17
- - type: contains
18
- value: some text
19
- - type: javascript
20
- value: 1 / (output.length + 1) # prefer shorter outputs
21
-
22
- - description: Second test case - manual review
23
- # Test cases don't need assertions if you prefer to manually review the output
24
- vars:
25
- var1: new value
26
- var2: another value
27
- var3: third value
28
-
29
- - description: Third test case - other types of automatic review
30
- vars:
31
- var1: yet another value
32
- var2: and another
33
- var3: dear llm, please output your response in json format
34
- assert:
35
- - type: contains-json
36
- - type: similar
37
- value: ensures that output is semantically similar to this text
38
- - type: model-graded-closedqa
39
- value: ensure that output contains a reference to X