PyPI - lyrics-transcriber - Versions diffs - 0.12.7__tar.gz → 0.12.9__tar.gz - Mend

lyrics-transcriber 0.12.7.tar.gz → 0.12.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

{lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lyrics-transcriber
-Version: 0.12.7
+Version: 0.12.9
 Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
 Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
 License: MIT
@@ -22,7 +22,7 @@ Requires-Dist: numpy (>=1,<2)
 Requires-Dist: onnx (>=1,<2)
 Requires-Dist: onnxruntime (>=1,<2)
 Requires-Dist: openai (>=1,<2)
-Requires-Dist: openai-whisper (==20231106)
+Requires-Dist: openai-whisper (==20231117)
 Requires-Dist: python-slugify (>=8,<9)
 Requires-Dist: syrics (>=0,<1)
 Requires-Dist: torch (>1)

lyrics_transcriber-0.12.9/lyrics_transcriber/llm_prompts/promptfooconfig.yaml ADDED Viewed

@@ -0,0 +1,61 @@
+# This configuration runs each prompt through a series of example inputs and checks if they meet requirements.
+# Learn more: https://promptfoo.dev/docs/configuration/guide
+description: Song lyric corrector for a karaoke video studio, responsible for reading lyrics inputs, correcting them and generating JSON-based responses containing the corrected lyrics according to predefined criteria.
+providers:
+  - id: openai:gpt-3.5-turbo-1106
+    config:
+      temperature: 0
+  # - id: openai:gpt-4-1106-preview
+  #   config:
+  #     temperature: 0
+prompts:
+  - file://llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt
+defaultTest:
+  assert:
+    - type: is-json
+      value:
+        required: [id, text, words]
+        type: object
+        properties:
+          id:
+            type: number
+          text:
+            type: string
+          words:
+            type: array
+            items:
+              type: object
+              properties:
+                text:
+                  type: string
+                start:
+                  type: number
+                end:
+                  type: number
+                confidence:
+                  type: number
+tests:
+  - description: ABBA - Under Attack (segment 0)
+    vars:
+      reference_lyrics: file://test_data/ABBA-UnderAttack-Genius.txt
+      previous_two_corrected_lines:
+      upcoming_two_uncorrected_lines:
+      segment_input: |
+        {"id": 0, "start": 17.46, "end": 21.3, "confidence": 0.792, "text": " Don't know how to take it, don't know where to go", "words": [{"text": "Don't", "start": 17.46, "end": 18.2, "confidence": 0.278}, {"text": "know", "start": 18.2, "end": 18.42, "confidence": 0.965}, {"text": "how", "start": 18.42, "end": 18.66, "confidence": 0.865}, {"text": "to", "start": 18.66, "end": 18.88, "confidence": 0.994}, {"text": "take", "start": 18.88, "end": 19.2, "confidence": 0.992}, {"text": "it,", "start": 19.2, "end": 19.44, "confidence": 0.974}, {"text": "don't", "start": 19.56, "end": 19.8, "confidence": 0.917}, {"text": "know", "start": 19.8, "end": 20.02, "confidence": 0.989}, {"text": "where", "start": 20.02, "end": 20.46, "confidence": 0.963}, {"text": "to", "start": 20.46, "end": 20.76, "confidence": 0.983}, {"text": "go", "start": 20.76, "end": 21.3, "confidence": 0.982}]}
+    assert:
+      - type: contains
+        value: "Don't know how to take it, don't know where to go"
+  - description: ABBA - Under Attack (segment 1)
+    vars:
+      reference_lyrics: file://test_data/ABBA-UnderAttack-Genius.txt
+      previous_two_corrected_lines:
+      upcoming_two_uncorrected_lines:
+      segment_input: |
+        {"id": 1, "start": 22.04, "end": 27.84, "confidence": 0.763, "text": " My resistance running low And every day the hole is getting tighter", "words": [{"text": "My", "start": 22.04, "end": 22.32, "confidence": 0.535}, {"text": "resistance", "start": 22.32, "end": 22.94, "confidence": 0.936}, {"text": "running", "start": 22.94, "end": 23.66, "confidence": 0.89}, {"text": "low", "start": 23.66, "end": 24.36, "confidence": 0.999}, {"text": "And", "start": 24.36, "end": 25.14, "confidence": 0.485}, {"text": "every", "start": 25.14, "end": 25.56, "confidence": 0.568}, {"text": "day", "start": 25.56, "end": 25.88, "confidence": 0.997}, {"text": "the", "start": 25.88, "end": 26.1, "confidence": 0.959}, {"text": "hole", "start": 26.1, "end": 26.48, "confidence": 0.361}, {"text": "is", "start": 26.48, "end": 26.68, "confidence": 0.947}, {"text": "getting", "start": 26.68, "end": 27.08, "confidence": 0.996}, {"text": "tighter", "start": 27.08, "end": 27.84, "confidence": 0.975}]}
+    assert:
+      - type: contains
+        value: "My resistance running low And every day the hold is getting tighter"

lyrics_transcriber-0.12.9/lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt ADDED Viewed

@@ -0,0 +1,48 @@
+Don't know how to take it, don't know where to go
+My resistance running low
+And every day the hold is getting tighter and it troubles me so
+(You know that I'm nobody's fool)
+I'm nobody's fool and yet it's clear to me
+I don't have a strategy
+It's just like taking candy from a baby and I think I must be
+Under attack, I'm being taken
+About to crack, defences breaking
+Won't somebody please have a heart
+Come and rescue me now 'cause I'm falling apart
+Under attack, I'm taking cover
+He's on my track, my chasing lover
+Thinking nothing can stop him now
+Should I want to, I'm not sure I would know how
+This is getting crazy, I should tell him so
+Really let my anger show
+Persuade him that the answer to his questions is a definite no
+(I'm kind of flattered I suppose)
+Guess I'm kind of flattered but I'm scared as well
+Something like a magic spell
+I hardly dare to think of what would happen, where I'd be if I fell
+Under attack, I'm being taken
+About to crack, defences breaking
+Won't somebody please have a heart
+Come and rescue me now 'cause I'm falling apart
+Under attack, I'm taking cover
+He's on my track, my chasing lover
+Thinking nothing's gonna stop him now
+Should I want to, I'm not sure I won't know how
+Under attack, I'm being taken
+About to crack, defences breaking
+Won't somebody see and save a heart
+Come and rescue me now 'cause I'm falling apart
+Under attack, I'm taking cover
+He's on my track, my chasing lover
+Thinking nothing can stop him now
+Should I want to, I'm not sure I would know how
+Under attack, I'm being taken
+About to crack, defences breaking
+Won't somebody please have a heart
+Come and rescue me now 'cause I'm falling apart
+Under attack, I'm taking cover

{lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/lyrics_transcriber/transcriber.py RENAMED Viewed

@@ -76,17 +76,21 @@ class LyricsTranscriber:
         match video_resolution:
             case "4k":
-                self.video_resolution_num = ("3840", "2160")
+                self.video_resolution_num = (3840, 2160)
                 self.font_size = 250
+                self.line_height = 250
             case "1080p":
-                self.video_resolution_num = ("1920", "1080")
-                self.font_size = 140
+                self.video_resolution_num = (1920, 1080)
+                self.font_size = 120
+                self.line_height = 120
             case "720p":
-                self.video_resolution_num = ("1280", "720")
+                self.video_resolution_num = (1280, 720)
                 self.font_size = 100
+                self.line_height = 100
             case "360p":
-                self.video_resolution_num = ("640", "360")
+                self.video_resolution_num = (640, 360)
                 self.font_size = 50
+                self.line_height = 50
             case _:
                 raise ValueError("Invalid video_resolution value. Must be one of: 4k, 1080p, 720p, 360p")
@@ -299,14 +303,13 @@ class LyricsTranscriber:
                 previous_two_corrected_lines = ""
                 upcoming_two_uncorrected_lines = ""
-                if segment["id"] > 2:
-                    for previous_segment in corrected_lyrics_dict["segments"]:
-                        if previous_segment["id"] in (segment["id"] - 2, segment["id"] - 1):
-                            previous_two_corrected_lines += previous_segment["text"].strip() + "\n"
+                for previous_segment in corrected_lyrics_dict["segments"]:
+                    if previous_segment["id"] in (segment["id"] - 2, segment["id"] - 1):
+                        previous_two_corrected_lines += previous_segment["text"].strip() + "\n"
-                    for next_segment in self.outputs["transcription_data_dict"]["segments"]:
-                        if next_segment["id"] in (segment["id"] + 1, segment["id"] + 2):
-                            upcoming_two_uncorrected_lines += next_segment["text"].strip() + "\n"
+                for next_segment in self.outputs["transcription_data_dict"]["segments"]:
+                    if next_segment["id"] in (segment["id"] + 1, segment["id"] + 2):
+                        upcoming_two_uncorrected_lines += next_segment["text"].strip() + "\n"
                 llm_transcript_segment += f"--- Segment {segment['id']} / {total_segments} ---\n"
                 llm_transcript_segment += f"Previous two corrected lines:\n\n{previous_two_corrected_lines}\nUpcoming two uncorrected lines:\n\n{upcoming_two_uncorrected_lines}\nData input:\n\n{segment_input}\n"
@@ -577,6 +580,8 @@ class LyricsTranscriber:
             if screen is None:
                 self.logger.debug(f"screen is none, creating new LyricsScreen")
                 screen = subtitles.LyricsScreen()
+                screen.video_size = self.video_resolution_num
+                screen.line_height = self.line_height
             if line is None:
                 self.logger.debug(f"line is none, creating new LyricsLine")
                 line = subtitles.LyricsLine()

{lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/lyrics_transcriber/utils/subtitles.py RENAMED Viewed

@@ -1,6 +1,6 @@
 from dataclasses import dataclass, field
 from datetime import timedelta
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Tuple
 import json
 import itertools
 from pathlib import Path
@@ -13,14 +13,6 @@ from . import ass
 Functions for generating ASS subtitles from lyric data
 """
-VIDEO_SIZE = (400, 320)
-LINE_HEIGHT = 30
-class LyricMarker(IntEnum):
-    SEGMENT_START = 1
-    SEGMENT_END = 2
 class LyricSegmentIterator:
     def __init__(self, lyrics_segments: List[str]):
@@ -143,16 +135,17 @@ class LyricsLine:
 class LyricsScreen:
     lines: List[LyricsLine] = field(default_factory=list)
     start_ts: Optional[timedelta] = None
+    video_size: Tuple[int, int] = None
+    line_height: int = None
     @property
     def end_ts(self) -> timedelta:
         return self.lines[-1].end_ts
     def get_line_y(self, line_num: int) -> int:
-        _, h = VIDEO_SIZE
+        _, h = self.video_size
         line_count = len(self.lines)
-        line_height = LINE_HEIGHT
-        return (h / 2) - (line_count * line_height / 2) + (line_num * line_height)
+        return (h / 2) - (line_count * self.line_height / 2) + (line_num * self.line_height)
     def as_ass_events(self, style: ass.ASS.Style) -> List[ass.ASS.Event]:
         return [line.as_ass_event(self.start_ts, self.end_ts, style, self.get_line_y(i)) for i, line in enumerate(self.lines)]
@@ -185,46 +178,6 @@ class LyricsObjectJSONEncoder(json.JSONEncoder):
         return super().default(o)
-def create_screens(logger, lyrics_segments, events_tuples):
-    segments = iter(LyricSegmentIterator(lyrics_segments=lyrics_segments))
-    events = iter(events_tuples)
-    screens: List[LyricsScreen] = []
-    prev_segment: Optional[LyricSegment] = None
-    line: Optional[LyricsLine] = None
-    screen: Optional[LyricsScreen] = None
-    try:
-        for event in events:
-            ts = event[0]
-            marker = event[1]
-            if marker == LyricMarker.SEGMENT_START:
-                segment_text: str = next(segments)
-                segment = LyricSegment(segment_text, ts)
-                if screen is None:
-                    screen = LyricsScreen()
-                if line is None:
-                    line = LyricsLine()
-                line.segments.append(segment)
-                if segment_text.endswith("\n"):
-                    screen.lines.append(line)
-                    line = None
-                if segment_text.endswith("\n\n"):
-                    screens.append(screen)
-                    screen = None
-                prev_segment = segment
-            elif marker == LyricMarker.SEGMENT_END:
-                if prev_segment is not None:
-                    prev_segment.end_ts = ts
-        if line is not None:
-            screen.lines.append(line)  # type: ignore[union-attr]
-        if screen is not None and len(screen.lines) > 0:
-            screens.append(screen)  # type: ignore[arg-type]
-    except StopIteration as si:
-        logger.error(f"Reached end of segments before end of events. Events: {list(events)}, lyrics: {list(segments)}")
-    return screens
 def set_segment_end_times(screens: List[LyricsScreen], song_duration_seconds: int) -> List[LyricsScreen]:
     """
     Infer end times of lines for screens where they are not already set.
@@ -254,10 +207,14 @@ def set_screen_start_times(screens: List[LyricsScreen]) -> List[LyricsScreen]:
     return screens
-def create_styled_subtitles(lyric_screens: List[LyricsScreen], resolution, fontsize) -> ass.ASS:
+def create_styled_subtitles(
+    lyric_screens: List[LyricsScreen],
+    resolution,
+    fontsize,
+) -> ass.ASS:
     a = ass.ASS()
     a.set_resolution(resolution)
     a.styles_format = [
         "Name",  # The name of the Style. Case sensitive. Cannot include commas.
         "Fontname",  # The fontname as used by Windows. Case-sensitive.
@@ -278,9 +235,9 @@ def create_styled_subtitles(lyric_screens: List[LyricsScreen], resolution, fonts
         "Outline",  # If BorderStyle is 1,  then this specifies the width of the outline around the text, in pixels. Values may be 0, 1, 2, 3 or 4.
         "Shadow",  # If BorderStyle is 1,  then this specifies the depth of the drop shadow behind the text, in pixels. Values may be 0, 1, 2, 3 or 4. Drop shadow is always used in addition to an outline - SSA will force an outline of 1 pixel if no outline width is given.
         "Alignment",  # This sets how text is "justified" within the Left/Right onscreen margins, and also the vertical placing. Values may be 1=Left, 2=Centered, 3=Right. Add 4 to the value for a "Toptitle". Add 8 to the value for a "Midtitle". eg. 5 = left-justified toptitle
-        "MarginL",  #
-        "MarginR",  #
-        "MarginV",  #
+        "MarginL",  # This defines the Left Margin in pixels. It is the distance from the left-hand edge of the screen. The three onscreen margins (MarginL, MarginR, MarginV) define areas in which the subtitle text will be displayed.
+        "MarginR",  # This defines the Right Margin in pixels. It is the distance from the right-hand edge of the screen.
+        "MarginV",  # This defines the vertical Margin in pixels. For a subtitle, it is the distance from the bottom of the screen. For a toptitle, it is the distance from the top of the screen. For a midtitle, the value is ignored - the text will be vertically centred.
         "Encoding",  #
     ]

{lyrics_transcriber-0.12.7 → lyrics_transcriber-0.12.9}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lyrics-transcriber"
-version = "0.12.7"
+version = "0.12.9"
 description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
 authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
 license = "MIT"
@@ -26,7 +26,7 @@ lyricsgenius = "^3"
 python-slugify = "^8"
 syrics = "^0"
 openai = "^1"
-openai-whisper = "20231106"
+openai-whisper = "20231117"
 whisper-timestamped = "^1"
 # Note: after adding openai-whisper and whisper-timestamped with poetry lock, I then removed all traces of triton
 # from poetry.lock before running poetry install, as triton doesn't support macOS but isn't actually needed for whisper.

lyrics_transcriber-0.12.7/lyrics_transcriber/llm_prompts/promptfooconfig.yaml DELETED Viewed

@@ -1,39 +0,0 @@
-# This configuration runs each prompt through a series of example inputs and checks if they meet requirements.
-# Learn more: https://promptfoo.dev/docs/configuration/guide
-prompts:
-  - file://llm_prompt_lyrics_correction_*.txt
-providers: [openai:gpt-3.5-turbo-0613, openai:gpt-4-1106-preview]
-tests:
-  - description: First test case - automatic review
-    vars:
-      var1: first variable's value
-      var2: another value
-      var3: some other value
-    # For more information on assertions, see https://promptfoo.dev/docs/configuration/expected-outputs
-    assert:
-      - type: equals
-        value: expected LLM output goes here
-      - type: contains
-        value: some text
-      - type: javascript
-        value: 1 / (output.length + 1)  # prefer shorter outputs
-  - description: Second test case - manual review
-    # Test cases don't need assertions if you prefer to manually review the output
-    vars:
-      var1: new value
-      var2: another value
-      var3: third value
-  - description: Third test case - other types of automatic review
-    vars:
-      var1: yet another value
-      var2: and another
-      var3: dear llm, please output your response in json format
-    assert:
-      - type: contains-json
-      - type: similar
-        value: ensures that output is semantically similar to this text
-      - type: model-graded-closedqa
-        value: ensure that output contains a reference to X