karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. karaoke_gen/audio_fetcher.py +461 -0
  2. karaoke_gen/audio_processor.py +407 -30
  3. karaoke_gen/config.py +62 -113
  4. karaoke_gen/file_handler.py +32 -59
  5. karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
  6. karaoke_gen/karaoke_gen.py +270 -61
  7. karaoke_gen/lyrics_processor.py +13 -1
  8. karaoke_gen/metadata.py +78 -73
  9. karaoke_gen/pipeline/__init__.py +87 -0
  10. karaoke_gen/pipeline/base.py +215 -0
  11. karaoke_gen/pipeline/context.py +230 -0
  12. karaoke_gen/pipeline/executors/__init__.py +21 -0
  13. karaoke_gen/pipeline/executors/local.py +159 -0
  14. karaoke_gen/pipeline/executors/remote.py +257 -0
  15. karaoke_gen/pipeline/stages/__init__.py +27 -0
  16. karaoke_gen/pipeline/stages/finalize.py +202 -0
  17. karaoke_gen/pipeline/stages/render.py +165 -0
  18. karaoke_gen/pipeline/stages/screens.py +139 -0
  19. karaoke_gen/pipeline/stages/separation.py +191 -0
  20. karaoke_gen/pipeline/stages/transcription.py +191 -0
  21. karaoke_gen/style_loader.py +531 -0
  22. karaoke_gen/utils/bulk_cli.py +6 -0
  23. karaoke_gen/utils/cli_args.py +424 -0
  24. karaoke_gen/utils/gen_cli.py +26 -261
  25. karaoke_gen/utils/remote_cli.py +1965 -0
  26. karaoke_gen/video_background_processor.py +351 -0
  27. karaoke_gen-0.71.27.dist-info/METADATA +610 -0
  28. karaoke_gen-0.71.27.dist-info/RECORD +275 -0
  29. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
  30. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
  31. lyrics_transcriber/__init__.py +10 -0
  32. lyrics_transcriber/cli/__init__.py +0 -0
  33. lyrics_transcriber/cli/cli_main.py +285 -0
  34. lyrics_transcriber/core/__init__.py +0 -0
  35. lyrics_transcriber/core/config.py +50 -0
  36. lyrics_transcriber/core/controller.py +520 -0
  37. lyrics_transcriber/correction/__init__.py +0 -0
  38. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  39. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  40. lyrics_transcriber/correction/agentic/agent.py +313 -0
  41. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  42. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  43. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  44. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  45. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  46. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  47. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  48. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  49. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  50. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  51. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  52. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  53. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  54. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  55. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  56. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  57. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  58. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  59. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  60. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  61. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  62. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  63. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  64. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  65. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  66. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  67. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  68. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  69. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  70. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  71. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  72. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  73. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  74. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  75. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  76. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  77. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  78. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  79. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  80. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  81. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  82. lyrics_transcriber/correction/agentic/router.py +35 -0
  83. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  84. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  85. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  86. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  87. lyrics_transcriber/correction/anchor_sequence.py +1043 -0
  88. lyrics_transcriber/correction/corrector.py +760 -0
  89. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  90. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  91. lyrics_transcriber/correction/feedback/store.py +236 -0
  92. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  93. lyrics_transcriber/correction/handlers/base.py +52 -0
  94. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  95. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  96. lyrics_transcriber/correction/handlers/llm.py +293 -0
  97. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  98. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  99. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  100. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  101. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  102. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  103. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  104. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  105. lyrics_transcriber/correction/operations.py +352 -0
  106. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  107. lyrics_transcriber/correction/text_utils.py +30 -0
  108. lyrics_transcriber/frontend/.gitignore +23 -0
  109. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  110. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  111. lyrics_transcriber/frontend/README.md +50 -0
  112. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  113. lyrics_transcriber/frontend/__init__.py +25 -0
  114. lyrics_transcriber/frontend/eslint.config.js +28 -0
  115. lyrics_transcriber/frontend/index.html +18 -0
  116. lyrics_transcriber/frontend/package.json +42 -0
  117. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  118. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  119. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  120. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  121. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  122. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  123. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  124. lyrics_transcriber/frontend/src/App.tsx +212 -0
  125. lyrics_transcriber/frontend/src/api.ts +239 -0
  126. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  127. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  128. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  129. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  130. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  131. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  132. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  133. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  134. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  135. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  136. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  137. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  138. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  139. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  140. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  141. lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
  142. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
  143. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  144. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  145. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  146. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  147. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  148. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
  149. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  150. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  151. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  152. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  153. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  154. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  155. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  157. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  158. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  159. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  160. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  161. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  162. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  163. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  164. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  165. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  166. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  167. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  168. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  169. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  170. lyrics_transcriber/frontend/src/main.tsx +17 -0
  171. lyrics_transcriber/frontend/src/theme.ts +177 -0
  172. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  173. lyrics_transcriber/frontend/src/types.js +2 -0
  174. lyrics_transcriber/frontend/src/types.ts +199 -0
  175. lyrics_transcriber/frontend/src/validation.ts +132 -0
  176. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  177. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  178. lyrics_transcriber/frontend/tsconfig.json +25 -0
  179. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  180. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  181. lyrics_transcriber/frontend/update_version.js +11 -0
  182. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  183. lyrics_transcriber/frontend/vite.config.js +10 -0
  184. lyrics_transcriber/frontend/vite.config.ts +11 -0
  185. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  186. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  187. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  188. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
  189. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  191. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  192. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  193. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  194. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  195. lyrics_transcriber/frontend/yarn.lock +3752 -0
  196. lyrics_transcriber/lyrics/__init__.py +0 -0
  197. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  198. lyrics_transcriber/lyrics/file_provider.py +95 -0
  199. lyrics_transcriber/lyrics/genius.py +384 -0
  200. lyrics_transcriber/lyrics/lrclib.py +231 -0
  201. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  202. lyrics_transcriber/lyrics/spotify.py +290 -0
  203. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  204. lyrics_transcriber/output/__init__.py +0 -0
  205. lyrics_transcriber/output/ass/__init__.py +21 -0
  206. lyrics_transcriber/output/ass/ass.py +2088 -0
  207. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  208. lyrics_transcriber/output/ass/config.py +180 -0
  209. lyrics_transcriber/output/ass/constants.py +23 -0
  210. lyrics_transcriber/output/ass/event.py +94 -0
  211. lyrics_transcriber/output/ass/formatters.py +132 -0
  212. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  213. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  214. lyrics_transcriber/output/ass/section_detector.py +89 -0
  215. lyrics_transcriber/output/ass/section_screen.py +106 -0
  216. lyrics_transcriber/output/ass/style.py +187 -0
  217. lyrics_transcriber/output/cdg.py +619 -0
  218. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  219. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  220. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  221. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  222. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  223. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  224. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  225. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  226. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  227. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  228. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  229. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  230. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  231. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  232. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  233. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  234. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  235. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  236. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  237. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  238. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  239. lyrics_transcriber/output/countdown_processor.py +267 -0
  240. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  241. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  242. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  243. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  244. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  245. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  246. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  247. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  248. lyrics_transcriber/output/generator.py +257 -0
  249. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  250. lyrics_transcriber/output/lyrics_file.py +102 -0
  251. lyrics_transcriber/output/plain_text.py +96 -0
  252. lyrics_transcriber/output/segment_resizer.py +431 -0
  253. lyrics_transcriber/output/subtitles.py +397 -0
  254. lyrics_transcriber/output/video.py +544 -0
  255. lyrics_transcriber/review/__init__.py +0 -0
  256. lyrics_transcriber/review/server.py +676 -0
  257. lyrics_transcriber/storage/__init__.py +0 -0
  258. lyrics_transcriber/storage/dropbox.py +225 -0
  259. lyrics_transcriber/transcribers/__init__.py +0 -0
  260. lyrics_transcriber/transcribers/audioshake.py +290 -0
  261. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  262. lyrics_transcriber/transcribers/whisper.py +330 -0
  263. lyrics_transcriber/types.py +648 -0
  264. lyrics_transcriber/utils/__init__.py +0 -0
  265. lyrics_transcriber/utils/word_utils.py +27 -0
  266. karaoke_gen-0.57.0.dist-info/METADATA +0 -167
  267. karaoke_gen-0.57.0.dist-info/RECORD +0 -23
  268. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,2 @@
1
+ """Human feedback collection system for continuous improvement."""
2
+
@@ -0,0 +1,107 @@
1
+ """Schemas for correction annotations and human feedback."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional, List, Dict, Any
6
+ from pydantic import BaseModel, Field
7
+ from enum import Enum
8
+ from datetime import datetime
9
+ import uuid
10
+
11
+
12
class CorrectionAnnotationType(str, Enum):
    """Closed set of labels an annotation can carry for its correction type.

    Members also subclass ``str``: each one compares equal to its string
    value, and every value is spelled exactly like the member name.
    """

    PUNCTUATION_ONLY = "PUNCTUATION_ONLY"
    SOUND_ALIKE = "SOUND_ALIKE"
    BACKGROUND_VOCALS = "BACKGROUND_VOCALS"
    EXTRA_WORDS = "EXTRA_WORDS"
    REPEATED_SECTION = "REPEATED_SECTION"
    COMPLEX_MULTI_ERROR = "COMPLEX_MULTI_ERROR"
    AMBIGUOUS = "AMBIGUOUS"
    NO_ERROR = "NO_ERROR"

    # Edit started by a human rather than derived from a gap.
    MANUAL_EDIT = "MANUAL_EDIT"
23
+
24
+
25
class CorrectionAction(str, Enum):
    """Closed set of actions an annotation can record for a correction.

    Members also subclass ``str``, so a member compares equal to its plain
    string value (for example ``CorrectionAction.REPLACE == "REPLACE"``).
    """

    # The "nothing was changed" outcome.
    NO_ACTION = "NO_ACTION"

    # Outcomes that change or flag the text.
    REPLACE = "REPLACE"
    DELETE = "DELETE"
    INSERT = "INSERT"
    MERGE = "MERGE"
    SPLIT = "SPLIT"
    FLAG = "FLAG"
34
+
35
+
36
class CorrectionAnnotation(BaseModel):
    """Annotation for a manual correction made by a human.

    One instance describes a single correction: how it was classified, which
    action was taken, the text before and after, the reviewer's confidence
    and reasoning, an optional comparison with the AI proposal, and the
    song/session it belongs to.
    """

    # Generated automatically (UUID4 string) when the caller does not supply one.
    annotation_id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique identifier")
    audio_hash: str = Field(..., description="Hash of the audio file")
    gap_id: Optional[str] = Field(None, description="Gap ID if this correction is for a gap")

    # Classification
    annotation_type: CorrectionAnnotationType = Field(..., description="Type of correction")
    action_taken: CorrectionAction = Field(..., description="Action that was taken")

    # Content
    original_text: str = Field(..., description="Original transcribed text")
    corrected_text: str = Field(..., description="Corrected text after human edit")

    # Metadata
    # Bounds are inclusive: values outside [1.0, 5.0] are rejected.
    confidence: float = Field(..., ge=1.0, le=5.0, description="Human confidence rating (1-5)")
    # At least 10 characters are required.
    reasoning: str = Field(..., min_length=10, description="Human explanation for the correction")
    word_ids_affected: List[str] = Field(default_factory=list, description="Word IDs involved in correction")

    # Agentic AI comparison
    agentic_proposal: Optional[Dict[str, Any]] = Field(None, description="What the AI suggested (if applicable)")
    agentic_category: Optional[str] = Field(None, description="Category the AI classified this as")
    agentic_agreed: bool = Field(False, description="Whether human agreed with AI proposal")

    # Reference lyrics
    reference_sources_consulted: List[str] = Field(default_factory=list, description="Which reference sources were used")

    # Song metadata
    artist: str = Field(..., description="Song artist")
    title: str = Field(..., description="Song title")
    session_id: str = Field(..., description="Correction session ID")

    # Timestamp
    # NOTE: datetime.utcnow produces a naive datetime (no tzinfo attached).
    timestamp: datetime = Field(default_factory=datetime.utcnow, description="When annotation was created")

    class Config:
        # Example payload published with the JSON schema. "annotation_type"
        # must be one of the CorrectionAnnotationType values, which are all
        # upper-case, otherwise the example does not validate against the model.
        json_schema_extra = {
            "example": {
                "annotation_id": "550e8400-e29b-41d4-a716-446655440000",
                "audio_hash": "abc123",
                "gap_id": "gap_1",
                "annotation_type": "SOUND_ALIKE",
                "action_taken": "REPLACE",
                "original_text": "out I'm starting over",
                "corrected_text": "now I'm starting over",
                "confidence": 5.0,
                "reasoning": "The word 'out' sounds like 'now' but the reference lyrics and context make it clear it should be 'now'",
                "word_ids_affected": ["word_123"],
                "agentic_proposal": {"action": "ReplaceWord", "replacement_text": "now"},
                "agentic_category": "sound_alike",
                "agentic_agreed": True,
                "reference_sources_consulted": ["genius", "spotify"],
                "artist": "Rancid",
                "title": "Time Bomb",
                "session_id": "session_abc",
                "timestamp": "2025-01-01T12:00:00"
            }
        }
95
+
96
+
97
class AnnotationStatistics(BaseModel):
    """Summary figures computed over a collection of annotations.

    Every field has a default, so the model can be built with no arguments
    to represent "nothing annotated yet".
    """

    # Overall count of annotations.
    total_annotations: int = Field(default=0)

    # Per-type and per-action counts.
    annotations_by_type: Dict[str, int] = Field(default_factory=dict)
    annotations_by_action: Dict[str, int] = Field(default_factory=dict)

    # Aggregate confidence and AI-agreement figures.
    average_confidence: float = Field(default=0.0)
    agentic_agreement_rate: float = Field(default=0.0)

    # Free-form records describing the most frequent error patterns.
    most_common_errors: List[Dict[str, Any]] = Field(default_factory=list)

    # Count of songs that have annotations.
    songs_annotated: int = Field(default=0)
107
+
@@ -0,0 +1,236 @@
1
+ """Storage backend for correction annotations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from pathlib import Path
8
+ from typing import List, Dict, Any, Optional
9
+ from datetime import datetime
10
+ from collections import Counter, defaultdict
11
+
12
+ from .schemas import CorrectionAnnotation, AnnotationStatistics
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class FeedbackStore:
    """Persists correction annotations as one JSON object per line (JSONL)."""

    def __init__(self, storage_dir: str = "cache"):
        """Set up the storage directory and make sure the annotations file exists.

        Args:
            storage_dir: Directory holding ``correction_annotations.jsonl``;
                it is created, including parents, when missing.
        """
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(parents=True, exist_ok=True)
        self.annotations_file = self.storage_dir / "correction_annotations.jsonl"

        # Nothing more to do when the file is already there.
        if self.annotations_file.exists():
            return

        self.annotations_file.touch()
        logger.info(f"Created annotations file: {self.annotations_file}")

    def save_annotation(self, annotation: CorrectionAnnotation) -> bool:
        """Append one annotation to the JSONL file.

        Args:
            annotation: The annotation to persist.

        Returns:
            True when the line was written, False when any error occurred
            (the error is logged, not raised).
        """
        try:
            record = annotation.model_dump()
            # The dumped timestamp is a datetime; store it as an ISO-8601 string.
            record['timestamp'] = record['timestamp'].isoformat()

            with open(self.annotations_file, 'a', encoding='utf-8') as handle:
                serialized = json.dumps(record, ensure_ascii=False)
                handle.write(serialized + '\n')

            logger.debug(f"Saved annotation {annotation.annotation_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to save annotation: {e}")
            return False

    def save_annotations(self, annotations: List[CorrectionAnnotation]) -> int:
        """Save several annotations, one after another.

        Args:
            annotations: Annotations to persist.

        Returns:
            How many of them were saved successfully.
        """
        outcomes = [self.save_annotation(item) for item in annotations]
        return sum(1 for ok in outcomes if ok)

    def get_all_annotations(self) -> List[CorrectionAnnotation]:
        """Read every annotation stored in the JSONL file.

        Blank lines are skipped. A line that cannot be parsed or validated is
        logged as a warning and skipped. If reading the file itself fails, the
        error is logged and an empty list is returned.

        Returns:
            The annotations that were loaded successfully.
        """
        loaded = []

        if not self.annotations_file.exists():
            return loaded

        try:
            with open(self.annotations_file, 'r', encoding='utf-8') as handle:
                for line_num, raw_line in enumerate(handle, 1):
                    text = raw_line.strip()
                    if text:
                        try:
                            data = json.loads(text)
                            # Stored timestamps are ISO strings; turn them back into datetimes.
                            if isinstance(data.get('timestamp'), str):
                                data['timestamp'] = datetime.fromisoformat(data['timestamp'])

                            loaded.append(CorrectionAnnotation.model_validate(data))

                        except Exception as e:
                            logger.warning(f"Failed to parse annotation on line {line_num}: {e}")

            logger.debug(f"Loaded {len(loaded)} annotations")
            return loaded

        except Exception as e:
            logger.error(f"Failed to load annotations: {e}")
            return []

    def get_annotations_by_song(self, audio_hash: str) -> List[CorrectionAnnotation]:
        """Return the annotations recorded for one song.

        Args:
            audio_hash: Hash of the audio file to match.

        Returns:
            Annotations whose ``audio_hash`` equals the given value.
        """
        matches = []
        for candidate in self.get_all_annotations():
            if candidate.audio_hash == audio_hash:
                matches.append(candidate)
        return matches

    def get_annotations_by_category(self, category: str) -> List[CorrectionAnnotation]:
        """Return the annotations of one annotation type.

        Args:
            category: Annotation type to match.

        Returns:
            Annotations whose ``annotation_type`` equals the given value.
        """
        matches = []
        for candidate in self.get_all_annotations():
            if candidate.annotation_type == category:
                matches.append(candidate)
        return matches

    def get_statistics(self) -> AnnotationStatistics:
        """Compute aggregated statistics over all stored annotations.

        Returns:
            An AnnotationStatistics instance; all defaults when nothing is stored.
        """
        annotations = self.get_all_annotations()
        total = len(annotations)

        if total == 0:
            return AnnotationStatistics()

        # Tally annotation types and actions in one pass.
        type_counts = Counter()
        action_counts = Counter()
        for a in annotations:
            type_counts[a.annotation_type] += 1
            action_counts[a.action_taken] += 1

        avg_confidence = sum(a.confidence for a in annotations) / total

        # Agreement is measured only over annotations that carry an AI proposal.
        with_proposal = [a for a in annotations if a.agentic_proposal is not None]
        if not with_proposal:
            agentic_agreement_rate = 0.0
        else:
            agreed = [a for a in with_proposal if a.agentic_agreed]
            agentic_agreement_rate = len(agreed) / len(with_proposal)

        # Group annotations by their "original -> corrected" pattern,
        # leaving out those where no action was taken.
        error_patterns = defaultdict(list)
        for a in annotations:
            if a.action_taken == "NO_ACTION":
                continue
            error_patterns[f"{a.original_text} -> {a.corrected_text}"].append(a)

        # Keep the ten patterns with the most occurrences.
        ranked = sorted(error_patterns.items(), key=lambda item: len(item[1]), reverse=True)
        most_common = []
        for pattern, anns in ranked[:10]:
            most_common.append(
                {
                    "pattern": pattern,
                    "count": len(anns),
                    "annotation_type": anns[0].annotation_type
                }
            )

        unique_hashes = {a.audio_hash for a in annotations}

        return AnnotationStatistics(
            total_annotations=total,
            annotations_by_type=dict(type_counts),
            annotations_by_action=dict(action_counts),
            average_confidence=avg_confidence,
            agentic_agreement_rate=agentic_agreement_rate,
            most_common_errors=most_common,
            songs_annotated=len(unique_hashes)
        )

    def export_to_training_data(self, output_file: Optional[Path] = None) -> Path:
        """Write high-confidence annotations as input/output training examples.

        Only annotations with ``confidence >= 4.0`` are exported, one JSON
        object per line. The output file is overwritten.

        Args:
            output_file: Destination path; defaults to ``training_data.jsonl``
                inside the storage directory.

        Returns:
            Path of the file that was written.
        """
        if output_file is None:
            output_file = self.storage_dir / "training_data.jsonl"

        # Keep only the annotations rated 4 or higher.
        high_confidence = []
        for candidate in self.get_all_annotations():
            if candidate.confidence >= 4.0:
                high_confidence.append(candidate)

        with open(output_file, 'w', encoding='utf-8') as handle:
            for annotation in high_confidence:
                example_input = {
                    "original_text": annotation.original_text,
                    "annotation_type": annotation.annotation_type,
                    "artist": annotation.artist,
                    "title": annotation.title,
                    "reference_sources": annotation.reference_sources_consulted
                }
                example_output = {
                    "action": annotation.action_taken,
                    "corrected_text": annotation.corrected_text,
                    "reasoning": annotation.reasoning
                }
                example_metadata = {
                    "confidence": annotation.confidence,
                    "annotation_id": annotation.annotation_id,
                    "timestamp": annotation.timestamp.isoformat()
                }
                training_example = {
                    "input": example_input,
                    "output": example_output,
                    "metadata": example_metadata
                }
                handle.write(json.dumps(training_example, ensure_ascii=False) + '\n')

        logger.info(f"Exported {len(high_confidence)} training examples to {output_file}")
        return output_file
236
+
File without changes
@@ -0,0 +1,52 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Optional, Tuple, Dict, Any
3
+ import logging
4
+
5
+ from lyrics_transcriber.types import GapSequence, WordCorrection
6
+
7
+
8
class GapCorrectionHandler(ABC):
    """Abstract interface that every gap correction handler implements."""

    def __init__(self, logger: Optional[logging.Logger] = None):
        # Use the supplied logger when given, otherwise this module's logger.
        self.logger = logger if logger else logging.getLogger(__name__)

    @abstractmethod
    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
        """Decide whether this handler is able to process ``gap``.

        Args:
            gap: The gap sequence under consideration
            data: Optional dictionary with additional data such as word_map

        Returns:
            A ``(can_handle, handler_data)`` tuple
        """

    @abstractmethod
    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
        """Process ``gap`` and produce corrections for it.

        Args:
            gap: The gap sequence to process
            data: Optional dictionary with additional data such as word_map

        Returns:
            The list of corrections to apply
        """

    def _validate_data(self, data: Optional[Dict[str, Any]]) -> bool:
        """Check that ``data`` is non-empty and contains a ``word_map`` entry.

        Args:
            data: The data dictionary to check

        Returns:
            True when ``word_map`` is present; otherwise an error is logged
            and False is returned
        """
        if data and "word_map" in data:
            return True

        self.logger.error("No word_map provided in data")
        return False
@@ -0,0 +1,149 @@
1
+ from typing import List, Optional, Tuple, Dict, Any
2
+ import logging
3
+
4
+ from lyrics_transcriber.types import GapSequence, WordCorrection, Word
5
+ from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
6
+ from lyrics_transcriber.correction.handlers.word_operations import WordOperations
7
+
8
+
9
class ExtendAnchorHandler(GapCorrectionHandler):
    """Handles gaps where some words match reference text but there are extra words.

    This handler looks for cases where:
    1. One or more words in the gap match words in the same position in at least one reference source
    2. The gap may contain additional words that aren't in the reference

    When such matches are found, it:
    1. Validates all matching words (creates corrections that keep the same words)
    2. Leaves all non-matching words unchanged for other handlers to process

    The confidence of validations is based on the ratio of reference sources that agree.
    For example, if 2 out of 4 sources have the matching word, confidence will be 0.5.

    Examples:
        Gap: "hello world extra words"
        References:
            genius: ["hello", "world"]
            spotify: ["hello", "world"]
        Result:
            - Validate "hello" (confidence=1.0)
            - Validate "world" (confidence=1.0)
            - Leave "extra" and "words" unchanged

        Gap: "martyr youre a"
        References:
            genius: ["martyr"]
            spotify: ["mother"]
        Result:
            - Validate "martyr" (confidence=0.5, source="genius")
            - Leave "youre" and "a" unchanged
    """

    def __init__(self, logger: Optional[logging.Logger] = None):
        # Resolve the default here so the logger keeps this module's name, then
        # chain to the base initializer instead of duplicating its work.
        super().__init__(logger or logging.getLogger(__name__))

    @staticmethod
    def _iter_matches(position: int, word: Any, gap: GapSequence, word_map: Dict[Any, Any]):
        """Yield ``(source, ref_word_id)`` for each reference that matches ``word``.

        A reference source matches when it has a word ID at index ``position``,
        that ID is present in ``word_map``, and the resolved word's text equals
        ``word.text`` ignoring case (via ``str.lower``).

        Args:
            position: Index of the word within the gap's transcribed word IDs.
            word: The transcribed word object (must expose ``.text``).
            gap: The gap whose ``reference_word_ids`` mapping is searched.
            word_map: Lookup from word ID to word object.

        Yields:
            ``(source, ref_word_id)`` tuples in the iteration order of
            ``gap.reference_word_ids``.
        """
        for source, ref_word_ids in gap.reference_word_ids.items():
            if position >= len(ref_word_ids):
                continue
            ref_word_id = ref_word_ids[position]
            if ref_word_id not in word_map:
                continue
            if word.text.lower() == word_map[ref_word_id].text.lower():
                yield source, ref_word_id

    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
        """Check if this gap can be handled by extending anchor sequences.

        Args:
            gap: The gap sequence to check.
            data: Dictionary that must contain a ``word_map`` entry.

        Returns:
            ``(has_match, handler_data)``. ``has_match`` is ``True`` when at least
            one transcribed word matches a reference word at the same position;
            ``handler_data`` is ``{"word_map": word_map}`` once the word map has
            been located, and ``{}`` on any early rejection.
        """
        # Must have reference word IDs
        if not gap.reference_word_ids:
            self.logger.debug("No reference word IDs available.")
            return False, {}

        # Gap must have word IDs
        if not gap.transcribed_word_ids:
            self.logger.debug("No word IDs in the gap to process.")
            return False, {}

        # Must have word map to resolve IDs to actual words
        if not self._validate_data(data):
            return False, {}

        word_map = data["word_map"]

        # At least one word must match between gap and any reference source by text content.
        # The generator stops at the first match, so later sources are not inspected.
        has_match = any(
            any(self._iter_matches(i, word_map[trans_word_id], gap, word_map))
            for i, trans_word_id in enumerate(gap.transcribed_word_ids)
            if trans_word_id in word_map
        )

        self.logger.debug(f"Can handle gap: {has_match}")
        return has_match, {"word_map": word_map}

    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
        """Create validating corrections for gap words that match a reference.

        For every transcribed word whose text matches the word at the same index
        in one or more reference sources, a replacement correction is built that
        keeps the word unchanged. Words with no matching reference are skipped.

        Args:
            gap: The gap sequence to process.
            data: Dictionary that must contain a ``word_map`` entry.

        Returns:
            The list of corrections; empty when ``data`` lacks a word map, when
            the gap has no transcribed or reference word IDs, or when nothing
            matches.
        """
        # Get word lookup map from data
        if not self._validate_data(data):
            return []

        # Without transcribed or reference IDs there is nothing to validate; this
        # also keeps a direct call (without can_handle) from failing on None.
        if not gap.transcribed_word_ids or not gap.reference_word_ids:
            return []

        word_map = data["word_map"]
        corrections = []
        total_sources = len(gap.reference_word_ids)

        # Process each word in the gap that has a corresponding reference position
        for i, word_id in enumerate(gap.transcribed_word_ids):
            # Get the actual word object
            if word_id not in word_map:
                self.logger.error(f"Word ID {word_id} not found in word_map")
                continue
            word = word_map[word_id]

            # Find reference sources that have a matching word (by text) at this position
            matches = list(self._iter_matches(i, word, gap, word_map))
            if not matches:
                self.logger.debug(f"Skipping word '{word.text}' at position {i} - no matching references")
                continue

            matching_sources = [source for source, _ in matches]
            # The first matching reference supplies the corrected word ID.
            corrected_word_id = matches[0][1]

            # Word matches reference(s) at this position - validate it.
            # Confidence is the share of all reference sources that agree.
            confidence = len(matching_sources) / total_sources
            sources = ", ".join(matching_sources)

            # Get base reference positions
            # NOTE(review): presumably a per-source start offset for the gap - confirm in WordOperations.
            base_reference_positions = WordOperations.calculate_reference_positions(gap, matching_sources)

            # Offset each base position by the word's index within the gap
            reference_positions = {
                source: base_reference_positions[source] + i
                for source in matching_sources
                if source in base_reference_positions
            }

            corrections.append(
                WordOperations.create_word_replacement_correction(
                    original_word=word.text,
                    corrected_word=word.text,
                    original_position=gap.transcription_position + i,
                    source=sources,
                    confidence=confidence,
                    reason="Matched reference source(s)",
                    reference_positions=reference_positions,
                    handler="ExtendAnchorHandler",
                    original_word_id=word_id,
                    corrected_word_id=corrected_word_id,
                )
            )
            self.logger.debug(f"Validated word '{word.text}' with confidence {confidence} from sources: {sources}")

        return corrections