karaoke-gen 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. karaoke_gen/audio_fetcher.py +461 -0
  2. karaoke_gen/audio_processor.py +407 -30
  3. karaoke_gen/config.py +62 -113
  4. karaoke_gen/file_handler.py +32 -59
  5. karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
  6. karaoke_gen/karaoke_gen.py +270 -61
  7. karaoke_gen/lyrics_processor.py +13 -1
  8. karaoke_gen/metadata.py +78 -73
  9. karaoke_gen/pipeline/__init__.py +87 -0
  10. karaoke_gen/pipeline/base.py +215 -0
  11. karaoke_gen/pipeline/context.py +230 -0
  12. karaoke_gen/pipeline/executors/__init__.py +21 -0
  13. karaoke_gen/pipeline/executors/local.py +159 -0
  14. karaoke_gen/pipeline/executors/remote.py +257 -0
  15. karaoke_gen/pipeline/stages/__init__.py +27 -0
  16. karaoke_gen/pipeline/stages/finalize.py +202 -0
  17. karaoke_gen/pipeline/stages/render.py +165 -0
  18. karaoke_gen/pipeline/stages/screens.py +139 -0
  19. karaoke_gen/pipeline/stages/separation.py +191 -0
  20. karaoke_gen/pipeline/stages/transcription.py +191 -0
  21. karaoke_gen/style_loader.py +531 -0
  22. karaoke_gen/utils/bulk_cli.py +6 -0
  23. karaoke_gen/utils/cli_args.py +424 -0
  24. karaoke_gen/utils/gen_cli.py +26 -261
  25. karaoke_gen/utils/remote_cli.py +1815 -0
  26. karaoke_gen/video_background_processor.py +351 -0
  27. karaoke_gen-0.71.23.dist-info/METADATA +610 -0
  28. karaoke_gen-0.71.23.dist-info/RECORD +275 -0
  29. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/WHEEL +1 -1
  30. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/entry_points.txt +1 -0
  31. lyrics_transcriber/__init__.py +10 -0
  32. lyrics_transcriber/cli/__init__.py +0 -0
  33. lyrics_transcriber/cli/cli_main.py +285 -0
  34. lyrics_transcriber/core/__init__.py +0 -0
  35. lyrics_transcriber/core/config.py +50 -0
  36. lyrics_transcriber/core/controller.py +520 -0
  37. lyrics_transcriber/correction/__init__.py +0 -0
  38. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  39. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  40. lyrics_transcriber/correction/agentic/agent.py +313 -0
  41. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  42. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  43. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  44. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  45. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  46. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  47. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  48. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  49. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  50. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  51. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  52. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  53. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  54. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  55. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  56. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  57. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  58. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  59. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  60. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  61. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  62. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  63. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  64. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  65. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  66. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  67. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  68. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  69. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  70. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  71. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  72. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  73. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  74. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  75. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  76. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  77. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  78. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  79. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  80. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  81. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  82. lyrics_transcriber/correction/agentic/router.py +35 -0
  83. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  84. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  85. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  86. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  87. lyrics_transcriber/correction/anchor_sequence.py +1043 -0
  88. lyrics_transcriber/correction/corrector.py +760 -0
  89. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  90. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  91. lyrics_transcriber/correction/feedback/store.py +236 -0
  92. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  93. lyrics_transcriber/correction/handlers/base.py +52 -0
  94. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  95. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  96. lyrics_transcriber/correction/handlers/llm.py +293 -0
  97. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  98. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  99. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  100. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  101. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  102. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  103. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  104. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  105. lyrics_transcriber/correction/operations.py +352 -0
  106. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  107. lyrics_transcriber/correction/text_utils.py +30 -0
  108. lyrics_transcriber/frontend/.gitignore +23 -0
  109. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  110. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  111. lyrics_transcriber/frontend/README.md +50 -0
  112. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  113. lyrics_transcriber/frontend/__init__.py +25 -0
  114. lyrics_transcriber/frontend/eslint.config.js +28 -0
  115. lyrics_transcriber/frontend/index.html +18 -0
  116. lyrics_transcriber/frontend/package.json +42 -0
  117. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  118. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  119. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  120. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  121. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  122. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  123. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  124. lyrics_transcriber/frontend/src/App.tsx +212 -0
  125. lyrics_transcriber/frontend/src/api.ts +239 -0
  126. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  127. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  128. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  129. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  130. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  131. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  132. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  133. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  134. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  135. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  136. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  137. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  138. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  139. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  140. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  141. lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
  142. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
  143. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  144. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  145. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  146. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  147. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  148. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
  149. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  150. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  151. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  152. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  153. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  154. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  155. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  157. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  158. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  159. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  160. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  161. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  162. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  163. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  164. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  165. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  166. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  167. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  168. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  169. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  170. lyrics_transcriber/frontend/src/main.tsx +17 -0
  171. lyrics_transcriber/frontend/src/theme.ts +177 -0
  172. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  173. lyrics_transcriber/frontend/src/types.js +2 -0
  174. lyrics_transcriber/frontend/src/types.ts +199 -0
  175. lyrics_transcriber/frontend/src/validation.ts +132 -0
  176. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  177. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  178. lyrics_transcriber/frontend/tsconfig.json +25 -0
  179. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  180. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  181. lyrics_transcriber/frontend/update_version.js +11 -0
  182. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  183. lyrics_transcriber/frontend/vite.config.js +10 -0
  184. lyrics_transcriber/frontend/vite.config.ts +11 -0
  185. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  186. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  187. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  188. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
  189. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
  190. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  191. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  192. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  193. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  194. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  195. lyrics_transcriber/frontend/yarn.lock +3752 -0
  196. lyrics_transcriber/lyrics/__init__.py +0 -0
  197. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  198. lyrics_transcriber/lyrics/file_provider.py +95 -0
  199. lyrics_transcriber/lyrics/genius.py +384 -0
  200. lyrics_transcriber/lyrics/lrclib.py +231 -0
  201. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  202. lyrics_transcriber/lyrics/spotify.py +290 -0
  203. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  204. lyrics_transcriber/output/__init__.py +0 -0
  205. lyrics_transcriber/output/ass/__init__.py +21 -0
  206. lyrics_transcriber/output/ass/ass.py +2088 -0
  207. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  208. lyrics_transcriber/output/ass/config.py +180 -0
  209. lyrics_transcriber/output/ass/constants.py +23 -0
  210. lyrics_transcriber/output/ass/event.py +94 -0
  211. lyrics_transcriber/output/ass/formatters.py +132 -0
  212. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  213. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  214. lyrics_transcriber/output/ass/section_detector.py +89 -0
  215. lyrics_transcriber/output/ass/section_screen.py +106 -0
  216. lyrics_transcriber/output/ass/style.py +187 -0
  217. lyrics_transcriber/output/cdg.py +619 -0
  218. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  219. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  220. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  221. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  222. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  223. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  224. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  225. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  226. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  227. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  228. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  229. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  230. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  231. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  232. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  233. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  234. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  235. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  236. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  237. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  238. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  239. lyrics_transcriber/output/countdown_processor.py +267 -0
  240. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  241. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  242. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  243. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  244. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  245. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  246. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  247. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  248. lyrics_transcriber/output/generator.py +257 -0
  249. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  250. lyrics_transcriber/output/lyrics_file.py +102 -0
  251. lyrics_transcriber/output/plain_text.py +96 -0
  252. lyrics_transcriber/output/segment_resizer.py +431 -0
  253. lyrics_transcriber/output/subtitles.py +397 -0
  254. lyrics_transcriber/output/video.py +544 -0
  255. lyrics_transcriber/review/__init__.py +0 -0
  256. lyrics_transcriber/review/server.py +676 -0
  257. lyrics_transcriber/storage/__init__.py +0 -0
  258. lyrics_transcriber/storage/dropbox.py +225 -0
  259. lyrics_transcriber/transcribers/__init__.py +0 -0
  260. lyrics_transcriber/transcribers/audioshake.py +290 -0
  261. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  262. lyrics_transcriber/transcribers/whisper.py +330 -0
  263. lyrics_transcriber/types.py +648 -0
  264. lyrics_transcriber/utils/__init__.py +0 -0
  265. lyrics_transcriber/utils/word_utils.py +27 -0
  266. karaoke_gen-0.57.0.dist-info/METADATA +0 -167
  267. karaoke_gen-0.57.0.dist-info/RECORD +0 -23
  268. {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,212 @@
1
+ """Refactored LangChain-based provider bridge using composition.
2
+
3
+ This is a much cleaner version that delegates to specialized components:
4
+ - ModelFactory: Creates ChatModels
5
+ - CircuitBreaker: Manages failure state
6
+ - ResponseParser: Parses responses
7
+ - RetryExecutor: Handles retry logic
8
+ - ResponseCache: Caches LLM responses to avoid redundant calls
9
+
10
+ Each component has a single responsibility and is independently testable.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ import os
16
+ from typing import List, Dict, Any, Optional
17
+ from datetime import datetime
18
+
19
+ from .base import BaseAIProvider
20
+ from .config import ProviderConfig
21
+ from .model_factory import ModelFactory
22
+ from .circuit_breaker import CircuitBreaker
23
+ from .response_parser import ResponseParser
24
+ from .retry_executor import RetryExecutor
25
+ from .response_cache import ResponseCache
26
+ from .constants import (
27
+ PROMPT_LOG_LENGTH,
28
+ RESPONSE_LOG_LENGTH,
29
+ CIRCUIT_OPEN_ERROR,
30
+ MODEL_INIT_ERROR,
31
+ PROVIDER_ERROR,
32
+ )
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
class LangChainBridge(BaseAIProvider):
    """Provider bridge using LangChain ChatModels with reliability patterns.

    This bridge delegates to specialized components rather than implementing
    everything itself, which follows the Single Responsibility Principle and
    keeps each piece independently testable.

    Components:
    - ModelFactory: Creates and configures ChatModels
    - CircuitBreaker: Protects against cascading failures
    - ResponseParser: Handles JSON/raw response parsing
    - RetryExecutor: Implements exponential backoff
    - ResponseCache: Caches raw LLM responses to avoid redundant calls
    """

    def __init__(
        self,
        model: str,
        config: ProviderConfig | None = None,
        model_factory: ModelFactory | None = None,
        circuit_breaker: CircuitBreaker | None = None,
        response_parser: ResponseParser | None = None,
        retry_executor: RetryExecutor | None = None,
        response_cache: ResponseCache | None = None,
    ):
        """Initialize the bridge with components (dependency injection).

        Args:
            model: Model identifier in format "provider/model"
            config: Provider configuration (creates default if None)
            model_factory: Factory for creating ChatModels (creates default if None)
            circuit_breaker: Circuit breaker instance (creates default if None)
            response_parser: Response parser instance (creates default if None)
            retry_executor: Retry executor instance (creates default if None)
            response_cache: Response cache instance (creates default if None)
        """
        self._model = model
        self._config = config or ProviderConfig.from_env()

        # Dependency injection with sensible defaults
        self._factory = model_factory or ModelFactory()
        self._circuit_breaker = circuit_breaker or CircuitBreaker(self._config)
        self._parser = response_parser or ResponseParser()
        self._executor = retry_executor or RetryExecutor(self._config)

        # Initialize cache (enabled by default, can be disabled via DISABLE_LLM_CACHE=1)
        cache_enabled = os.getenv("DISABLE_LLM_CACHE", "0").lower() not in {"1", "true", "yes"}
        self._cache = response_cache or ResponseCache(
            cache_dir=self._config.cache_dir,
            enabled=cache_enabled,
        )

        # Lazy-initialized chat model
        self._chat_model: Optional[Any] = None
        # Langfuse session id for the current generate_correction_proposals()
        # call. Fix: the original created this attribute ad-hoc inside
        # generate_correction_proposals, forcing a hasattr() check in
        # _invoke_model; declaring it here removes that.
        self._session_id: Optional[str] = None

    def name(self) -> str:
        """Return provider name for logging."""
        return f"langchain:{self._model}"

    def generate_correction_proposals(
        self,
        prompt: str,
        schema: Dict[str, Any],
        session_id: str | None = None,
    ) -> List[Dict[str, Any]]:
        """Generate correction proposals using LangChain ChatModel.

        Orchestrates the injected components: cache lookup, circuit-breaker
        check, lazy model creation, retried invocation, caching, and parsing.

        Args:
            prompt: The correction prompt
            schema: Pydantic schema for structured output (for future use)
            session_id: Optional Langfuse session ID for grouping traces

        Returns:
            List of correction proposal dictionaries, or error dicts on failure
        """
        # Store session_id for use in _invoke_model
        self._session_id = session_id

        # Step 0: Check cache first. Fix: compare against None so an empty
        # (but valid) cached response still counts as a hit and does not
        # trigger a fresh LLM call.
        cached_response = self._cache.get(prompt, self._model)
        if cached_response is not None:
            parsed = self._parser.parse(cached_response)
            logger.debug(f"🎯 Using cached response ({len(parsed)} items)")
            return parsed

        # Step 1: Check circuit breaker
        if self._circuit_breaker.is_open(self._model):
            open_until = self._circuit_breaker.get_open_until(self._model)
            return [{
                "error": CIRCUIT_OPEN_ERROR,
                "until": open_until
            }]

        # Step 2: Get or create chat model (lazy; init failures trip the breaker)
        if not self._chat_model:
            try:
                self._chat_model = self._factory.create_chat_model(
                    self._model,
                    self._config
                )
            except Exception as e:
                self._circuit_breaker.record_failure(self._model)
                logger.error(f"🤖 Failed to initialize chat model: {e}")
                return [{
                    "error": MODEL_INIT_ERROR,
                    "message": str(e)
                }]

        # Step 3: Execute with retry logic
        logger.debug(
            f"🤖 [LangChain] Sending prompt to {self._model}: "
            f"{prompt[:PROMPT_LOG_LENGTH]}..."
        )

        result = self._executor.execute_with_retry(
            operation=lambda: self._invoke_model(prompt),
            operation_name=f"invoke_{self._model}"
        )

        # Step 4: Handle result and update circuit breaker. Guard clause on
        # failure keeps the success path flat.
        if not result.success:
            self._circuit_breaker.record_failure(self._model)
            return [{
                "error": PROVIDER_ERROR,
                "message": result.error or "unknown"
            }]

        self._circuit_breaker.record_success(self._model)

        logger.info(
            f"🤖 [LangChain] Got response from {self._model}: "
            f"{result.value[:RESPONSE_LOG_LENGTH]}..."
        )

        # Step 5: Cache the raw response for future use.
        # Fix: datetime.utcnow() is deprecated (Python 3.12+) and produces a
        # naive timestamp; use an aware UTC timestamp instead. Local import
        # keeps the module's import block untouched.
        from datetime import timezone

        self._cache.set(
            prompt=prompt,
            model=self._model,
            response=result.value,
            metadata={
                "session_id": session_id,
                "timestamp": datetime.now(timezone.utc).isoformat()
            }
        )

        # Step 6: Parse response
        return self._parser.parse(result.value)

    def _invoke_model(self, prompt: str) -> str:
        """Invoke the chat model with a prompt.

        This is a simple wrapper that can be passed to the retry executor.

        Args:
            prompt: The prompt to send

        Returns:
            Response content as string

        Raises:
            Exception: Any error from the model invocation
        """
        from langchain_core.messages import HumanMessage

        # Prepare config with session_id in metadata (Langfuse format).
        # _session_id is always defined (set in __init__), so no hasattr()
        # check is needed.
        config: Dict[str, Any] = {}
        if self._session_id:
            config["metadata"] = {"langfuse_session_id": self._session_id}
            logger.debug(f"🤖 [LangChain] Invoking with session_id: {self._session_id}")

        response = self._chat_model.invoke([HumanMessage(content=prompt)], config=config)
        return response.content
@@ -0,0 +1,209 @@
1
+ """Factory for creating LangChain ChatModels with Langfuse callbacks."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import os
6
+ from typing import Any, Optional, List
7
+
8
+ from .config import ProviderConfig
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ class ModelFactory:
14
+ """Creates and configures LangChain ChatModels with observability.
15
+
16
+ This factory handles:
17
+ - Parsing model specifications ("provider/model" format)
18
+ - Creating Langfuse callbacks when configured
19
+ - Instantiating the appropriate ChatModel for each provider
20
+
21
+ Single Responsibility: Model creation only, no execution or state management.
22
+ """
23
+
24
+ def __init__(self):
25
+ self._langfuse_handler: Optional[Any] = None
26
+ self._langfuse_initialized = False
27
+
28
+ def create_chat_model(self, model_spec: str, config: ProviderConfig) -> Any:
29
+ """Create a ChatModel from a model specification.
30
+
31
+ Args:
32
+ model_spec: Model identifier in format "provider/model"
33
+ e.g. "ollama/gpt-oss:latest", "openai/gpt-4"
34
+ config: Provider configuration with timeouts, retries, etc.
35
+
36
+ Returns:
37
+ Configured LangChain ChatModel instance
38
+
39
+ Raises:
40
+ ValueError: If model_spec format is invalid or provider unsupported
41
+ """
42
+ provider, model_name = self._parse_model_spec(model_spec)
43
+ callbacks = self._create_callbacks(model_spec)
44
+ return self._instantiate_model(provider, model_name, callbacks, config)
45
+
46
+ def _parse_model_spec(self, spec: str) -> tuple[str, str]:
47
+ """Parse model specification into provider and model name.
48
+
49
+ Args:
50
+ spec: Model spec in format "provider/model"
51
+
52
+ Returns:
53
+ Tuple of (provider, model_name)
54
+
55
+ Raises:
56
+ ValueError: If format is invalid
57
+ """
58
+ parts = spec.split("/", 1)
59
+ if len(parts) != 2:
60
+ raise ValueError(
61
+ f"Model spec must be in format 'provider/model', got: {spec}"
62
+ )
63
+ return parts[0], parts[1]
64
+
65
+ def _create_callbacks(self, model_spec: str) -> List[Any]:
66
+ """Create Langfuse callback handlers if configured.
67
+
68
+ Args:
69
+ model_spec: Model specification for logging
70
+
71
+ Returns:
72
+ List of callback handlers (may be empty)
73
+ """
74
+ # Only initialize Langfuse once
75
+ if not self._langfuse_initialized:
76
+ self._initialize_langfuse(model_spec)
77
+ self._langfuse_initialized = True
78
+
79
+ return [self._langfuse_handler] if self._langfuse_handler else []
80
+
81
+ def _initialize_langfuse(self, model_spec: str) -> None:
82
+ """Initialize Langfuse callback handler if keys are present.
83
+
84
+ Langfuse reads credentials from environment variables automatically:
85
+ - LANGFUSE_PUBLIC_KEY
86
+ - LANGFUSE_SECRET_KEY
87
+ - LANGFUSE_HOST (optional)
88
+
89
+ Args:
90
+ model_spec: Model specification for logging
91
+
92
+ Raises:
93
+ RuntimeError: If Langfuse keys are set but initialization fails
94
+ """
95
+ public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
96
+ secret_key = os.getenv("LANGFUSE_SECRET_KEY")
97
+
98
+ if not (public_key and secret_key):
99
+ logger.debug("🤖 Langfuse keys not found, tracing disabled")
100
+ return
101
+
102
+ try:
103
+ from langfuse import Langfuse
104
+ from langfuse.langchain import CallbackHandler
105
+
106
+ # Initialize Langfuse client first (this is required!)
107
+ langfuse_client = Langfuse(
108
+ public_key=public_key,
109
+ secret_key=secret_key,
110
+ host=os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com"),
111
+ )
112
+
113
+ # Then create callback handler with the same public_key
114
+ # The handler will use the initialized client
115
+ self._langfuse_handler = CallbackHandler(public_key=public_key)
116
+ logger.info(f"🤖 Langfuse callback handler initialized for {model_spec}")
117
+ except Exception as e:
118
+ # If Langfuse keys are set, we MUST fail fast
119
+ raise RuntimeError(
120
+ f"Langfuse keys are set but initialization failed: {e}\n"
121
+ f"This indicates a configuration or dependency problem.\n"
122
+ f"Check:\n"
123
+ f" - LANGFUSE_PUBLIC_KEY: {public_key[:10]}...\n"
124
+ f" - LANGFUSE_SECRET_KEY: {'set' if secret_key else 'not set'}\n"
125
+ f" - LANGFUSE_HOST: {os.getenv('LANGFUSE_HOST', 'default')}\n"
126
+ f" - langfuse package version: pip show langfuse"
127
+ ) from e
128
+
129
+ def _instantiate_model(
130
+ self,
131
+ provider: str,
132
+ model_name: str,
133
+ callbacks: List[Any],
134
+ config: ProviderConfig
135
+ ) -> Any:
136
+ """Instantiate the appropriate ChatModel for the provider.
137
+
138
+ Args:
139
+ provider: Provider name (ollama, openai, anthropic)
140
+ model_name: Model name within that provider
141
+ callbacks: List of callback handlers
142
+ config: Provider configuration
143
+
144
+ Returns:
145
+ Configured ChatModel instance
146
+
147
+ Raises:
148
+ ValueError: If provider is not supported
149
+ ImportError: If provider package is not installed
150
+ """
151
+ try:
152
+ if provider == "ollama":
153
+ return self._create_ollama_model(model_name, callbacks, config)
154
+ elif provider == "openai":
155
+ return self._create_openai_model(model_name, callbacks, config)
156
+ elif provider == "anthropic":
157
+ return self._create_anthropic_model(model_name, callbacks, config)
158
+ else:
159
+ raise ValueError(f"Unsupported provider: {provider}")
160
+ except ImportError as e:
161
+ raise ImportError(
162
+ f"Failed to import {provider} provider. "
163
+ f"Install with: pip install langchain-{provider}"
164
+ ) from e
165
+
166
+ def _create_ollama_model(
167
+ self, model_name: str, callbacks: List[Any], config: ProviderConfig
168
+ ) -> Any:
169
+ """Create ChatOllama model."""
170
+ from langchain_ollama import ChatOllama
171
+
172
+ model = ChatOllama(
173
+ model=model_name,
174
+ timeout=config.request_timeout_seconds,
175
+ callbacks=callbacks,
176
+ )
177
+ logger.debug(f"🤖 Created Ollama model: {model_name}")
178
+ return model
179
+
180
+ def _create_openai_model(
181
+ self, model_name: str, callbacks: List[Any], config: ProviderConfig
182
+ ) -> Any:
183
+ """Create ChatOpenAI model."""
184
+ from langchain_openai import ChatOpenAI
185
+
186
+ model = ChatOpenAI(
187
+ model=model_name,
188
+ timeout=config.request_timeout_seconds,
189
+ max_retries=config.max_retries,
190
+ callbacks=callbacks,
191
+ )
192
+ logger.debug(f"🤖 Created OpenAI model: {model_name}")
193
+ return model
194
+
195
+ def _create_anthropic_model(
196
+ self, model_name: str, callbacks: List[Any], config: ProviderConfig
197
+ ) -> Any:
198
+ """Create ChatAnthropic model."""
199
+ from langchain_anthropic import ChatAnthropic
200
+
201
+ model = ChatAnthropic(
202
+ model=model_name,
203
+ timeout=config.request_timeout_seconds,
204
+ max_retries=config.max_retries,
205
+ callbacks=callbacks,
206
+ )
207
+ logger.debug(f"🤖 Created Anthropic model: {model_name}")
208
+ return model
209
+
@@ -0,0 +1,218 @@
1
+ """Response caching for LLM calls to avoid redundant API requests."""
2
+
3
from __future__ import annotations

import hashlib
import json
import logging
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, Optional
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class ResponseCache:
    """Disk-backed cache of LLM responses keyed by (model, prompt) hash.

    Caching lets callers reuse responses when iterating on frontend/UI
    changes without re-running expensive LLM inference calls.

    The cache is persisted as a single JSON file mapping the SHA-256 hex
    digest of ``"{model}::{prompt}"`` to an entry of the form::

        {
            "prompt": "prompt text (truncated for readability)",
            "response": "llm response",
            "timestamp": "ISO-8601 datetime (UTC)",
            "model": "model identifier",
            "metadata": {...}
        }
    """

    # Stored prompt text is truncated to this many characters so the JSON
    # cache file stays human-readable.
    _PROMPT_PREVIEW_CHARS = 500

    def __init__(self, cache_dir: str = "cache", enabled: bool = True):
        """Initialize the response cache and load existing entries from disk.

        Args:
            cache_dir: Directory to store the cache file (created if needed).
            enabled: Whether caching is enabled; when False, get() always
                misses and set() is a no-op.
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.cache_file = self.cache_dir / "llm_response_cache.json"
        self.enabled = enabled
        self._cache: Dict[str, Dict[str, Any]] = {}
        self._load_cache()

    def _load_cache(self) -> None:
        """Load cache from disk; fall back to an empty cache on any failure."""
        self._cache = {}
        if not self.cache_file.exists():
            return

        try:
            with open(self.cache_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
            # Guard against a corrupted file holding non-object JSON (e.g. a
            # list), which would crash later .values()/.items() calls.
            if isinstance(loaded, dict):
                self._cache = loaded
                logger.debug(f"📦 Loaded {len(self._cache)} cached responses")
            else:
                logger.warning("Cache file did not contain a JSON object; ignoring it")
        except Exception as e:
            logger.warning(f"Failed to load cache: {e}")
            self._cache = {}

    def _save_cache(self) -> None:
        """Persist the in-memory cache to disk (best-effort, logs on failure)."""
        try:
            with open(self.cache_file, 'w', encoding='utf-8') as f:
                json.dump(self._cache, f, indent=2, ensure_ascii=False)
            logger.debug(f"💾 Saved {len(self._cache)} cached responses")
        except Exception as e:
            logger.warning(f"Failed to save cache: {e}")

    def _compute_hash(self, prompt: str, model: str) -> str:
        """Compute the cache key for a prompt + model combination.

        Args:
            prompt: The full prompt text
            model: Model identifier

        Returns:
            SHA256 hash as hex string
        """
        # Include both prompt and model in the hash so the same prompt sent
        # to different models gets distinct entries.
        combined = f"{model}::{prompt}"
        return hashlib.sha256(combined.encode('utf-8')).hexdigest()

    @staticmethod
    def _parse_timestamp(value: Any) -> Optional[datetime]:
        """Parse a stored ISO timestamp, assuming UTC for naive values.

        Entries written by older versions used ``datetime.utcnow()`` and are
        therefore naive; attaching UTC makes them comparable with the aware
        datetimes this class now writes. Returns None for missing or
        malformed values so callers can simply skip them.
        """
        if not isinstance(value, str):
            return None
        try:
            parsed = datetime.fromisoformat(value)
        except ValueError:
            return None
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    def get(self, prompt: str, model: str) -> Optional[str]:
        """Get the cached response for a prompt/model pair if available.

        Args:
            prompt: The prompt text
            model: Model identifier

        Returns:
            Cached response string, or None on a miss or when disabled.
        """
        if not self.enabled:
            return None

        prompt_hash = self._compute_hash(prompt, model)
        cached = self._cache.get(prompt_hash)
        if cached is not None:
            logger.info(f"🎯 Cache HIT for {model} (hash: {prompt_hash[:8]}...)")
            logger.debug(f"   Cached at: {cached.get('timestamp')}")
            return cached.get('response')

        logger.debug(f"📭 Cache MISS for {model} (hash: {prompt_hash[:8]}...)")
        return None

    def set(
        self,
        prompt: str,
        model: str,
        response: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> None:
        """Store a response in the cache and persist it to disk.

        Args:
            prompt: The prompt text
            model: Model identifier
            response: The LLM response
            metadata: Optional metadata to store with the cache entry
        """
        if not self.enabled:
            return

        prompt_hash = self._compute_hash(prompt, model)
        limit = self._PROMPT_PREVIEW_CHARS
        self._cache[prompt_hash] = {
            # Truncate the stored prompt so the JSON file stays readable.
            "prompt": prompt[:limit] + "..." if len(prompt) > limit else prompt,
            "response": response,
            # Timezone-aware UTC; datetime.utcnow() is deprecated (3.12+).
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "model": model,
            "metadata": metadata or {}
        }

        # Save to disk immediately so the entry persists across runs.
        self._save_cache()
        logger.debug(f"💾 Cached response for {model} (hash: {prompt_hash[:8]}...)")

    def clear(self) -> int:
        """Clear all cached responses and persist the empty cache.

        Returns:
            Number of entries cleared
        """
        count = len(self._cache)
        self._cache = {}
        self._save_cache()
        logger.info(f"🗑️ Cleared {count} cached responses")
        return count

    def get_stats(self) -> Dict[str, Any]:
        """Return summary statistics about the cache contents.

        Returns:
            Dictionary with total and per-model entry counts, oldest/newest
            timestamps, the cache file path, and the enabled flag.
        """
        if not self._cache:
            return {
                "total_entries": 0,
                "cache_file": str(self.cache_file),
                "enabled": self.enabled
            }

        # Count entries by model.
        by_model: Dict[str, int] = {}
        for entry in self._cache.values():
            model = entry.get('model', 'unknown')
            by_model[model] = by_model.get(model, 0) + 1

        # Malformed or missing timestamps are skipped rather than letting a
        # single bad entry crash the whole stats call.
        timestamps = [
            ts
            for ts in (
                self._parse_timestamp(entry.get('timestamp'))
                for entry in self._cache.values()
            )
            if ts is not None
        ]

        return {
            "total_entries": len(self._cache),
            "by_model": by_model,
            "oldest": min(timestamps).isoformat() if timestamps else None,
            "newest": max(timestamps).isoformat() if timestamps else None,
            "cache_file": str(self.cache_file),
            "enabled": self.enabled
        }

    def prune_old_entries(self, days: int = 30) -> int:
        """Remove cache entries older than the specified number of days.

        Args:
            days: Remove entries older than this many days

        Returns:
            Number of entries removed
        """
        cutoff = datetime.now(timezone.utc) - timedelta(days=days)

        # Entries with missing or unparseable timestamps are kept.
        to_remove = [
            prompt_hash
            for prompt_hash, entry in self._cache.items()
            if (ts := self._parse_timestamp(entry.get('timestamp'))) is not None
            and ts < cutoff
        ]

        for prompt_hash in to_remove:
            del self._cache[prompt_hash]

        if to_remove:
            self._save_cache()
            logger.info(f"🗑️ Pruned {len(to_remove)} old cache entries (older than {days} days)")

        return len(to_remove)
218
+