lattifai 0.4.5__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- lattifai/__init__.py +26 -27
- lattifai/base_client.py +7 -7
- lattifai/bin/agent.py +90 -91
- lattifai/bin/align.py +110 -111
- lattifai/bin/cli_base.py +3 -3
- lattifai/bin/subtitle.py +45 -45
- lattifai/client.py +56 -56
- lattifai/errors.py +73 -73
- lattifai/io/__init__.py +12 -11
- lattifai/io/gemini_reader.py +30 -30
- lattifai/io/gemini_writer.py +17 -17
- lattifai/io/reader.py +13 -12
- lattifai/io/supervision.py +3 -3
- lattifai/io/text_parser.py +43 -16
- lattifai/io/utils.py +4 -4
- lattifai/io/writer.py +31 -19
- lattifai/tokenizer/__init__.py +1 -1
- lattifai/tokenizer/phonemizer.py +3 -3
- lattifai/tokenizer/tokenizer.py +83 -82
- lattifai/utils.py +15 -15
- lattifai/workers/__init__.py +1 -1
- lattifai/workers/lattice1_alpha.py +46 -46
- lattifai/workflows/__init__.py +11 -11
- lattifai/workflows/agents.py +2 -0
- lattifai/workflows/base.py +22 -22
- lattifai/workflows/file_manager.py +182 -182
- lattifai/workflows/gemini.py +29 -29
- lattifai/workflows/prompts/__init__.py +4 -4
- lattifai/workflows/youtube.py +233 -233
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/METADATA +7 -9
- lattifai-0.4.6.dist-info/RECORD +39 -0
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/licenses/LICENSE +1 -1
- lattifai-0.4.5.dist-info/RECORD +0 -39
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/WHEEL +0 -0
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/entry_points.txt +0 -0
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/top_level.txt +0 -0
lattifai/workflows/gemini.py CHANGED

@@ -24,16 +24,16 @@ class GeminiTranscriber:
     """
 
     # The specific Gem URL provided by the user
-    GEM_URL =
+    GEM_URL = "https://gemini.google.com/gem/1870ly7xvW2hU_umtv-LedGsjywT0sQiN"
 
     def __init__(self, api_key: Optional[str] = None):
         self.api_key = api_key
-        self.logger = setup_workflow_logger(
+        self.logger = setup_workflow_logger("gemini")
         self.prompt_loader = get_prompt_loader()
 
         if not self.api_key:
             self.logger.warning(
-
+                "⚠️ Gemini API key not provided. API key will be required when calling transcription methods."
             )
 
     async def __call__(self, youtube_url: str) -> str:
@@ -51,9 +51,9 @@ class GeminiTranscriber:
             Transcribed text
         """
         if not self.api_key:
-            raise ValueError(
+            raise ValueError("Gemini API key is required for transcription")
 
-        self.logger.info(f
+        self.logger.info(f"🎤 Starting Gemini transcription for: {youtube_url}")
 
         try:
             # Initialize client
@@ -63,11 +63,11 @@ class GeminiTranscriber:
             system_prompt = self.prompt_loader.get_gemini_transcription_prompt()
 
             # Generate transcription with extended thinking
-            self.logger.info(
+            self.logger.info("🔄 Sending request to Gemini 2.5 Pro...")
             config = GenerateContentConfig(
                 system_instruction=system_prompt,
                 # Enable thinking by including it in response modalities
-                response_modalities=[
+                response_modalities=["TEXT"],
                 thinking_config=ThinkingConfig(
                     include_thoughts=False,
                     thinking_budget=-1,
@@ -76,25 +76,25 @@ class GeminiTranscriber:
             response = await asyncio.get_event_loop().run_in_executor(
                 None,
                 lambda: client.models.generate_content(
-                    model=
-                    contents=Part.from_uri(file_uri=youtube_url, mime_type=
+                    model="gemini-2.5-pro",
+                    contents=Part.from_uri(file_uri=youtube_url, mime_type="video/*"),
                     config=config,
                 ),
             )
 
             if not response.text:
-                raise RuntimeError(
+                raise RuntimeError("Empty response from Gemini API")
 
             transcript = response.text.strip()
 
-            self.logger.info(f
+            self.logger.info(f"✅ Transcription completed: {len(transcript)} characters")
             return transcript
 
         except ImportError:
-            raise RuntimeError(
+            raise RuntimeError("Google GenAI SDK not installed. Please install with: pip install google-genai")
         except Exception as e:
-            self.logger.error(f
-            raise RuntimeError(f
+            self.logger.error(f"Gemini transcription failed: {str(e)}")
+            raise RuntimeError(f"Gemini transcription failed: {str(e)}")
 
     async def transcribe_file(self, media_file_path: str) -> str:
         """
@@ -107,9 +107,9 @@ class GeminiTranscriber:
             Transcribed text
         """
         if not self.api_key:
-            raise ValueError(
+            raise ValueError("Gemini API key is required for transcription")
 
-        self.logger.info(f
+        self.logger.info(f"🎤 Starting Gemini transcription for file: {media_file_path}")
 
         try:
             # Initialize client
@@ -119,16 +119,16 @@ class GeminiTranscriber:
             system_prompt = self.prompt_loader.get_gemini_transcription_prompt()
 
             # Upload audio file
-            self.logger.info(
+            self.logger.info("📤 Uploading audio file to Gemini...")
             media_file = client.files.upload(path=media_file_path)
 
             # Generate transcription with extended thinking
             # Note: For thinking mode, you may want to use 'gemini-2.0-flash-thinking-exp' or similar models
-            self.logger.info(
+            self.logger.info("🔄 Sending transcription request...")
             config = GenerateContentConfig(
                 system_instruction=system_prompt,
                 # Enable thinking by including it in response modalities
-                response_modalities=[
+                response_modalities=["TEXT"],
                 thinking_config=ThinkingConfig(
                     include_thoughts=False,
                     thinking_budget=-1,
@@ -137,31 +137,31 @@ class GeminiTranscriber:
             response = await asyncio.get_event_loop().run_in_executor(
                 None,
                 lambda: client.models.generate_content(
-                    model=
+                    model="gemini-2.5-pro",
                     contents=Part.from_uri(file_uri=media_file.uri, mime_type=media_file.mime_type),
                     config=config,
                 ),
             )
 
             if not response.text:
-                raise RuntimeError(
+                raise RuntimeError("Empty response from Gemini API")
 
             transcript = response.text.strip()
 
-            self.logger.info(f
+            self.logger.info(f"✅ Transcription completed: {len(transcript)} characters")
             return transcript
 
         except ImportError:
-            raise RuntimeError(
+            raise RuntimeError("Google GenAI SDK not installed. Please install with: pip install google-genai")
         except Exception as e:
-            self.logger.error(f
-            raise RuntimeError(f
+            self.logger.error(f"Gemini transcription failed: {str(e)}")
+            raise RuntimeError(f"Gemini transcription failed: {str(e)}")
 
     def get_gem_info(self) -> dict:
         """Get information about the Gem being used"""
         return {
-
-
-
-
+            "gem_name": "Audio Transcription Gem",
+            "gem_url": self.GEM_URL,
+            "model": "Gemini 2.5 Pro",
+            "description": "Specialized Gem for media content transcribe",
         }
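For orientation, here is a minimal usage sketch of the transcriber class shown in the diff above. The import path, the use of a `GEMINI_API_KEY` environment variable, and the placeholder URL/file name are assumptions for illustration only; they are not confirmed by the diff.

```python
# Minimal sketch: driving the GeminiTranscriber from this diff.
# Assumptions: the class is importable from lattifai.workflows.gemini,
# and the API key lives in the GEMINI_API_KEY environment variable.
import asyncio
import os

from lattifai.workflows.gemini import GeminiTranscriber  # assumed import path


async def main() -> None:
    transcriber = GeminiTranscriber(api_key=os.environ.get("GEMINI_API_KEY"))

    # Transcribe directly from a YouTube URL; __call__ raises ValueError
    # when no API key was supplied.
    transcript = await transcriber("https://www.youtube.com/watch?v=VIDEO_ID")  # placeholder URL
    print(transcript[:200])

    # Or transcribe a local media file via the Files API upload path:
    # transcript = await transcriber.transcribe_file("episode.mp3")  # placeholder file

    # Metadata about the Gem backing the transcription prompt.
    print(transcriber.get_gem_info())


if __name__ == "__main__":
    asyncio.run(main())
```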
lattifai/workflows/prompts/__init__.py CHANGED

@@ -26,16 +26,16 @@ class PromptLoader:
         Raises:
             FileNotFoundError: If prompt file doesn't exist
         """
-        prompt_path = self.prompts_dir / category / f
+        prompt_path = self.prompts_dir / category / f"{name}.txt"
 
         if not prompt_path.exists():
-            raise FileNotFoundError(f
+            raise FileNotFoundError(f"Prompt not found: {prompt_path}")
 
-        return prompt_path.read_text(encoding=
+        return prompt_path.read_text(encoding="utf-8").strip()
 
     def get_gemini_transcription_prompt(self) -> str:
         """Get the Gemini transcription Gem prompt"""
-        return self.load_prompt(
+        return self.load_prompt("gemini", "transcription_gem")
 
 
 # Global instance