lattifai 0.4.5__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/__init__.py +26 -27
- lattifai/base_client.py +7 -7
- lattifai/bin/agent.py +90 -91
- lattifai/bin/align.py +110 -111
- lattifai/bin/cli_base.py +3 -3
- lattifai/bin/subtitle.py +45 -45
- lattifai/client.py +56 -56
- lattifai/errors.py +73 -73
- lattifai/io/__init__.py +12 -11
- lattifai/io/gemini_reader.py +30 -30
- lattifai/io/gemini_writer.py +17 -17
- lattifai/io/reader.py +13 -12
- lattifai/io/supervision.py +3 -3
- lattifai/io/text_parser.py +43 -16
- lattifai/io/utils.py +4 -4
- lattifai/io/writer.py +31 -19
- lattifai/tokenizer/__init__.py +1 -1
- lattifai/tokenizer/phonemizer.py +3 -3
- lattifai/tokenizer/tokenizer.py +83 -82
- lattifai/utils.py +15 -15
- lattifai/workers/__init__.py +1 -1
- lattifai/workers/lattice1_alpha.py +46 -46
- lattifai/workflows/__init__.py +11 -11
- lattifai/workflows/agents.py +2 -0
- lattifai/workflows/base.py +22 -22
- lattifai/workflows/file_manager.py +182 -182
- lattifai/workflows/gemini.py +29 -29
- lattifai/workflows/prompts/__init__.py +4 -4
- lattifai/workflows/youtube.py +233 -233
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/METADATA +7 -9
- lattifai-0.4.6.dist-info/RECORD +39 -0
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/licenses/LICENSE +1 -1
- lattifai-0.4.5.dist-info/RECORD +0 -39
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/WHEEL +0 -0
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/entry_points.txt +0 -0
- {lattifai-0.4.5.dist-info → lattifai-0.4.6.dist-info}/top_level.txt +0 -0
lattifai/tokenizer/tokenizer.py
CHANGED
@@ -3,25 +3,25 @@ import inspect
 import pickle
 import re
 from collections import defaultdict
-from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar
+from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar

 import torch

 from lattifai.errors import LATTICE_DECODING_FAILURE_HELP, LatticeDecodingError
-from lattifai.io import Supervision
+from lattifai.io import Supervision, normalize_html_text
 from lattifai.tokenizer.phonemizer import G2Phonemizer

 PUNCTUATION = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~'
 END_PUNCTUATION = '.!?"]。!?”】'
-PUNCTUATION_SPACE = PUNCTUATION + ' '
-STAR_TOKEN = '※'
+PUNCTUATION_SPACE = PUNCTUATION + " "
+STAR_TOKEN = "※"

-GROUPING_SEPARATOR = '✹'
+GROUPING_SEPARATOR = "✹"

 MAXIMUM_WORD_LENGTH = 40


-TokenizerT = TypeVar('TokenizerT', bound='LatticeTokenizer')
+TokenizerT = TypeVar("TokenizerT", bound="LatticeTokenizer")


 class LatticeTokenizer:
@@ -32,9 +32,9 @@ class LatticeTokenizer:
         self.words: List[str] = []
         self.g2p_model: Any = None  # Placeholder for G2P model
         self.dictionaries = defaultdict(lambda: [])
-        self.oov_word = '<unk>'
+        self.oov_word = "<unk>"
         self.sentence_splitter = None
-        self.device = 'cpu'
+        self.device = "cpu"

     def init_sentence_splitter(self):
         if self.sentence_splitter is not None:
@@ -45,14 +45,14 @@

         providers = []
         device = self.device
-        if device.startswith('cuda') and ort.get_all_providers().count('CUDAExecutionProvider') > 0:
-            providers.append('CUDAExecutionProvider')
-        elif device.startswith('mps') and ort.get_all_providers().count('MPSExecutionProvider') > 0:
-            providers.append('MPSExecutionProvider')
+        if device.startswith("cuda") and ort.get_all_providers().count("CUDAExecutionProvider") > 0:
+            providers.append("CUDAExecutionProvider")
+        elif device.startswith("mps") and ort.get_all_providers().count("MPSExecutionProvider") > 0:
+            providers.append("MPSExecutionProvider")

         sat = SaT(
-            'sat-3l-sm',
-            ort_providers=providers + ['CPUExecutionProvider'],
+            "sat-3l-sm",
+            ort_providers=providers + ["CPUExecutionProvider"],
         )
         self.sentence_splitter = sat

@@ -79,23 +79,23 @@
         # or other forms like [SOMETHING] SPEAKER:

         # Pattern 1: [mark] HTML-encoded separator speaker:
-        pattern1 = r'^(\[[^\]]+\])\s+(>>|&gt;&gt;)\s+(.+)$'
+        pattern1 = r"^(\[[^\]]+\])\s+(>>|&gt;&gt;)\s+(.+)$"
         match1 = re.match(pattern1, sentence.strip())
         if match1:
             special_mark = match1.group(1)
             separator = match1.group(2)
             speaker_part = match1.group(3)
-            return [special_mark, f'{separator} {speaker_part}']
+            return [special_mark, f"{separator} {speaker_part}"]

         # Pattern 2: [mark] speaker:
-        pattern2 = r'^(\[[^\]]+\])\s+([^:]+:)(.*)$'
+        pattern2 = r"^(\[[^\]]+\])\s+([^:]+:)(.*)$"
         match2 = re.match(pattern2, sentence.strip())
         if match2:
             special_mark = match2.group(1)
             speaker_label = match2.group(2)
             remaining = match2.group(3).strip()
             if remaining:
-                return [special_mark, f'{speaker_label} {remaining}']
+                return [special_mark, f"{speaker_label} {remaining}"]
             else:
                 return [special_mark, speaker_label]

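Read together, the two patterns peel a bracketed event mark (e.g. [APPLAUSE]) off the front of a caption line before sentence assembly. A quick illustration of what they match, reusing the regexes from the hunk above (the inputs and printed groups are illustrative, not package output):

import re

pattern1 = r"^(\[[^\]]+\])\s+(>>|&gt;&gt;)\s+(.+)$"  # [mark] separator SPEAKER: ...
pattern2 = r"^(\[[^\]]+\])\s+([^:]+:)(.*)$"          # [mark] SPEAKER: ...

print(re.match(pattern1, "[APPLAUSE] >> MIRA MURATI: Welcome.").groups())
# ('[APPLAUSE]', '>>', 'MIRA MURATI: Welcome.')
print(re.match(pattern2, "[MUSIC] Host: Hello there.").groups())
# ('[MUSIC]', 'Host:', ' Hello there.')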
@@ -107,26 +107,26 @@
         cls: Type[TokenizerT],
         client_wrapper: Any,
         model_path: str,
-        device: str = 'cpu',
+        device: str = "cpu",
         compressed: bool = True,
     ) -> TokenizerT:
         """Load tokenizer from exported binary file"""
         from pathlib import Path

-        words_model_path = f'{model_path}/words.bin'
+        words_model_path = f"{model_path}/words.bin"
         if compressed:
-            with gzip.open(words_model_path, 'rb') as f:
+            with gzip.open(words_model_path, "rb") as f:
                 data = pickle.load(f)
         else:
-            with open(words_model_path, 'rb') as f:
+            with open(words_model_path, "rb") as f:
                 data = pickle.load(f)

         tokenizer = cls(client_wrapper=client_wrapper)
-        tokenizer.words = data['words']
-        tokenizer.dictionaries = defaultdict(list, data['dictionaries'])
-        tokenizer.oov_word = data['oov_word']
+        tokenizer.words = data["words"]
+        tokenizer.dictionaries = defaultdict(list, data["dictionaries"])
+        tokenizer.oov_word = data["oov_word"]

-        g2p_model_path = f'{model_path}/g2p.bin' if Path(f'{model_path}/g2p.bin').exists() else None
+        g2p_model_path = f"{model_path}/g2p.bin" if Path(f"{model_path}/g2p.bin").exists() else None
         if g2p_model_path:
             tokenizer.g2p_model = G2Phonemizer(g2p_model_path, device=device)

@@ -136,18 +136,19 @@

     def add_special_tokens(self):
         tokenizer = self
-        for special_token in ['>>', '>']:
+        for special_token in [">>", ">"]:
             if special_token not in tokenizer.dictionaries:
                 tokenizer.dictionaries[special_token] = tokenizer.dictionaries[tokenizer.oov_word]
         return self

     def prenormalize(self, texts: List[str], language: Optional[str] = None) -> List[str]:
         if not self.g2p_model:
-            raise ValueError('G2P model is not loaded, cannot prenormalize texts')
+            raise ValueError("G2P model is not loaded, cannot prenormalize texts")

         oov_words = []
         for text in texts:
-            words = text.lower().replace('-', ' ').replace('—', ' ').replace('–', ' ').split()
+            text = normalize_html_text(text)
+            words = text.lower().replace("-", " ").replace("—", " ").replace("–", " ").split()
             oovs = [w.strip(PUNCTUATION) for w in words if w not in self.words]
             if oovs:
                 oov_words.extend([w for w in oovs if (w not in self.words and len(w) <= MAXIMUM_WORD_LENGTH)])
@@ -156,7 +157,7 @@
         if oov_words:
             indexs = []
             for k, _word in enumerate(oov_words):
-                if any(_word.startswith(p) and _word.endswith(q) for (p, q) in [('(', ')'), ('[', ']')]):
+                if any(_word.startswith(p) and _word.endswith(q) for (p, q) in [("(", ")"), ("[", "]")]):
                     self.dictionaries[_word] = self.dictionaries[self.oov_word]
                 else:
                     _word = _word.strip(PUNCTUATION_SPACE)
@@ -195,7 +196,7 @@
             if sidx < s:
                 if len(speakers) < len(texts) + 1:
                     speakers.append(None)
-                text = ' '.join([sup.text for sup in supervisions[sidx:s]])
+                text = " ".join([sup.text for sup in supervisions[sidx:s]])
                 texts.append(text)
                 sidx = s
             text_len = len(supervision.text)
@@ -205,20 +206,20 @@
             if text_len >= 2000 or s == len(supervisions) - 1:
                 if len(speakers) < len(texts) + 1:
                     speakers.append(None)
-                text = ' '.join([sup.text for sup in supervisions[sidx : s + 1]])
+                text = " ".join([sup.text for sup in supervisions[sidx : s + 1]])
                 texts.append(text)
                 sidx = s + 1
                 text_len = 0

-        assert len(speakers) == len(texts), f'len(speakers)={len(speakers)} != len(texts)={len(texts)}'
+        assert len(speakers) == len(texts), f"len(speakers)={len(speakers)} != len(texts)={len(texts)}"
         sentences = self.sentence_splitter.split(texts, threshold=0.15, strip_whitespace=strip_whitespace)

-        supervisions, remainder = [], ''
+        supervisions, remainder = [], ""
         for k, (_speaker, _sentences) in enumerate(zip(speakers, sentences)):
             # Prepend remainder from previous iteration to the first sentence
             if _sentences and remainder:
                 _sentences[0] = remainder + _sentences[0]
-                remainder = ''
+                remainder = ""

             if not _sentences:
                 continue
@@ -228,14 +229,14 @@
             for s, _sentence in enumerate(_sentences):
                 if remainder:
                     _sentence = remainder + _sentence
-                    remainder = ''
+                    remainder = ""
                 # Detect and split special sentence types: e.g., '[APPLAUSE] >> MIRA MURATI:' -> ['[APPLAUSE]', '>> MIRA MURATI:']  # noqa: E501
                 resplit_parts = self._resplit_special_sentence_types(_sentence)
-                if any(resplit_parts[-1].endswith(sp) for sp in [':', '：']):
+                if any(resplit_parts[-1].endswith(sp) for sp in [":", "："]):
                     if s < len(_sentences) - 1:
-                        _sentences[s + 1] = resplit_parts[-1] + ' ' + _sentences[s + 1]
+                        _sentences[s + 1] = resplit_parts[-1] + " " + _sentences[s + 1]
                     else:  # last part
-                        remainder = resplit_parts[-1] + ' '
+                        remainder = resplit_parts[-1] + " "
                     processed_sentences.extend(resplit_parts[:-1])
                 else:
                     processed_sentences.extend(resplit_parts)
@@ -243,7 +244,7 @@

             if not _sentences:
                 if remainder:
-                    _sentences, remainder = [remainder.strip()], ''
+                    _sentences, remainder = [remainder.strip()], ""
                 else:
                     continue

@@ -257,12 +258,12 @@
                     Supervision(text=text, speaker=(_speaker if s == 0 else None))
                     for s, text in enumerate(_sentences[:-1])
                 )
-                remainder = _sentences[-1] + ' ' + remainder
+                remainder = _sentences[-1] + " " + remainder
                 if k < len(speakers) - 1 and speakers[k + 1] is not None:  # next speaker is set
                     supervisions.append(
                         Supervision(text=remainder.strip(), speaker=_speaker if len(_sentences) == 1 else None)
                     )
-                    remainder = ''
+                    remainder = ""
             elif len(_sentences) == 1:
                 if k == len(speakers) - 1:
                     pass  # keep _speaker for the last supervision
@@ -285,20 +286,20 @@

         pronunciation_dictionaries = self.prenormalize([s.text for s in supervisions])
         response = self.client_wrapper.post(
-            'tokenize',
+            "tokenize",
             json={
-                'supervisions': [s.to_dict() for s in supervisions],
-                'pronunciation_dictionaries': pronunciation_dictionaries,
+                "supervisions": [s.to_dict() for s in supervisions],
+                "pronunciation_dictionaries": pronunciation_dictionaries,
             },
         )
         if response.status_code != 200:
-            raise Exception(f'Failed to tokenize texts: {response.text}')
+            raise Exception(f"Failed to tokenize texts: {response.text}")
         result = response.json()
-        lattice_id = result['id']
+        lattice_id = result["id"]
         return (
             supervisions,
             lattice_id,
-            (result['lattice_graph'], result['final_state'], result.get('acoustic_scale', 1.0)),
+            (result["lattice_graph"], result["final_state"], result.get("acoustic_scale", 1.0)),
         )

     def detokenize(
@@ -310,16 +311,16 @@
     ) -> List[Supervision]:
         emission, results, labels, frame_shift, offset, channel = lattice_results  # noqa: F841
         response = self.client_wrapper.post(
-            'detokenize',
+            "detokenize",
             json={
-                'lattice_id': lattice_id,
-                'frame_shift': frame_shift,
-                'results': [t.to_dict() for t in results[0]],
-                'labels': labels[0],
-                'offset': offset,
-                'channel': channel,
-                'return_details': return_details,
-                'destroy_lattice': True,
+                "lattice_id": lattice_id,
+                "frame_shift": frame_shift,
+                "results": [t.to_dict() for t in results[0]],
+                "labels": labels[0],
+                "offset": offset,
+                "channel": channel,
+                "return_details": return_details,
+                "destroy_lattice": True,
             },
         )
         if response.status_code == 422:
@@ -328,13 +329,13 @@
                 original_error=Exception(LATTICE_DECODING_FAILURE_HELP),
             )
         if response.status_code != 200:
-            raise Exception(f'Failed to detokenize lattice: {response.text}')
+            raise Exception(f"Failed to detokenize lattice: {response.text}")

         result = response.json()
-        if not result.get('success'):
-            raise Exception('Failed to detokenize the alignment results.')
+        if not result.get("success"):
+            raise Exception("Failed to detokenize the alignment results.")

-        alignments = [Supervision.from_dict(s) for s in result['supervisions']]
+        alignments = [Supervision.from_dict(s) for s in result["supervisions"]]

         if return_details:
             # Add emission confidence scores for segments and word-level alignments
@@ -361,20 +362,20 @@ class AsyncLatticeTokenizer(LatticeTokenizer):

         pronunciation_dictionaries = self.prenormalize([s.text for s in supervisions])
         response = await self._post_async(
-            'tokenize',
+            "tokenize",
             json={
-                'supervisions': [s.to_dict() for s in supervisions],
-                'pronunciation_dictionaries': pronunciation_dictionaries,
+                "supervisions": [s.to_dict() for s in supervisions],
+                "pronunciation_dictionaries": pronunciation_dictionaries,
             },
         )
         if response.status_code != 200:
-            raise Exception(f'Failed to tokenize texts: {response.text}')
+            raise Exception(f"Failed to tokenize texts: {response.text}")
         result = response.json()
-        lattice_id = result['id']
+        lattice_id = result["id"]
         return (
             supervisions,
             lattice_id,
-            (result['lattice_graph'], result['final_state'], result.get('acoustic_scale', 1.0)),
+            (result["lattice_graph"], result["final_state"], result.get("acoustic_scale", 1.0)),
         )

     async def detokenize(
@@ -386,16 +387,16 @@ class AsyncLatticeTokenizer(LatticeTokenizer):
     ) -> List[Supervision]:
         emission, results, labels, frame_shift, offset, channel = lattice_results  # noqa: F841
         response = await self._post_async(
-            'detokenize',
+            "detokenize",
             json={
-                'lattice_id': lattice_id,
-                'frame_shift': frame_shift,
-                'results': [t.to_dict() for t in results[0]],
-                'labels': labels[0],
-                'offset': offset,
-                'channel': channel,
-                'return_details': return_details,
-                'destroy_lattice': True,
+                "lattice_id": lattice_id,
+                "frame_shift": frame_shift,
+                "results": [t.to_dict() for t in results[0]],
+                "labels": labels[0],
+                "offset": offset,
+                "channel": channel,
+                "return_details": return_details,
+                "destroy_lattice": True,
             },
         )
         if response.status_code == 422:
@@ -404,13 +405,13 @@ class AsyncLatticeTokenizer(LatticeTokenizer):
                 original_error=Exception(LATTICE_DECODING_FAILURE_HELP),
             )
         if response.status_code != 200:
-            raise Exception(f'Failed to detokenize lattice: {response.text}')
+            raise Exception(f"Failed to detokenize lattice: {response.text}")

         result = response.json()
-        if not result.get('success'):
-            return Exception('Failed to detokenize the alignment results.')
+        if not result.get("success"):
+            return Exception("Failed to detokenize the alignment results.")

-        alignments = [Supervision.from_dict(s) for s in result['supervisions']]
+        alignments = [Supervision.from_dict(s) for s in result["supervisions"]]

         if return_details:
             # Add emission confidence scores for segments and word-level alignments
@@ -453,8 +454,8 @@ def _add_confidence_scores(
         supervision.score = round(1.0 - diffprobs.mean().item(), ndigits=4)

         # Compute word-level confidence if alignment exists
-        if hasattr(supervision, 'alignment') and supervision.alignment:
-            words = supervision.alignment.get('word', [])
+        if hasattr(supervision, "alignment") and supervision.alignment:
+            words = supervision.alignment.get("word", [])
             for w, item in enumerate(words):
                 start = int(item.start / frame_shift) - start_frame
                 end = int(item.end / frame_shift) - start_frame
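Aside from the quote-style reformatting that dominates this file, the substantive change is the new normalize_html_text import and its call at the top of prenormalize. A minimal sketch of why that call matters, assuming the helper behaves roughly like html.unescape plus whitespace cleanup (the actual implementation lives in lattifai/io and may differ):

import html
import re

def normalize_html_text(text: str) -> str:  # hypothetical stand-in
    text = html.unescape(text)  # "&gt;&gt; HOST:" -> ">> HOST:"
    return re.sub(r"\s+", " ", text).strip()

print(normalize_html_text("&gt;&gt; MIRA MURATI:  Welcome &amp; thanks."))
# >> MIRA MURATI: Welcome & thanks.

Without this step, entity fragments such as "&gt;&gt;" or "&amp;" would be split into bogus out-of-vocabulary words before the G2P lookup.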
lattifai/utils.py
CHANGED
@@ -12,8 +12,8 @@ from lattifai.workers import Lattice1AlphaWorker

 def _get_cache_marker_path(cache_dir: Path) -> Path:
     """Get the path for the cache marker file with current date."""
-    today = datetime.now().strftime('%Y%m%d')
-    return cache_dir / f'.done{today}'
+    today = datetime.now().strftime("%Y%m%d")
+    return cache_dir / f".done{today}"


 def _is_cache_valid(cache_dir: Path) -> bool:
@@ -22,7 +22,7 @@ def _is_cache_valid(cache_dir: Path) -> bool:
         return False

     # Find any .done* marker files
-    marker_files = list(cache_dir.glob('.done*'))
+    marker_files = list(cache_dir.glob(".done*"))
     if not marker_files:
         return False

@@ -31,8 +31,8 @@ def _is_cache_valid(cache_dir: Path) -> bool:

     # Extract date from marker filename (format: .doneYYYYMMDD)
     try:
-        date_str = latest_marker.name.replace('.done', '')
-        marker_date = datetime.strptime(date_str, '%Y%m%d')
+        date_str = latest_marker.name.replace(".done", "")
+        marker_date = datetime.strptime(date_str, "%Y%m%d")
         # Check if marker is older than 1 days
         if datetime.now() - marker_date > timedelta(days=1):
             return False
@@ -45,7 +45,7 @@ def _is_cache_valid(cache_dir: Path) -> bool:
 def _create_cache_marker(cache_dir: Path) -> None:
     """Create a cache marker file with current date and clean old markers."""
     # Remove old marker files
-    for old_marker in cache_dir.glob('.done*'):
+    for old_marker in cache_dir.glob(".done*"):
         old_marker.unlink(missing_ok=True)

     # Create new marker file
@@ -68,7 +68,7 @@ def _resolve_model_path(model_name_or_path: str) -> str:
     # Check if we have a valid cached version
     if _is_cache_valid(cache_dir):
         # Return the snapshot path (latest version)
-        snapshots_dir = cache_dir / 'snapshots'
+        snapshots_dir = cache_dir / "snapshots"
         if snapshots_dir.exists():
             snapshot_dirs = [d for d in snapshots_dir.iterdir() if d.is_dir()]
             if snapshot_dirs:
@@ -77,13 +77,13 @@ def _resolve_model_path(model_name_or_path: str) -> str:
             return str(latest_snapshot)

     try:
-        downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
+        downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type="model")
         _create_cache_marker(cache_dir)
         return downloaded_path
     except LocalEntryNotFoundError:
         try:
-            os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
-            downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
+            os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
+            downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type="model")
             _create_cache_marker(cache_dir)
             return downloaded_path
         except Exception as e:  # pragma: no cover - bubble up for caller context
@@ -99,11 +99,11 @@ def _select_device(device: Optional[str]) -> str:

     import torch

-    detected = 'cpu'
+    detected = "cpu"
     if torch.backends.mps.is_available():
-        detected = 'mps'
+        detected = "mps"
     elif torch.cuda.is_available():
-        detected = 'cuda'
+        detected = "cuda"
     return detected

@@ -122,7 +122,7 @@ def _load_tokenizer(
             device=device,
         )
     except Exception as e:
-        raise ModelLoadError(f'tokenizer from {model_path}', original_error=e)
+        raise ModelLoadError(f"tokenizer from {model_path}", original_error=e)


 def _load_worker(model_path: str, device: str) -> Lattice1AlphaWorker:
@@ -130,4 +130,4 @@ def _load_worker(model_path: str, device: str) -> Lattice1AlphaWorker:
     try:
         return Lattice1AlphaWorker(model_path, device=device, num_threads=8)
     except Exception as e:
-        raise ModelLoadError(f'worker from {model_path}', original_error=e)
+        raise ModelLoadError(f"worker from {model_path}", original_error=e)
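The cache helpers above gate Hugging Face snapshot reuse on a dated marker file: a successful snapshot_download writes .doneYYYYMMDD, and the cache is trusted only while that marker is under a day old. A condensed sketch of the scheme (illustrative; cache_is_fresh is not a function in the package):

from datetime import datetime, timedelta
from pathlib import Path

def cache_is_fresh(cache_dir: Path, max_age_days: int = 1) -> bool:
    # Newest ".doneYYYYMMDD" marker decides freshness; lexicographic
    # sort is chronological because the stamp is zero-padded YYYYMMDD.
    markers = sorted(cache_dir.glob(".done*"))
    if not markers:
        return False
    try:
        marked = datetime.strptime(markers[-1].name.replace(".done", ""), "%Y%m%d")
    except ValueError:
        return False
    return datetime.now() - marked <= timedelta(days=max_age_days)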
lattifai/workers/__init__.py
CHANGED