lattifai 0.1.5__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,333 @@
1
+ Metadata-Version: 2.4
2
+ Name: lattifai
3
+ Version: 0.2.2
4
+ Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
5
+ Author-email: Lattifai Technologies <tech@lattifai.com>
6
+ Maintainer-email: Lattice <tech@lattifai.com>
7
+ License: MIT License
8
+
9
+ Copyright (c) 2025 Lattifai.
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in all
19
+ copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
28
+ Project-URL: Homepage, https://github.com/lattifai/lattifai-python
29
+ Project-URL: Documentation, https://github.com/lattifai/lattifai-python/README.md
30
+ Project-URL: Bug Tracker, https://github.com/lattifai/lattifai-python/issues
31
+ Project-URL: Discussions, https://github.com/lattifai/lattifai-python/discussions
32
+ Project-URL: Changelog, https://github.com/lattifai/lattifai-python/CHANGELOG.md
33
+ Keywords: lattifai,speech recognition,video analysis,ai,sdk,api client
34
+ Classifier: Development Status :: 5 - Production/Stable
35
+ Classifier: Intended Audience :: Developers
36
+ Classifier: Intended Audience :: Science/Research
37
+ Classifier: License :: OSI Approved :: Apache Software License
38
+ Classifier: Programming Language :: Python :: 3.9
39
+ Classifier: Programming Language :: Python :: 3.10
40
+ Classifier: Programming Language :: Python :: 3.11
41
+ Classifier: Programming Language :: Python :: 3.12
42
+ Classifier: Programming Language :: Python :: 3.13
43
+ Classifier: Operating System :: MacOS :: MacOS X
44
+ Classifier: Operating System :: POSIX :: Linux
45
+ Classifier: Operating System :: Microsoft :: Windows
46
+ Classifier: Topic :: Multimedia :: Sound/Audio
47
+ Classifier: Topic :: Multimedia :: Video
48
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
49
+ Requires-Python: >=3.9
50
+ Description-Content-Type: text/markdown
51
+ License-File: LICENSE
52
+ Requires-Dist: lattifai-core>=0.2.0
53
+ Requires-Dist: httpx
54
+ Requires-Dist: python-dotenv
55
+ Requires-Dist: lhotse>=1.26.0
56
+ Requires-Dist: colorful>=0.5.6
57
+ Requires-Dist: pysubs2
58
+ Requires-Dist: praatio
59
+ Requires-Dist: tgt
60
+ Requires-Dist: onnxruntime
61
+ Requires-Dist: resampy
62
+ Requires-Dist: g2p-phonemizer==0.1.1
63
+ Requires-Dist: wtpsplit>=2.1.6
64
+ Provides-Extra: numpy
65
+ Requires-Dist: numpy; extra == "numpy"
66
+ Provides-Extra: test
67
+ Requires-Dist: pytest; extra == "test"
68
+ Requires-Dist: pytest-cov; extra == "test"
69
+ Requires-Dist: ruff; extra == "test"
70
+ Requires-Dist: numpy; extra == "test"
71
+ Provides-Extra: all
72
+ Requires-Dist: numpy; extra == "all"
73
+ Requires-Dist: pytest; extra == "all"
74
+ Requires-Dist: pytest-cov; extra == "all"
75
+ Requires-Dist: ruff; extra == "all"
76
+ Dynamic: license-file
77
+
78
+ # LattifAI Python
79
+
80
+ [![PyPI version](https://badge.fury.io/py/lattifai.svg)](https://badge.fury.io/py/lattifai)
81
+
82
+ <p align="center">
83
+ 🌐 <a href="https://lattifai.com"><b>Official Website</b></a> &nbsp;&nbsp; | &nbsp;&nbsp; 🖥️ <a href="https://github.com/lattifai/lattifai-python">GitHub</a> &nbsp;&nbsp; | &nbsp;&nbsp; 🤗 <a href="https://huggingface.co/Lattifai/Lattice-1-Alpha">Model</a> &nbsp;&nbsp; | &nbsp;&nbsp; 📑 <a href="https://lattifai.com/blogs">Blog</a> &nbsp;&nbsp; | &nbsp;&nbsp; <a href="https://discord.gg/gTZqdaBJ"><img src="https://img.shields.io/badge/Discord-Join-5865F2?logo=discord&logoColor=white" alt="Discord" style="vertical-align: middle;"></a>
84
+ </p>
85
+
86
+ Advanced forced alignment and subtitle generation powered by [Lattice-1-Alpha](https://huggingface.co/Lattifai/Lattice-1-Alpha) model.
87
+
88
+ ## Installation
89
+
90
+ ```bash
91
+ pip install install-k2
92
+ # The installation will automatically detect and use your already installed PyTorch version.
93
+ install-k2 # Install k2
94
+
95
+ pip install lattifai
96
+ ```
97
+
98
+ > **⚠️ Important**: You must run `install-k2` before using the lattifai library.
99
+
100
+ ## Quick Start
101
+
102
+ ### Command Line
103
+
104
+ ```bash
105
+ # Align audio with subtitle
106
+ lattifai align audio.wav subtitle.srt output.srt
107
+
108
+ # Convert subtitle format
109
+ lattifai subtitle convert input.srt output.vtt
110
+ ```
111
+ #### lattifai align options
112
+ ```
113
+ > lattifai align --help
114
+ Usage: lattifai align [OPTIONS] INPUT_AUDIO_PATH INPUT_SUBTITLE_PATH OUTPUT_SUBTITLE_PATH
115
+
116
+ Command used to align audio with subtitles
117
+
118
+ Options:
119
+ -F, --input_format [srt|vtt|ass|txt|auto] Input Subtitle format.
120
+ -D, --device [cpu|cuda|mps] Device to use for inference.
121
+ --split_sentence Smart sentence splitting based on punctuation semantics.
122
+ --help Show this message and exit.
123
+ ```
124
+
125
+ #### Understanding --split_sentence
126
+
127
+ The `--split_sentence` option performs intelligent sentence re-splitting based on punctuation and semantic boundaries. This is especially useful when processing subtitles that combine multiple semantic units in a single segment, such as:
128
+
129
+ - **Mixed content**: Non-speech elements (e.g., `[APPLAUSE]`, `[MUSIC]`) followed by actual dialogue
130
+ - **Natural punctuation boundaries**: Colons, periods, and other punctuation marks that indicate semantic breaks
131
+ - **Concatenated phrases**: Multiple distinct utterances joined together without proper separation
132
+
133
+ **Example transformations**:
134
+ ```
135
+ Input: "[APPLAUSE] >> MIRA MURATI: Thank you all"
136
+ Output: ["[APPLAUSE]", ">> MIRA MURATI: Thank you all"]
137
+
138
+ Input: "[MUSIC] Welcome back. Today we discuss AI."
139
+ Output: ["[MUSIC]", "Welcome back.", "Today we discuss AI."]
140
+ ```
141
+
142
+ This feature helps improve alignment accuracy by:
143
+ 1. Respecting punctuation-based semantic boundaries
144
+ 2. Separating distinct utterances for more precise timing
145
+ 3. Maintaining semantic context for each independent phrase
146
+
147
+ **Usage**:
148
+ ```bash
149
+ lattifai align --split_sentence audio.wav subtitle.srt output.srt
150
+ ```
151
+
152
+ ### Python API
153
+
154
+ ```python
155
+ from lattifai import LattifAI
156
+
157
+ # Initialize client
158
+ client = LattifAI(
159
+ api_key=None,  # optional API key (see "API Key Setup" below)
160
+ model_name_or_path='Lattifai/Lattice-1-Alpha',
161
+ device='cpu', # 'cpu', 'cuda', or 'mps'
162
+ )
163
+
164
+ # Perform alignment
165
+ result = client.alignment(
166
+ audio="audio.wav",
167
+ subtitle="subtitle.srt",
168
+ split_sentence=False,
169
+ output_subtitle_path="output.srt"
170
+ )
171
+ ```
172
+
173
+ ## Supported Formats
174
+
175
+ **Audio**: WAV, MP3, FLAC, M4A, OGG
176
+ **Subtitle**: SRT, VTT, ASS, TXT (plain text)
177
+
178
+ ## API Reference
179
+
180
+ ### LattifAI
181
+
182
+ ```python
183
+ LattifAI(
184
+ api_key: Optional[str] = None,
185
+ model_name_or_path: str = 'Lattifai/Lattice-1-Alpha',
186
+ device: str = 'cpu' # 'cpu', 'cuda', or 'mps'
187
+ )
188
+ ```
189
+
190
+ ### alignment()
191
+
192
+ ```python
193
+ client.alignment(
194
+ audio: str, # Path to audio file
195
+ subtitle: str, # Path to subtitle/text file
196
+ format: Optional[str] = None, # 'srt', 'vtt', 'ass', 'txt' (auto-detect if None)
197
+ split_sentence: bool = False, # Smart sentence splitting based on punctuation semantics
198
+ output_subtitle_path: Optional[str] = None
199
+ ) -> str
200
+ ```
201
+
202
+ **Parameters**:
203
+ - `audio`: Path to the audio file to be aligned
204
+ - `subtitle`: Path to the subtitle or text file
205
+ - `format`: Subtitle format ('srt', 'vtt', 'ass', 'txt'). Auto-detected if None
206
+ - `split_sentence`: Enable intelligent sentence re-splitting (default: False). Set to True when subtitles combine multiple semantic units (non-speech elements + dialogue, or multiple sentences) that would benefit from separate timing alignment
207
+ - `output_subtitle_path`: Output path for aligned subtitle (optional)
208
+
209
+ ## Examples
210
+
211
+ ### Basic Text Alignment
212
+
213
+ ```python
214
+ client = LattifAI()
215
+ client.alignment(
216
+ audio="speech.wav",
217
+ subtitle="transcript.txt",
218
+ format="txt",
219
+ split_sentence=False,
220
+ output_subtitle_path="output.srt"
221
+ )
222
+ ```
223
+
224
+ ### Batch Processing
225
+
226
+ ```python
227
+ from pathlib import Path
228
+
229
+ client = LattifAI()
230
+ audio_dir = Path("audio_files")
231
+ subtitle_dir = Path("subtitles")
232
+ output_dir = Path("aligned")
233
+
234
+ for audio in audio_dir.glob("*.wav"):
235
+ subtitle = subtitle_dir / f"{audio.stem}.srt"
236
+ if subtitle.exists():
237
+ client.alignment(
238
+ audio=audio,
239
+ subtitle=subtitle,
240
+ output_subtitle_path=output_dir / f"{audio.stem}_aligned.srt"
241
+ )
242
+ ```
243
+
244
+ ### GPU Acceleration
245
+
246
+ ```python
247
+ # NVIDIA GPU
248
+ client = LattifAI(device='cuda')
249
+
250
+ # Apple Silicon
251
+ client = LattifAI(device='mps')
252
+
253
+ # CLI equivalent (run the next line in a shell, not in Python):
254
+ lattifai align --device mps audio.wav subtitle.srt output.srt
255
+ ```
256
+
257
+ ## Configuration
258
+
259
+ ### API Key Setup
260
+
261
+ First, create your API key at [https://lattifai.com/dashboard/api-keys](https://lattifai.com/dashboard/api-keys)
262
+
263
+ **Recommended: Using .env file**
264
+
265
+ Create a `.env` file in your project root:
266
+ ```bash
267
+ LATTIFAI_API_KEY=your-api-key
268
+ ```
269
+
270
+ The library automatically loads the `.env` file (python-dotenv is included as a dependency).
271
+
272
+ **Alternative: Environment variable**
273
+ ```bash
274
+ export LATTIFAI_API_KEY="your-api-key"
275
+ ```
276
+
277
+ ## Model Information
278
+
279
+ **[Lattice-1-Alpha](https://huggingface.co/Lattifai/Lattice-1-Alpha)** features:
280
+ - State-of-the-art alignment precision
281
+ - **Language Support**: Currently supports English only. The upcoming **Lattice-1** release will support English, Chinese, and mixed English-Chinese content.
282
+ - Handles noisy audio and imperfect transcripts
283
+ - Optimized for CPU and GPU (CUDA/MPS)
284
+
285
+ **Requirements**:
286
+ - Python 3.9+
287
+ - 4GB RAM recommended
288
+ - ~2GB storage for model files
289
+
290
+ ## Development
291
+
292
+ ### Setup
293
+
294
+ ```bash
295
+ git clone https://github.com/lattifai/lattifai-python.git
296
+ cd lattifai-python
297
+ pip install -e ".[test]"
298
+ ./scripts/install-hooks.sh # Optional: install pre-commit hooks
299
+ ```
300
+
301
+ ### Testing
302
+
303
+ ```bash
304
+ pytest # Run all tests
305
+ pytest --cov=src # With coverage
306
+ pytest tests/test_basic.py # Specific test
307
+ ```
308
+
309
+ ### Code Quality
310
+
311
+ ```bash
312
+ ruff check src/ tests/ # Lint
313
+ ruff format src/ tests/ # Format
314
+ isort src/ tests/ # Sort imports
315
+ ```
316
+
317
+ ## Contributing
318
+
319
+ 1. Fork the repository
320
+ 2. Create a feature branch
321
+ 3. Make changes and add tests
322
+ 4. Run `pytest` and `ruff check`
323
+ 5. Submit a pull request
324
+
325
+ ## License
326
+
327
+ MIT License (see the bundled `LICENSE` file)
328
+
329
+ ## Support
330
+
331
+ - **Issues**: [GitHub Issues](https://github.com/lattifai/lattifai-python/issues)
332
+ - **Discussions**: [GitHub Discussions](https://github.com/lattifai/lattifai-python/discussions)
333
+ - **Discord**: [Join our community](https://discord.gg/gTZqdaBJ)
@@ -0,0 +1,22 @@
1
+ lattifai/__init__.py,sha256=JXUg0dT74UyAtKOjewRs9ijr5sl9SYsc6oU_WItY314,1497
2
+ lattifai/base_client.py,sha256=ktFtATjL9pLSJUD-VqeJKA1FHkrsGHX7Uq_x00H7gO8,3322
3
+ lattifai/client.py,sha256=QXbdTuDA5Aap2udu4iig7CVxlgwOIrydpuLlVASs0aA,5145
4
+ lattifai/bin/__init__.py,sha256=7YhmtEM8kbxJtz2-KIskvpLKBZAvkMSceVx8z4fkgQ4,61
5
+ lattifai/bin/align.py,sha256=nQs901SDYmxyH2AXBtjgZGzrpwLaxANQRYP49Bd1AWo,1669
6
+ lattifai/bin/cli_base.py,sha256=y535WXDRX8StloFn9icpfw7nQt0JxuWBIuPMnRxAYy8,392
7
+ lattifai/bin/subtitle.py,sha256=bUWImAHpvyY59Vskqb5loQiD5ytQOxR8lTQRiQ4LyNA,647
8
+ lattifai/io/__init__.py,sha256=vHWRN7MvAch-GUeFqqO-gM57SM-4YOpGUjIxFJdjfPA,671
9
+ lattifai/io/reader.py,sha256=mtgxT5c_BiHbqqJvPE3nf7TIe_OcWgGu1zr6iXasfrk,2591
10
+ lattifai/io/supervision.py,sha256=5UfSsgBhXoDU3-6drDtoD7y8HIiA4xRKZnbOKgeejwM,354
11
+ lattifai/io/writer.py,sha256=1eAEFLlL8kricxRDPFBtVmeC4IiFyFnjbWXvw0VU-q4,2036
12
+ lattifai/tokenizer/__init__.py,sha256=aqv44PDtq6g3oFFKW_l4HSR5ywT5W8eP1dHHywIvBfs,72
13
+ lattifai/tokenizer/phonemizer.py,sha256=SfRi1KIMpmaao6OVmR1h_I_3QU-vrE6D5bh72Afg5XM,1759
14
+ lattifai/tokenizer/tokenizer.py,sha256=Yuo0pLPQnF2uX0Fm5g8i5vtcADn7GeLpSqdGpMJgTww,11492
15
+ lattifai/workers/__init__.py,sha256=s6YfkIq4FDIAzY9sPjRpXnJfszj2repqnMTqydRM5Zw,83
16
+ lattifai/workers/lattice1_alpha.py,sha256=1VFo59EcygEctTHOhkcII8v3_mrj8JEJ8Fcaqk_7LVo,5762
17
+ lattifai-0.2.2.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
18
+ lattifai-0.2.2.dist-info/METADATA,sha256=4vmPOYKsIlvADiw0zUDQ2dbDpe-vOV-o5A0Hs1p7xfg,10971
19
+ lattifai-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
20
+ lattifai-0.2.2.dist-info/entry_points.txt,sha256=CwTI2NbJvF9msIHboAfTA99cmDr_HOWoODjS8R64JOw,131
21
+ lattifai-0.2.2.dist-info/top_level.txt,sha256=-OVWZ68YYFcTN13ARkLasp2OUappe9wEVq-CKes7jM4,17
22
+ lattifai-0.2.2.dist-info/RECORD,,
@@ -1,147 +0,0 @@
1
- import gzip
2
- import pickle
3
- from collections import defaultdict
4
- from itertools import chain
5
- from typing import Any, Dict, List, Optional, Tuple
6
-
7
- import torch
8
-
9
- from lattifai.base_client import SyncAPIClient
10
- from lattifai.io import Supervision
11
- from lattifai.tokenizers.phonemizer import G2Phonemizer
12
-
13
PUNCTUATION = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~'
# Characters stripped from both ends of a word before the vocabulary lookup.
PUNCTUATION_SPACE = PUNCTUATION + ' '
STAR_TOKEN = '※'

GROUPING_SEPARATOR = '✹'

# Words longer than this are never treated as OOV candidates for G2P.
MAXIMUM_WORD_LENGTH = 40


class LatticeTokenizer:
    """Tokenizer for converting Lhotse Cut to LatticeGraph.

    Out-of-vocabulary (OOV) words are phonemized locally with the optional
    G2P model; the lattice itself is built and decoded by the remote
    ``tokenize`` / ``detokenize`` endpoints reached through
    ``client_wrapper``.
    """

    # Opening/closing characters of fully bracketed tokens such as
    # ``[applause]`` or ``(laughs)``; they reuse the pronunciations stored
    # for ``oov_word`` instead of being sent to the G2P model.
    _BRACKET_PAIRS = (('(', ')'), ('[', ']'))

    def __init__(self, client_wrapper: 'SyncAPIClient'):
        """Create an empty tokenizer bound to ``client_wrapper``.

        Args:
            client_wrapper: Client whose ``post`` method is used for the
                ``tokenize`` and ``detokenize`` requests.
        """
        self.client_wrapper = client_wrapper
        self.words: List[str] = []
        self.g2p_model: Any = None  # Placeholder for G2P model
        # ``list`` (not a lambda) keeps the mapping picklable and matches
        # the factory used by ``from_pretrained``.
        self.dictionaries = defaultdict(list)
        self.oov_word = '<unk>'

    @staticmethod
    def from_pretrained(
        client_wrapper: 'SyncAPIClient',
        model_path: str,
        device: str = 'cpu',
        compressed: bool = True,
    ):
        """Load tokenizer from exported binary file.

        Args:
            client_wrapper: Client handed to the new tokenizer.
            model_path: Directory containing ``words.bin`` and, optionally,
                ``g2p.bin``.
            device: Device string forwarded to ``G2Phonemizer``.
            compressed: Whether ``words.bin`` is gzip-compressed.

        Returns:
            A ``LatticeTokenizer`` populated from ``words.bin``; its
            ``g2p_model`` stays ``None`` when ``g2p.bin`` does not exist.
        """
        from pathlib import Path

        words_model_path = f'{model_path}/words.bin'
        opener = gzip.open if compressed else open
        # NOTE(security): unpickling executes arbitrary code from the file;
        # only load model directories obtained from a trusted source.
        with opener(words_model_path, 'rb') as f:
            data = pickle.load(f)

        tokenizer = LatticeTokenizer(client_wrapper=client_wrapper)
        tokenizer.words = data['words']
        tokenizer.dictionaries = defaultdict(list, data['dictionaries'])
        tokenizer.oov_word = data['oov_word']

        g2p_model_path = f'{model_path}/g2p.bin'
        if Path(g2p_model_path).exists():
            tokenizer.g2p_model = G2Phonemizer(g2p_model_path, device=device)
        return tokenizer

    def _vocabulary(self):
        """Return ``self.words`` as a container with O(1) membership tests."""
        words = self.words
        if isinstance(words, (set, frozenset, dict)):
            return words
        return set(words)

    def prenormalize(self, texts: List[str], language: Optional[str] = None) -> Dict[str, List[List[str]]]:
        """Collect pronunciations for the OOV words found in ``texts``.

        Each text is lower-cased, hyphens and dashes are replaced by spaces,
        and the result is split on whitespace. Words missing from
        ``self.words`` become OOV candidates, which are handled as follows:

        * fully bracketed tokens reuse the pronunciations of ``oov_word``;
        * candidates that are empty or in-vocabulary once surrounding
          punctuation is stripped are dropped;
        * the remainder is sent to the G2P model unless ``self.dictionaries``
          already has an entry for the word.

        New pronunciations are cached in ``self.dictionaries``.

        Args:
            texts: Raw texts to scan.
            language: Passed to the G2P model as ``lang``.

        Returns:
            Mapping of OOV word to its non-empty pronunciation list, in
            first-seen order. Empty when there is nothing to report.

        Raises:
            ValueError: If no G2P model is loaded.
        """
        if not self.g2p_model:
            raise ValueError('G2P model is not loaded, cannot prenormalize texts')

        vocabulary = self._vocabulary()

        # A dict is used as an ordered set so that the G2P batch and the
        # returned mapping do not depend on string hash randomization.
        candidates: Dict[str, None] = {}
        for text in texts:
            for word in text.lower().replace('-', ' ').replace('—', ' ').replace('–', ' ').split():
                if word not in vocabulary and len(word) <= MAXIMUM_WORD_LENGTH:
                    candidates[word] = None
        if not candidates:
            return {}

        oov_words: List[str] = []
        for word in candidates:
            if any(word.startswith(p) and word.endswith(q) for (p, q) in self._BRACKET_PAIRS):
                self.dictionaries[word] = self.dictionaries[self.oov_word]
                oov_words.append(word)
                continue
            # The stripped form only decides whether the word is kept; the
            # original (unstripped) token is what gets phonemized and returned.
            stripped = word.strip(PUNCTUATION_SPACE)
            if stripped and stripped not in vocabulary:
                oov_words.append(word)

        g2p_words = [w for w in oov_words if w not in self.dictionaries]
        if g2p_words:
            predictions = self.g2p_model(words=g2p_words, lang=language, batch_size=len(g2p_words), num_prons=4)
            for word, word_predictions in zip(g2p_words, predictions):
                for pronunciation in word_predictions:
                    if pronunciation and pronunciation not in self.dictionaries[word]:
                        self.dictionaries[word].append(pronunciation)

        return {w: self.dictionaries[w] for w in oov_words if self.dictionaries[w]}

    def tokenize(self, supervisions: List['Supervision']) -> Tuple[str, Tuple[Any, Any, float]]:
        """Request a lattice for ``supervisions`` from the ``tokenize`` endpoint.

        Args:
            supervisions: Items exposing ``text`` and ``to_dict()``.

        Returns:
            ``(lattice_id, (lattice_graph, final_state, acoustic_scale))``
            taken from the response; ``acoustic_scale`` defaults to ``1.0``
            when the response omits it.

        Raises:
            ValueError: If no G2P model is loaded.
            RuntimeError: If the endpoint does not answer with status 200.
        """
        pronunciation_dictionaries = self.prenormalize([s.text for s in supervisions])
        response = self.client_wrapper.post(
            'tokenize',
            json={
                'supervisions': [s.to_dict() for s in supervisions],
                'pronunciation_dictionaries': pronunciation_dictionaries,
            },
        )
        if response.status_code != 200:
            raise RuntimeError(f'Failed to tokenize texts: {response.text}')
        result = response.json()
        lattice_id = result['id']
        return lattice_id, (result['lattice_graph'], result['final_state'], result.get('acoustic_scale', 1.0))

    def detokenize(
        self,
        lattice_id: str,
        lattice_results: Tuple['torch.Tensor', Any, Any, float, float, Any],
        # return_supervisions: bool = True,
        # return_details: bool = False,
    ) -> List['Supervision']:
        """Turn alignment results into supervisions via the ``detokenize`` endpoint.

        Args:
            lattice_id: Identifier returned by ``tokenize``.
            lattice_results: Six-tuple ``(emission, results, labels,
                frame_shift, offset, channel)``. Only the first entry of
                ``results`` and of ``labels`` is sent; ``emission`` is unused.

        Returns:
            Supervisions built from the ``supervisions`` list of the response.

        Raises:
            RuntimeError: If the endpoint does not answer with status 200.
        """
        _emission, results, labels, frame_shift, offset, channel = lattice_results
        response = self.client_wrapper.post(
            'detokenize',
            json={
                'lattice_id': lattice_id,
                'frame_shift': frame_shift,
                'results': [t.to_dict() for t in results[0]],
                'labels': labels[0],
                'offset': offset,
                'channel': channel,
                'destroy_lattice': True,
            },
        )
        if response.status_code != 200:
            raise RuntimeError(f'Failed to detokenize lattice: {response.text}')
        result = response.json()
        # if return_details:
        #     raise NotImplementedError("return_details is not implemented yet")
        return [Supervision.from_dict(s) for s in result['supervisions']]
140
-
141
-
142
- # Compute average score weighted by the span length
143
def _score(spans):
    """Return the length-weighted mean of ``span.score``, rounded to 4 digits.

    Args:
        spans: Sequence of objects exposing ``score`` and ``len()``.

    Returns:
        ``0.0`` when ``spans`` is empty or every span has zero length
        (the weighted mean is undefined there); otherwise the mean score
        weighted by span length.
    """
    if not spans:
        return 0.0
    # TokenSpan(token=token, start=start, end=end, score=scores[start:end].mean().item())
    total_length = sum(len(s) for s in spans)
    if total_length == 0:
        # Guard against ZeroDivisionError when all spans are empty.
        return 0.0
    return round(sum(s.score * len(s) for s in spans) / total_length, ndigits=4)