k2py 0.2.4__cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- k2py/__init__.py +137 -0
- k2py/_k2.cp314-win_amd64.pyd +0 -0
- k2py-0.2.4.dist-info/METADATA +79 -0
- k2py-0.2.4.dist-info/RECORD +5 -0
- k2py-0.2.4.dist-info/WHEEL +5 -0
k2py/__init__.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""k2py - Python bindings for k2 OnlineDenseIntersecter
|
|
2
|
+
|
|
3
|
+
This package provides Python bindings for the k2 forced alignment library,
|
|
4
|
+
specifically the OnlineDenseIntersecter for efficient streaming decoding.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
# Must match the distribution version declared in the package metadata
# (the wheel and its METADATA are published as 0.2.4).
__version__ = "0.2.4"
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Union, List, Dict, Any, Tuple
|
|
11
|
+
|
|
12
|
+
from ._k2 import (
|
|
13
|
+
CreateFsaVecFromStr,
|
|
14
|
+
FsaVec,
|
|
15
|
+
Array2,
|
|
16
|
+
OnlineDenseIntersecter as _OnlineDenseIntersecter,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
@dataclass
class AlignedToken:
    """One aligned token together with its timing information and score."""

    # Token identifier; either a string or an integer.
    token_id: Union[str, int]
    # Position of the token (presumably in frames, like ``duration`` -- TODO confirm).
    timestamp: int
    duration: int  # in frames
    score: float
    # Free-form extra attributes; every instance gets its own empty dict by default.
    attr: dict = field(default_factory=dict)

    def to_dict(self):
        """Return the five fields as a plain dict, in declaration order.

        The ``attr`` value is the very dict held by the instance, not a copy.
        """
        exported = ("token_id", "timestamp", "duration", "score", "attr")
        return {name: getattr(self, name) for name in exported}
|
|
35
|
+
|
|
36
|
+
class OnlineDenseIntersecter(_OnlineDenseIntersecter):
    """Python-friendly layer over the native (C++) OnlineDenseIntersecter.

    Only ``get_partial_labels`` and ``finish`` are overridden; everything else
    is inherited unchanged from the native class.
    """

    def get_partial_labels(self) -> List[int]:
        """Return the native decoder's partial labels as a Python list.

        These are described as per-frame token IDs taken from the current
        lattice state, meant for computing emission-based confidence scores
        in streaming mode.

        Returns:
            List of token IDs, one per frame.
        """
        partial = super().get_partial_labels()
        return list(partial)

    def finish(self) -> Tuple[List[List["AlignedToken"]], List[List[int]]]:
        """Finish decoding and repackage the native result.

        The native ``finish()`` result is read as a mapping with the keys
        ``token_ids``, ``timestamps``, ``durations``, ``scores`` and
        ``labels``. The first four are combined element-wise into
        ``AlignedToken`` objects.

        Returns:
            A tuple ``(results, labels)`` where:
              - results: a one-element list holding the list of AlignedToken
                objects
              - labels: a one-element list holding the native ``labels`` entry
        """
        native = super().finish()

        # Walk the four parallel sequences in lockstep; each row is
        # (token_id, timestamp, duration, score).
        rows = zip(
            native["token_ids"],
            native["timestamps"],
            native["durations"],
            native["scores"],
        )
        tokens = [AlignedToken(*row) for row in rows]

        # This wrapper only supports a single stream (kept simple on purpose),
        # but the interface expects batch-shaped results, hence the
        # one-element outer lists.
        return [tokens], [native["labels"]]
|
|
74
|
+
|
|
75
|
+
def AlignSegments(
    graph_result: Dict[str, Any],
    scores: Any,  # numpy array
    search_beam: float,
    output_beam: float,
    min_active_states: int,
    max_active_states: int,
    use_double_scores: bool = True,
    allow_partial: bool = True,
) -> Tuple[List[List["AlignedToken"]], List[List[int]]]:
    """Align segments using offline dense intersection.

    All arguments are forwarded positionally, in order, to the native
    ``AlignSegments`` function; its result is then repackaged into
    ``AlignedToken`` objects.

    Args:
        graph_result: Dictionary returned by CreateFsaVecFromStr
        scores: Numpy array of scores (rows, cols)
        search_beam: Beam size for search
        output_beam: Beam size for output
        min_active_states: Minimum active states
        max_active_states: Maximum active states
        use_double_scores: Whether to use double precision for scores
        allow_partial: If True, treat all states on last frame as final
                       when no final state is active. Default True.

    Returns:
        A tuple ``(results, labels)`` where:
          - results: a one-element list holding the list of AlignedToken
            objects
          - labels: a one-element list holding the native ``labels`` entry
    """
    # Imported under an alias because the native function has the same name
    # as this wrapper.
    from ._k2 import AlignSegments as _AlignSegments

    native = _AlignSegments(
        graph_result,
        scores,
        search_beam,
        output_beam,
        min_active_states,
        max_active_states,
        use_double_scores,
        allow_partial,
    )

    # Walk the four parallel sequences in lockstep; each row is
    # (token_id, timestamp, duration, score).
    rows = zip(
        native["token_ids"],
        native["timestamps"],
        native["durations"],
        native["scores"],
    )
    tokens = [AlignedToken(*row) for row in rows]

    # One-element outer lists give the batch-shaped (list of lists) format.
    return [tokens], [native["labels"]]
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# Public API of the package: first the native symbols imported from ``._k2``,
# then the Python-level wrappers defined in this module.
__all__ = [
    "CreateFsaVecFromStr", "FsaVec", "Array2",
    "OnlineDenseIntersecter", "AlignedToken", "AlignSegments",
]
|
|
Binary file
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: k2py
|
|
3
|
+
Version: 0.2.4
|
|
4
|
+
Summary: Python bindings for k2
|
|
5
|
+
Keywords: k2,forced-alignment,speech,asr
|
|
6
|
+
Author: The LattifAI Development Team
|
|
7
|
+
License: Apache-2.0
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
+
Classifier: Programming Language :: C++
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
21
|
+
Project-URL: Homepage, https://github.com/lattifai/k2py
|
|
22
|
+
Project-URL: Repository, https://github.com/lattifai/k2py.git
|
|
23
|
+
Project-URL: Issues, https://github.com/lattifai/k2py/issues
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# k2py
|
|
28
|
+
|
|
29
|
+
Python bindings for k2
|
|
30
|
+
|
|
31
|
+
## Features
|
|
32
|
+
|
|
33
|
+
- **Efficient Streaming Decoding**: OnlineDenseIntersecter for real-time forced alignment
|
|
34
|
+
- **Cross-Platform**: Supports Linux, macOS, and Windows
|
|
35
|
+
- **Python 3.10+**: Compatible with modern Python versions
|
|
36
|
+
- **Built with pybind11**: Fast C++ bindings with minimal overhead
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
|
|
40
|
+
### From PyPI
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install k2py
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Usage
|
|
47
|
+
|
|
48
|
+
### Basic Example
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from k2py import OnlineDenseIntersecter, CreateFsaVecFromStr
|
|
52
|
+
import numpy as np
|
|
53
|
+
|
|
54
|
+
# Create FSA from string representation
|
|
55
|
+
fsa_str = "0 1 1 1.0\n1 2 2 1.0\n2"
|
|
56
|
+
result = CreateFsaVecFromStr(fsa_str, final_state=2)
|
|
57
|
+
|
|
58
|
+
# Initialize decoder
|
|
59
|
+
decoder = OnlineDenseIntersecter(
|
|
60
|
+
result["fsa"],
|
|
61
|
+
result["aux_labels"],
|
|
62
|
+
search_beam=20.0,
|
|
63
|
+
output_beam=8.0,
|
|
64
|
+
min_active_states=30,
|
|
65
|
+
max_active_states=10000
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# Prepare acoustic scores (num_frames x vocab_size)
|
|
69
|
+
scores = np.random.randn(100, 50).astype(np.float32)
|
|
70
|
+
|
|
71
|
+
# Decode
|
|
72
|
+
lattice = decoder.DecodeWithArray(scores, return_lattice=True)
|
|
73
|
+
|
|
74
|
+
# Get final alignment result
|
|
75
|
+
results, labels = decoder.finish()
|
|
76
|
+
print(f"Token IDs: {[t.token_id for t in results[0]]}")
|
|
77
|
+
print(f"Timestamps: {[t.timestamp for t in results[0]]}")
|
|
78
|
+
print(f"Durations: {[t.duration for t in results[0]]}")
|
|
79
|
+
```
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
k2py/__init__.py,sha256=KR-D9Luea5f6G6-O9pHsQoJTPH7kn2zLwuFXSIDs-M8,4741
|
|
2
|
+
k2py/_k2.cp314-win_amd64.pyd,sha256=mqhKfBM-kqIrKs3PH5-UC42Mwsaj9y_FzEShcMMzOuI,681472
|
|
3
|
+
k2py-0.2.4.dist-info/METADATA,sha256=YZxTxwdJp7Hawm0YeUP1tPYebyv0Lbc17TB2hjgO8OY,2306
|
|
4
|
+
k2py-0.2.4.dist-info/WHEEL,sha256=gWMs92Yhbl9pSGNRFWCXG1mfeuNl7HhxcJG5aLu4nQc,106
|
|
5
|
+
k2py-0.2.4.dist-info/RECORD,,
|