cjm-transcript-segment-align 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cjm_transcript_segment_align/__init__.py +1 -0
- cjm_transcript_segment_align/_modidx.py +101 -0
- cjm_transcript_segment_align/components/__init__.py +0 -0
- cjm_transcript_segment_align/components/handlers.py +331 -0
- cjm_transcript_segment_align/components/helpers.py +85 -0
- cjm_transcript_segment_align/components/keyboard_config.py +323 -0
- cjm_transcript_segment_align/components/step_renderer.py +624 -0
- cjm_transcript_segment_align/html_ids.py +44 -0
- cjm_transcript_segment_align/routes/__init__.py +0 -0
- cjm_transcript_segment_align/routes/chrome.py +169 -0
- cjm_transcript_segment_align/routes/forced_alignment.py +396 -0
- cjm_transcript_segment_align/services/__init__.py +0 -0
- cjm_transcript_segment_align/services/forced_alignment.py +301 -0
- cjm_transcript_segment_align-0.0.1.dist-info/LICENSE +201 -0
- cjm_transcript_segment_align-0.0.1.dist-info/METADATA +777 -0
- cjm_transcript_segment_align-0.0.1.dist-info/RECORD +19 -0
- cjm_transcript_segment_align-0.0.1.dist-info/WHEEL +5 -0
- cjm_transcript_segment_align-0.0.1.dist-info/entry_points.txt +2 -0
- cjm_transcript_segment_align-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,777 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: cjm-transcript-segment-align
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: FastHTML dual-column UI for text segmentation and VAD alignment in transcript decomposition workflows, with forced-alignment-based text splitting that matches text segments to VAD chunks.
|
|
5
|
+
Home-page: https://github.com/cj-mills/cjm-transcript-segment-align
|
|
6
|
+
Author: Christian J. Mills
|
|
7
|
+
Author-email: 9126128+cj-mills@users.noreply.github.com
|
|
8
|
+
License: Apache-2.0
|
|
9
|
+
Keywords: nbdev jupyter notebook python
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Natural Language :: English
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Requires-Python: >=3.12
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: cjm-plugin-system
|
|
19
|
+
Requires-Dist: cjm_transcription_plugin_system
|
|
20
|
+
Requires-Dist: cjm_transcript_segmentation
|
|
21
|
+
Requires-Dist: cjm_transcript_vad_align
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Dynamic: author
|
|
24
|
+
Dynamic: author-email
|
|
25
|
+
Dynamic: classifier
|
|
26
|
+
Dynamic: description
|
|
27
|
+
Dynamic: description-content-type
|
|
28
|
+
Dynamic: home-page
|
|
29
|
+
Dynamic: keywords
|
|
30
|
+
Dynamic: license
|
|
31
|
+
Dynamic: provides-extra
|
|
32
|
+
Dynamic: requires-dist
|
|
33
|
+
Dynamic: requires-python
|
|
34
|
+
Dynamic: summary
|
|
35
|
+
|
|
36
|
+
# cjm-transcript-segment-align
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
|
|
40
|
+
|
|
41
|
+
## Install
|
|
42
|
+
|
|
43
|
+
``` bash
|
|
44
|
+
pip install cjm_transcript_segment_align
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Project Structure
|
|
48
|
+
|
|
49
|
+
nbs/
|
|
50
|
+
├── components/ (4)
|
|
51
|
+
│ ├── handlers.ipynb # Handler wrappers for cross-domain coordination (alignment status updates)
|
|
52
|
+
│ ├── helpers.ipynb # State extraction helpers for cross-domain coordination in Phase 2 combined step
|
|
53
|
+
│ ├── keyboard_config.ipynb # Shared keyboard navigation configuration for the combined Phase 2 step
|
|
54
|
+
│ └── step_renderer.ipynb # Phase 2 combined step renderer: dual-column layout for Segment & Align
|
|
55
|
+
├── routes/ (2)
|
|
56
|
+
│ ├── chrome.ipynb # Shared chrome switching route handlers for the combined Phase 2 step
|
|
57
|
+
│ └── forced_alignment.ipynb # Routes for triggering forced alignment, polling progress, and toggling between NLTK and force-aligned pre-splits
|
|
58
|
+
├── services/ (1)
|
|
59
|
+
│ └── forced_alignment.ipynb # Forced alignment service for audio-informed text pre-splitting via forced alignment plugin
|
|
60
|
+
└── html_ids.ipynb # HTML ID constants for Phase 2 Shell: Dual-Column Layout shared chrome
|
|
61
|
+
|
|
62
|
+
Total: 8 notebooks across 3 directories
|
|
63
|
+
|
|
64
|
+
## Module Dependencies
|
|
65
|
+
|
|
66
|
+
``` mermaid
|
|
67
|
+
graph LR
|
|
68
|
+
components_handlers[components.handlers<br/>handlers]
|
|
69
|
+
components_helpers[components.helpers<br/>helpers]
|
|
70
|
+
components_keyboard_config[components.keyboard_config<br/>keyboard_config]
|
|
71
|
+
components_step_renderer[components.step_renderer<br/>step_combined]
|
|
72
|
+
html_ids[html_ids<br/>html_ids]
|
|
73
|
+
routes_chrome[routes.chrome<br/>chrome]
|
|
74
|
+
routes_forced_alignment[routes.forced_alignment<br/>forced_alignment]
|
|
75
|
+
services_forced_alignment[services.forced_alignment<br/>forced_alignment]
|
|
76
|
+
|
|
77
|
+
components_handlers --> components_step_renderer
|
|
78
|
+
components_handlers --> html_ids
|
|
79
|
+
components_handlers --> components_keyboard_config
|
|
80
|
+
components_keyboard_config --> html_ids
|
|
81
|
+
components_step_renderer --> html_ids
|
|
82
|
+
components_step_renderer --> components_keyboard_config
|
|
83
|
+
components_step_renderer --> components_helpers
|
|
84
|
+
routes_chrome --> html_ids
|
|
85
|
+
routes_chrome --> components_keyboard_config
|
|
86
|
+
routes_chrome --> components_step_renderer
|
|
87
|
+
routes_forced_alignment --> components_step_renderer
|
|
88
|
+
routes_forced_alignment --> html_ids
|
|
89
|
+
routes_forced_alignment --> services_forced_alignment
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
*13 cross-module dependencies detected*
|
|
93
|
+
|
|
94
|
+
## CLI Reference
|
|
95
|
+
|
|
96
|
+
No CLI commands found in this project.
|
|
97
|
+
|
|
98
|
+
## Module Overview
|
|
99
|
+
|
|
100
|
+
Detailed documentation for each module in the project:
|
|
101
|
+
|
|
102
|
+
### chrome (`chrome.ipynb`)
|
|
103
|
+
|
|
104
|
+
> Shared chrome switching route handlers for the combined Phase 2 step
|
|
105
|
+
|
|
106
|
+
#### Import
|
|
107
|
+
|
|
108
|
+
``` python
|
|
109
|
+
from cjm_transcript_segment_align.routes.chrome import (
|
|
110
|
+
DEBUG_SWITCH_CHROME,
|
|
111
|
+
init_chrome_router
|
|
112
|
+
)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
#### Functions
|
|
116
|
+
|
|
117
|
+
``` python
|
|
118
|
+
async def _handle_switch_chrome(
|
|
119
|
+
state_store:SQLiteWorkflowStateStore, # State store instance
|
|
120
|
+
workflow_id:str, # Workflow identifier
|
|
121
|
+
request, # FastHTML request object
|
|
122
|
+
sess, # FastHTML session object
|
|
123
|
+
seg_urls:SegmentationUrls, # URL bundle for segmentation routes
|
|
124
|
+
align_urls:AlignmentUrls, # URL bundle for alignment routes
|
|
125
|
+
) -> tuple: # OOB swaps for shared chrome containers
|
|
126
|
+
"Switch shared chrome content based on active column."
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
``` python
|
|
130
|
+
def init_chrome_router(
|
|
131
|
+
state_store: SQLiteWorkflowStateStore, # State store instance
|
|
132
|
+
workflow_id: str, # Workflow identifier
|
|
133
|
+
seg_urls: SegmentationUrls, # URL bundle for segmentation routes
|
|
134
|
+
align_urls: AlignmentUrls, # URL bundle for alignment routes
|
|
135
|
+
prefix: str, # Route prefix (e.g., "/workflow/core/chrome")
|
|
136
|
+
) -> Tuple[APIRouter, Dict[str, Callable]]: # (router, route_dict)
|
|
137
|
+
"Initialize chrome switching routes."
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
#### Variables
|
|
141
|
+
|
|
142
|
+
``` python
|
|
143
|
+
DEBUG_SWITCH_CHROME = False
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### forced_alignment (`routes/forced_alignment.ipynb`)
|
|
147
|
+
|
|
148
|
+
> Routes for triggering forced alignment, polling progress, and toggling
|
|
149
|
+
> between NLTK and force-aligned pre-splits
|
|
150
|
+
|
|
151
|
+
#### Import
|
|
152
|
+
|
|
153
|
+
``` python
|
|
154
|
+
from cjm_transcript_segment_align.routes.forced_alignment import (
|
|
155
|
+
FA_CONTAINER_ID,
|
|
156
|
+
FA_STATUS_ID,
|
|
157
|
+
render_fa_trigger_button,
|
|
158
|
+
render_fa_progress,
|
|
159
|
+
render_fa_toggle,
|
|
160
|
+
render_fa_controls,
|
|
161
|
+
init_forced_alignment_routers
|
|
162
|
+
)
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
#### Functions
|
|
166
|
+
|
|
167
|
+
``` python
|
|
168
|
+
def render_fa_trigger_button(
|
|
169
|
+
trigger_url: str, # URL for forced alignment trigger route
|
|
170
|
+
disabled: bool = False, # Whether button is disabled
|
|
171
|
+
) -> Any: # Force Align trigger button
|
|
172
|
+
"Render the Force Align trigger button."
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
``` python
|
|
176
|
+
def render_fa_progress(
|
|
177
|
+
progress_val: float, # Progress value 0.0-1.0
|
|
178
|
+
message: str, # Progress stage message
|
|
179
|
+
progress_url: str, # URL for progress polling
|
|
180
|
+
) -> Any: # Progress indicator with polling
|
|
181
|
+
"Render forced alignment progress indicator with HTMX polling."
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
``` python
|
|
185
|
+
def render_fa_toggle(
|
|
186
|
+
active_presplit: str, # "nltk" or "forced_alignment"
|
|
187
|
+
toggle_url: str, # URL for toggle route
|
|
188
|
+
) -> Any: # Toggle button group
|
|
189
|
+
"Render the NLTK / Force Aligned toggle button group."
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
``` python
|
|
193
|
+
def render_fa_controls(
|
|
194
|
+
trigger_url: str = "", # URL for trigger route
|
|
195
|
+
toggle_url: str = "", # URL for toggle route
|
|
196
|
+
active_presplit: Optional[str] = None, # Current active mode (None = no FA done yet)
|
|
197
|
+
fa_available: bool = False, # Whether FA plugin is available
|
|
198
|
+
oob: bool = False, # Whether to render as OOB swap
|
|
199
|
+
) -> Any: # FA controls container
|
|
200
|
+
"""
|
|
201
|
+
Render the forced alignment controls container.
|
|
202
|
+
|
|
203
|
+
Shows either:
|
|
204
|
+
- Trigger button (if FA not yet run)
|
|
205
|
+
- Toggle (if FA has been run)
|
|
206
|
+
- Nothing (if FA plugin not available)
|
|
207
|
+
"""
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
``` python
|
|
211
|
+
async def _handle_fa_trigger(
|
|
212
|
+
state_store: SQLiteWorkflowStateStore,
|
|
213
|
+
workflow_id: str,
|
|
214
|
+
fa_service: ForcedAlignmentService,
|
|
215
|
+
source_service: SourceService,
|
|
216
|
+
request: Any,
|
|
217
|
+
sess: Any,
|
|
218
|
+
seg_urls: SegmentationUrls,
|
|
219
|
+
progress_url: str,
|
|
220
|
+
toggle_url: str,
|
|
221
|
+
) -> Any: # OOB updates for card stack, alignment status, FA controls, mini-stats
|
|
222
|
+
"Trigger forced alignment and replace working segments."
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
``` python
|
|
226
|
+
async def _handle_fa_toggle(
|
|
227
|
+
state_store: SQLiteWorkflowStateStore,
|
|
228
|
+
workflow_id: str,
|
|
229
|
+
request: Any,
|
|
230
|
+
sess: Any,
|
|
231
|
+
seg_urls: SegmentationUrls,
|
|
232
|
+
toggle_url: str,
|
|
233
|
+
) -> Any: # OOB updates for card stack, alignment status, FA controls, mini-stats
|
|
234
|
+
"Toggle between NLTK and force-aligned pre-splits."
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
``` python
|
|
238
|
+
def init_forced_alignment_routers(
|
|
239
|
+
state_store: SQLiteWorkflowStateStore, # State store instance
|
|
240
|
+
workflow_id: str, # Workflow identifier
|
|
241
|
+
fa_service: ForcedAlignmentService, # Forced alignment service
|
|
242
|
+
source_service: SourceService, # Source service for audio paths/text
|
|
243
|
+
seg_urls: SegmentationUrls, # Segmentation URL bundle
|
|
244
|
+
prefix: str, # Route prefix (e.g., "/fa")
|
|
245
|
+
) -> Tuple[APIRouter, Dict[str, Callable]]: # (router, route_dict)
|
|
246
|
+
"Initialize forced alignment routes."
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
#### Variables
|
|
250
|
+
|
|
251
|
+
``` python
|
|
252
|
+
FA_CONTAINER_ID = 'sd-fa-controls'
|
|
253
|
+
FA_STATUS_ID = 'sd-fa-status'
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
### forced_alignment (`services/forced_alignment.ipynb`)
|
|
257
|
+
|
|
258
|
+
> Forced alignment service for audio-informed text pre-splitting via
|
|
259
|
+
> forced alignment plugin
|
|
260
|
+
|
|
261
|
+
#### Import
|
|
262
|
+
|
|
263
|
+
``` python
|
|
264
|
+
from cjm_transcript_segment_align.services.forced_alignment import (
|
|
265
|
+
map_fa_words_to_text,
|
|
266
|
+
assign_words_to_chunks,
|
|
267
|
+
build_segments_from_alignment,
|
|
268
|
+
ForcedAlignmentService
|
|
269
|
+
)
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
#### Functions
|
|
273
|
+
|
|
274
|
+
``` python
|
|
275
|
+
def _strip_punct(text: str) -> str
|
|
276
|
+
"Strip punctuation from text for comparison with FA output."
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
``` python
|
|
280
|
+
def map_fa_words_to_text(
|
|
281
|
+
text: str, # Original text with punctuation
|
|
282
|
+
fa_items: List[ForcedAlignItem], # FA word-level alignment results
|
|
283
|
+
) -> List[Tuple[int, int]]: # List of (start_char, end_char) spans into original text
|
|
284
|
+
"""
|
|
285
|
+
Map forced alignment words back to character spans in the original text.
|
|
286
|
+
|
|
287
|
+
Walks through the original text, matching each FA word (punctuation-stripped)
|
|
288
|
+
against original text tokens. Returns character offset pairs for each FA word.
|
|
289
|
+
"""
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
``` python
|
|
293
|
+
def assign_words_to_chunks(
|
|
294
|
+
fa_items: List[ForcedAlignItem], # FA word-level alignment results
|
|
295
|
+
vad_chunks: List[VADChunk], # VAD chunks with start/end times
|
|
296
|
+
) -> List[int]: # Chunk index for each FA word
|
|
297
|
+
"""
|
|
298
|
+
Assign each FA word to a VAD chunk based on timestamp overlap.
|
|
299
|
+
|
|
300
|
+
Words whose start_time falls within a chunk's [start, end] range are
|
|
301
|
+
assigned to that chunk. Words in silence gaps are assigned to the
|
|
302
|
+
nearest chunk by time proximity.
|
|
303
|
+
"""
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
``` python
|
|
307
|
+
def build_segments_from_alignment(
|
|
308
|
+
text: str, # Original text with punctuation
|
|
309
|
+
spans: List[Tuple[int, int]], # Character spans from map_fa_words_to_text
|
|
310
|
+
assignments: List[int], # Chunk index per word from assign_words_to_chunks
|
|
311
|
+
num_chunks: int, # Total number of VAD chunks
|
|
312
|
+
source_id: Optional[str] = None, # Source block ID for traceability
|
|
313
|
+
source_provider_id: Optional[str] = None, # Source provider identifier
|
|
314
|
+
) -> List[TextSegment]: # One segment per VAD chunk
|
|
315
|
+
"""
|
|
316
|
+
Build TextSegment list by grouping words by their assigned VAD chunk.
|
|
317
|
+
|
|
318
|
+
Each VAD chunk gets one TextSegment whose text is the joined original
|
|
319
|
+
(punctuated) words assigned to that chunk.
|
|
320
|
+
"""
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
#### Classes
|
|
324
|
+
|
|
325
|
+
``` python
|
|
326
|
+
class ForcedAlignmentService:
|
|
327
|
+
def __init__(
|
|
328
|
+
self,
|
|
329
|
+
plugin_manager: PluginManager, # Plugin manager for accessing forced alignment plugin
|
|
330
|
+
plugin_name: str = "cjm-transcription-plugin-qwen3-forced-aligner", # Name of the FA plugin
|
|
331
|
+
)
|
|
332
|
+
"Service for audio-informed text pre-splitting via forced alignment plugin."
|
|
333
|
+
|
|
334
|
+
def __init__(
|
|
335
|
+
self,
|
|
336
|
+
plugin_manager: PluginManager, # Plugin manager for accessing forced alignment plugin
|
|
337
|
+
plugin_name: str = "cjm-transcription-plugin-qwen3-forced-aligner", # Name of the FA plugin
|
|
338
|
+
)
|
|
339
|
+
"Initialize the forced alignment service."
|
|
340
|
+
|
|
341
|
+
def is_available(self) -> bool: # True if plugin is loaded and ready
|
|
342
|
+
"""Check if the forced alignment plugin is available."""
|
|
343
|
+
return self._manager.get_plugin(self._plugin_name) is not None
|
|
344
|
+
|
|
345
|
+
def ensure_loaded(
|
|
346
|
+
self,
|
|
347
|
+
config: Optional[Dict[str, Any]] = None, # Optional plugin configuration
|
|
348
|
+
) -> bool: # True if successfully loaded
|
|
349
|
+
"Ensure the forced alignment plugin is loaded."
|
|
350
|
+
|
|
351
|
+
def ensure_loaded(
|
|
352
|
+
self,
|
|
353
|
+
config: Optional[Dict[str, Any]] = None, # Optional plugin configuration
|
|
354
|
+
) -> bool: # True if successfully loaded
|
|
355
|
+
"Ensure the forced alignment plugin is loaded."
|
|
356
|
+
|
|
357
|
+
async def align_and_split_async(
|
|
358
|
+
self,
|
|
359
|
+
audio_path: str, # Path to the audio file
|
|
360
|
+
text: str, # Original transcript text blob (with punctuation)
|
|
361
|
+
vad_chunks: List[VADChunk], # VAD chunks for this audio
|
|
362
|
+
source_id: Optional[str] = None, # Source block ID for traceability
|
|
363
|
+
source_provider_id: Optional[str] = None, # Source provider identifier
|
|
364
|
+
) -> List[TextSegment]: # One segment per VAD chunk
|
|
365
|
+
"Run forced alignment and split text into segments matching VAD chunks."
|
|
366
|
+
|
|
367
|
+
def align_and_split(
|
|
368
|
+
self,
|
|
369
|
+
audio_path: str, # Path to the audio file
|
|
370
|
+
text: str, # Original transcript text blob
|
|
371
|
+
vad_chunks: List[VADChunk], # VAD chunks for this audio
|
|
372
|
+
source_id: Optional[str] = None,
|
|
373
|
+
source_provider_id: Optional[str] = None,
|
|
374
|
+
) -> List[TextSegment]: # One segment per VAD chunk
|
|
375
|
+
"Run forced alignment and split text synchronously."
|
|
376
|
+
|
|
377
|
+
async def align_and_split_combined_async(
|
|
378
|
+
self,
|
|
379
|
+
source_blocks: List[Any], # SourceBlock objects with id, provider_id, text
|
|
380
|
+
audio_paths: List[str], # Audio file path per source block
|
|
381
|
+
vad_chunks_by_source: List[List[VADChunk]], # VAD chunks per source block
|
|
382
|
+
) -> List[TextSegment]: # Combined segments with global indexing
|
|
383
|
+
"Align and split multiple source blocks with their respective audio."
|
|
384
|
+
```
|
|
385
|
+
|
|
386
|
+
#### Variables
|
|
387
|
+
|
|
388
|
+
``` python
|
|
389
|
+
_PUNCT_RE
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
### handlers (`handlers.ipynb`)
|
|
393
|
+
|
|
394
|
+
> Handler wrappers for cross-domain coordination (alignment status
|
|
395
|
+
> updates)
|
|
396
|
+
|
|
397
|
+
#### Import
|
|
398
|
+
|
|
399
|
+
``` python
|
|
400
|
+
from cjm_transcript_segment_align.components.handlers import (
|
|
401
|
+
wrapped_seg_split,
|
|
402
|
+
wrapped_seg_merge,
|
|
403
|
+
wrapped_seg_undo,
|
|
404
|
+
wrapped_seg_reset,
|
|
405
|
+
wrapped_seg_ai_split,
|
|
406
|
+
wrap_seg_mutation_handler,
|
|
407
|
+
wrap_align_mutation_handler,
|
|
408
|
+
create_seg_init_chrome_wrapper,
|
|
409
|
+
create_align_init_chrome_wrapper
|
|
410
|
+
)
|
|
411
|
+
```
|
|
412
|
+
|
|
413
|
+
#### Functions
|
|
414
|
+
|
|
415
|
+
``` python
|
|
416
|
+
def _find_session_id(args, kwargs):
|
|
417
|
+
"""Find session_id from args or kwargs."""
|
|
418
|
+
# First check kwargs
|
|
419
|
+
if 'sess' in kwargs
|
|
420
|
+
"Find session_id from args or kwargs."
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
``` python
|
|
424
|
+
def wrap_seg_mutation_handler(
|
|
425
|
+
handler: Callable, # Handler function to wrap
|
|
426
|
+
) -> Callable: # Wrapped handler that appends alignment status OOB
|
|
427
|
+
"""
|
|
428
|
+
Wrap a segmentation mutation handler to add alignment status OOB.
|
|
429
|
+
|
|
430
|
+
The handler is expected to take (state_store, workflow_id, ...) as first params.
|
|
431
|
+
"""
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
``` python
|
|
435
|
+
def wrap_align_mutation_handler(
|
|
436
|
+
handler: Callable, # Handler function to wrap
|
|
437
|
+
) -> Callable: # Wrapped handler that appends alignment status OOB
|
|
438
|
+
"""
|
|
439
|
+
Wrap an alignment mutation handler to add alignment status OOB.
|
|
440
|
+
|
|
441
|
+
The handler is expected to take (state_store, workflow_id, ...) as first params.
|
|
442
|
+
"""
|
|
443
|
+
```
|
|
444
|
+
|
|
445
|
+
``` python
|
|
446
|
+
def create_seg_init_chrome_wrapper(
|
|
447
|
+
align_urls:AlignmentUrls, # URL bundle for alignment routes (for KB system)
|
|
448
|
+
switch_chrome_url:str, # URL for chrome switching (for KB system)
|
|
449
|
+
fa_trigger_url:str="", # URL for forced alignment trigger (optional)
|
|
450
|
+
fa_toggle_url:str="", # URL for forced alignment toggle (optional)
|
|
451
|
+
fa_available:bool=False, # Whether forced alignment plugin is available
|
|
452
|
+
) -> Callable: # Wrapped handler that builds KB system and shared chrome
|
|
453
|
+
"""
|
|
454
|
+
Create a wrapper for seg init that builds combined KB system and shared chrome.
|
|
455
|
+
|
|
456
|
+
This is a factory that captures the URLs needed for KB system assembly.
|
|
457
|
+
Optionally includes forced alignment controls if FA plugin is available.
|
|
458
|
+
"""
|
|
459
|
+
```
|
|
460
|
+
|
|
461
|
+
``` python
|
|
462
|
+
def create_align_init_chrome_wrapper() -> Callable: # Wrapped handler that adds alignment status
|
|
463
|
+
"""Create a wrapper for align init that adds mini-stats and alignment status.
|
|
464
|
+
|
|
465
|
+
Alignment init is simpler than seg init - it doesn't need to build the
|
|
466
|
+
full KB system (seg init handles that). It just updates alignment-specific
|
|
467
|
+
chrome and the alignment status badge.
|
|
468
|
+
"""
|
|
469
|
+
async def wrapped_align_init(
|
|
470
|
+
state_store:WorkflowStateStore,
|
|
471
|
+
workflow_id:str,
|
|
472
|
+
source_service:SourceService,
|
|
473
|
+
alignment_service:AlignmentService,
|
|
474
|
+
request:Any,
|
|
475
|
+
sess:Any,
|
|
476
|
+
urls:AlignmentUrls,
|
|
477
|
+
visible_count:int=5,
|
|
478
|
+
card_width:int=40,
|
|
479
|
+
)
|
|
480
|
+
"""
|
|
481
|
+
Create a wrapper for align init that adds mini-stats and alignment status.
|
|
482
|
+
|
|
483
|
+
Alignment init is simpler than seg init - it doesn't need to build the
|
|
484
|
+
full KB system (seg init handles that). It just updates alignment-specific
|
|
485
|
+
chrome and the alignment status badge.
|
|
486
|
+
"""
|
|
487
|
+
```
|
|
488
|
+
|
|
489
|
+
### helpers (`helpers.ipynb`)
|
|
490
|
+
|
|
491
|
+
> State extraction helpers for cross-domain coordination in Phase 2
|
|
492
|
+
> combined step
|
|
493
|
+
|
|
494
|
+
#### Import
|
|
495
|
+
|
|
496
|
+
``` python
|
|
497
|
+
from cjm_transcript_segment_align.components.helpers import (
|
|
498
|
+
SEG_DEFAULT_VISIBLE_COUNT,
|
|
499
|
+
SEG_DEFAULT_CARD_WIDTH,
|
|
500
|
+
ALIGN_DEFAULT_VISIBLE_COUNT,
|
|
501
|
+
ALIGN_DEFAULT_CARD_WIDTH,
|
|
502
|
+
check_alignment_ready,
|
|
503
|
+
extract_seg_state,
|
|
504
|
+
extract_alignment_state,
|
|
505
|
+
get_segment_count,
|
|
506
|
+
get_chunk_count
|
|
507
|
+
)
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
#### Functions
|
|
511
|
+
|
|
512
|
+
``` python
|
|
513
|
+
def check_alignment_ready(
|
|
514
|
+
segment_count:int, # Number of text segments
|
|
515
|
+
chunk_count:int, # Number of VAD chunks
|
|
516
|
+
) -> bool: # True if counts match for 1:1 alignment
|
|
517
|
+
"Check if segment and VAD chunk counts match for 1:1 alignment."
|
|
518
|
+
```
|
|
519
|
+
|
|
520
|
+
``` python
|
|
521
|
+
def extract_seg_state(
|
|
522
|
+
ctx:InteractionContext, # Interaction context with state
|
|
523
|
+
) -> Dict[str, Any]: # Extracted state values
|
|
524
|
+
"Extract segmentation state as explicit values for renderers."
|
|
525
|
+
```
|
|
526
|
+
|
|
527
|
+
``` python
|
|
528
|
+
def extract_alignment_state(
|
|
529
|
+
ctx:InteractionContext, # Interaction context with state
|
|
530
|
+
) -> Dict[str, Any]: # Extracted state values
|
|
531
|
+
"Extract alignment state as explicit values for renderers."
|
|
532
|
+
```
|
|
533
|
+
|
|
534
|
+
``` python
|
|
535
|
+
def get_segment_count(
|
|
536
|
+
ctx:InteractionContext, # Interaction context with state
|
|
537
|
+
) -> int: # Number of segments
|
|
538
|
+
"Get segment count from state without full extraction."
|
|
539
|
+
```
|
|
540
|
+
|
|
541
|
+
``` python
|
|
542
|
+
def get_chunk_count(
|
|
543
|
+
ctx:InteractionContext, # Interaction context with state
|
|
544
|
+
) -> int: # Number of VAD chunks
|
|
545
|
+
"Get VAD chunk count from state without full extraction."
|
|
546
|
+
```
|
|
547
|
+
|
|
548
|
+
#### Variables
|
|
549
|
+
|
|
550
|
+
``` python
|
|
551
|
+
SEG_DEFAULT_VISIBLE_COUNT = 3
|
|
552
|
+
SEG_DEFAULT_CARD_WIDTH = 80
|
|
553
|
+
ALIGN_DEFAULT_VISIBLE_COUNT = 5
|
|
554
|
+
ALIGN_DEFAULT_CARD_WIDTH = 40
|
|
555
|
+
```
|
|
556
|
+
|
|
557
|
+
### html_ids (`html_ids.ipynb`)
|
|
558
|
+
|
|
559
|
+
> HTML ID constants for Phase 2 Shell: Dual-Column Layout shared chrome
|
|
560
|
+
|
|
561
|
+
#### Import
|
|
562
|
+
|
|
563
|
+
``` python
|
|
564
|
+
from cjm_transcript_segment_align.html_ids import (
|
|
565
|
+
CombinedHtmlIds
|
|
566
|
+
)
|
|
567
|
+
```
|
|
568
|
+
|
|
569
|
+
#### Classes
|
|
570
|
+
|
|
571
|
+
``` python
|
|
572
|
+
class CombinedHtmlIds:
|
|
573
|
+
"HTML ID constants for Phase 2 Shell: Dual-Column Layout shared chrome."
|
|
574
|
+
|
|
575
|
+
def as_selector(
|
|
576
|
+
id_str:str # The HTML ID to convert
|
|
577
|
+
) -> str: # CSS selector with # prefix
|
|
578
|
+
"Convert an ID to a CSS selector format."
|
|
579
|
+
```
|
|
580
|
+
|
|
581
|
+
### keyboard_config (`keyboard_config.ipynb`)
|
|
582
|
+
|
|
583
|
+
> Shared keyboard navigation configuration for the combined Phase 2 step
|
|
584
|
+
|
|
585
|
+
#### Import
|
|
586
|
+
|
|
587
|
+
``` python
|
|
588
|
+
from cjm_transcript_segment_align.components.keyboard_config import (
|
|
589
|
+
DEBUG_KB_SYSTEM,
|
|
590
|
+
ZONE_CHANGE_CALLBACK,
|
|
591
|
+
SWITCH_CHROME_BTN_ID,
|
|
592
|
+
render_keyboard_hints_collapsible,
|
|
593
|
+
build_combined_kb_system,
|
|
594
|
+
generate_zone_change_js
|
|
595
|
+
)
|
|
596
|
+
```
|
|
597
|
+
|
|
598
|
+
#### Functions
|
|
599
|
+
|
|
600
|
+
``` python
|
|
601
|
+
def render_keyboard_hints_collapsible(
|
|
602
|
+
manager:ZoneManager, # Keyboard zone manager with actions configured
|
|
603
|
+
container_id:str="sd-keyboard-hints", # HTML ID for the hints container
|
|
604
|
+
include_zone_switch:bool=False, # Whether to include zone switch hints
|
|
605
|
+
) -> Any: # Collapsible keyboard hints component
|
|
606
|
+
"Render keyboard shortcut hints in a collapsible DaisyUI collapse."
|
|
607
|
+
```
|
|
608
|
+
|
|
609
|
+
``` python
|
|
610
|
+
def build_combined_kb_system(
|
|
611
|
+
seg_urls:SegmentationUrls, # URL bundle for segmentation routes
|
|
612
|
+
align_urls:AlignmentUrls, # URL bundle for alignment routes
|
|
613
|
+
) -> Tuple[ZoneManager, Any]: # (keyboard manager, rendered keyboard system)
|
|
614
|
+
"Build combined keyboard system with segmentation and alignment zones."
|
|
615
|
+
```
|
|
616
|
+
|
|
617
|
+
``` python
|
|
618
|
+
def generate_zone_change_js(
|
|
619
|
+
switch_chrome_url:str="", # URL for chrome swap handler (empty = no swap)
|
|
620
|
+
) -> Script: # Script element with zone change callback and click handlers
|
|
621
|
+
"Generate JavaScript for zone change handling and column click handlers."
|
|
622
|
+
```
|
|
623
|
+
|
|
624
|
+
#### Variables
|
|
625
|
+
|
|
626
|
+
``` python
|
|
627
|
+
DEBUG_KB_SYSTEM = True
|
|
628
|
+
ZONE_CHANGE_CALLBACK = 'onCombinedZoneChange'
|
|
629
|
+
SWITCH_CHROME_BTN_ID = 'sd-switch-chrome-btn'
|
|
630
|
+
```
|
|
631
|
+
|
|
632
|
+
### step_combined (`step_renderer.ipynb`)
|
|
633
|
+
|
|
634
|
+
> Phase 2 combined step renderer: dual-column layout for Segment & Align
|
|
635
|
+
|
|
636
|
+
#### Import
|
|
637
|
+
|
|
638
|
+
``` python
|
|
639
|
+
from cjm_transcript_segment_align.components.step_renderer import (
|
|
640
|
+
DEBUG_COMBINED_RENDER,
|
|
641
|
+
render_seg_mini_stats_badge,
|
|
642
|
+
render_align_mini_stats_badge,
|
|
643
|
+
render_alignment_status_text,
|
|
644
|
+
render_alignment_status,
|
|
645
|
+
render_footer_inner_content,
|
|
646
|
+
render_combined_step
|
|
647
|
+
)
|
|
648
|
+
```
|
|
649
|
+
|
|
650
|
+
#### Functions
|
|
651
|
+
|
|
652
|
+
``` python
|
|
653
|
+
def _render_column_header(
|
|
654
|
+
title:str, # Column title (e.g., "Text Decomposition")
|
|
655
|
+
stats_id:str, # HTML ID for the mini-stats badge area
|
|
656
|
+
header_id:str, # HTML ID for the column header container
|
|
657
|
+
initial_text:str="--", # Initial text for the mini-stats badge
|
|
658
|
+
) -> Any: # Column header component
|
|
659
|
+
"Render a column header with title and mini-stats badge."
|
|
660
|
+
```
|
|
661
|
+
|
|
662
|
+
``` python
|
|
663
|
+
def render_seg_mini_stats_badge(
|
|
664
|
+
segments:List[TextSegment], # Current segments
|
|
665
|
+
oob:bool=False, # Whether to render as OOB swap
|
|
666
|
+
) -> Any: # Mini-stats badge Span
|
|
667
|
+
"Render the segmentation mini-stats badge for the column header."
|
|
668
|
+
```
|
|
669
|
+
|
|
670
|
+
``` python
|
|
671
|
+
def render_align_mini_stats_badge(
|
|
672
|
+
chunks:List[VADChunk], # Current VAD chunks
|
|
673
|
+
oob:bool=False, # Whether to render as OOB swap
|
|
674
|
+
) -> Any: # Mini-stats badge Span
|
|
675
|
+
"Render the alignment mini-stats badge for the column header."
|
|
676
|
+
```
|
|
677
|
+
|
|
678
|
+
``` python
|
|
679
|
+
def render_alignment_status_text(
|
|
680
|
+
segment_count:int, # Number of text segments
|
|
681
|
+
chunk_count:int, # Number of VAD chunks
|
|
682
|
+
) -> str: # Status message text
|
|
683
|
+
"Generate alignment status message based on segment and VAD chunk counts."
|
|
684
|
+
```
|
|
685
|
+
|
|
686
|
+
``` python
|
|
687
|
+
def render_alignment_status(
|
|
688
|
+
segment_count:int, # Number of text segments
|
|
689
|
+
chunk_count:int, # Number of VAD chunks
|
|
690
|
+
oob:bool=False, # Whether to render as OOB swap
|
|
691
|
+
) -> Any: # Alignment status badge component
|
|
692
|
+
"Render the alignment status indicator badge."
|
|
693
|
+
```
|
|
694
|
+
|
|
695
|
+
``` python
|
|
696
|
+
def render_footer_inner_content(
|
|
697
|
+
column_footer:Any, # Column-specific footer content (decomp or align)
|
|
698
|
+
segment_count:int, # Number of text segments
|
|
699
|
+
chunk_count:int, # Number of VAD chunks
|
|
700
|
+
) -> Any: # Styled wrapper div with column footer and alignment status
|
|
701
|
+
"""
|
|
702
|
+
Render the footer inner content with consistent styling.
|
|
703
|
+
|
|
704
|
+
This ensures the footer layout (justify-between) is preserved across
|
|
705
|
+
all OOB swaps. Both the column-specific footer content and the
|
|
706
|
+
alignment status indicator are wrapped in a flex container.
|
|
707
|
+
"""
|
|
708
|
+
```
|
|
709
|
+
|
|
710
|
+
``` python
|
|
711
|
+
def _placeholder(
|
|
712
|
+
text:str, # Placeholder message
|
|
713
|
+
) -> Any: # Styled placeholder paragraph
|
|
714
|
+
"Render a placeholder text element for uninitialized chrome containers."
|
|
715
|
+
```
|
|
716
|
+
|
|
717
|
+
``` python
|
|
718
|
+
def _render_shared_chrome(
|
|
719
|
+
seg_state:dict=None, # Extracted segmentation state (None = show placeholders)
|
|
720
|
+
align_state:dict=None, # Extracted alignment state (None = no VAD data yet)
|
|
721
|
+
urls:SegmentationUrls=None, # Segmentation URL bundle (required when seg_state provided)
|
|
722
|
+
kb_manager:Any=None, # Keyboard manager (required when seg_state provided)
|
|
723
|
+
) -> tuple: # (hints, toolbar, controls, footer)
|
|
724
|
+
"""
|
|
725
|
+
Render shared chrome containers, populated with segmentation content when initialized.
|
|
726
|
+
|
|
727
|
+
Takes extracted state dicts from `extract_seg_state()` and `extract_alignment_state()`
|
|
728
|
+
which contain deserialized TextSegment and VADChunk objects.
|
|
729
|
+
"""
|
|
730
|
+
```
|
|
731
|
+
|
|
732
|
+
``` python
|
|
733
|
+
def _render_seg_column(
|
|
734
|
+
is_active:bool=True, # Whether this column is initially active
|
|
735
|
+
column_body:Any=None, # Pre-rendered column body (None = not initialized)
|
|
736
|
+
mini_stats_text:str="--", # Mini-stats badge text
|
|
737
|
+
init_url:str="", # URL for auto-trigger initialization
|
|
738
|
+
) -> Any: # Left column component
|
|
739
|
+
"Render the left segmentation column."
|
|
740
|
+
```
|
|
741
|
+
|
|
742
|
+
``` python
|
|
743
|
+
def _render_alignment_column(
|
|
744
|
+
is_active:bool=False, # Whether this column is initially active
|
|
745
|
+
column_body:Any=None, # Pre-rendered column body (None = not initialized)
|
|
746
|
+
mini_stats_text:str="--", # Mini-stats badge text
|
|
747
|
+
init_url:str="", # URL for auto-trigger initialization
|
|
748
|
+
) -> Any: # Right column component
|
|
749
|
+
"Render the right alignment column."
|
|
750
|
+
```
|
|
751
|
+
|
|
752
|
+
``` python
|
|
753
|
+
def _render_keyboard_system_container(
|
|
754
|
+
kb_system:Any=None, # Rendered keyboard system (None = empty container)
|
|
755
|
+
oob:bool=False, # Whether to render as OOB swap
|
|
756
|
+
) -> Any: # Div with id=KEYBOARD_SYSTEM containing KB elements
|
|
757
|
+
"Render stable container for keyboard navigation system elements."
|
|
758
|
+
```
|
|
759
|
+
|
|
760
|
+
``` python
|
|
761
|
+
def render_combined_step(
|
|
762
|
+
ctx:InteractionContext, # Interaction context with state and data
|
|
763
|
+
seg_urls:SegmentationUrls=None, # URL bundle for segmentation routes
|
|
764
|
+
align_urls:AlignmentUrls=None, # URL bundle for alignment routes
|
|
765
|
+
switch_chrome_url:str="", # URL for chrome switching route
|
|
766
|
+
) -> Any: # FastHTML component with full dual-column layout
|
|
767
|
+
"Render Phase 2: Combined Segment & Align step with dual-column layout."
|
|
768
|
+
```
|
|
769
|
+
|
|
770
|
+
#### Variables
|
|
771
|
+
|
|
772
|
+
``` python
|
|
773
|
+
DEBUG_COMBINED_RENDER = True
|
|
774
|
+
_FOOTER_INNER_CLS
|
|
775
|
+
_SEG_COLUMN_CLS
|
|
776
|
+
_ALIGNMENT_COLUMN_CLS
|
|
777
|
+
```
|