signlangtk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- signlangtk-0.1.0.dist-info/METADATA +749 -0
- signlangtk-0.1.0.dist-info/RECORD +245 -0
- signlangtk-0.1.0.dist-info/WHEEL +5 -0
- signlangtk-0.1.0.dist-info/entry_points.txt +2 -0
- signlangtk-0.1.0.dist-info/licenses/LICENSE +36 -0
- signlangtk-0.1.0.dist-info/top_level.txt +1 -0
- sltk/__init__.py +91 -0
- sltk/analysis/__init__.py +120 -0
- sltk/analysis/captions.py +465 -0
- sltk/analysis/clustering.py +455 -0
- sltk/analysis/cross_dataset.py +1086 -0
- sltk/analysis/datasets.py +502 -0
- sltk/analysis/linguistic.py +973 -0
- sltk/api/__init__.py +15 -0
- sltk/api/cache.py +836 -0
- sltk/api/dependencies.py +616 -0
- sltk/api/main.py +243 -0
- sltk/api/models.py +2373 -0
- sltk/api/routers/__init__.py +53 -0
- sltk/api/routers/admin.py +76 -0
- sltk/api/routers/analysis.py +2026 -0
- sltk/api/routers/audio.py +230 -0
- sltk/api/routers/chat.py +169 -0
- sltk/api/routers/corpus.py +1326 -0
- sltk/api/routers/datasets.py +1242 -0
- sltk/api/routers/embeddings.py +1292 -0
- sltk/api/routers/extraction.py +887 -0
- sltk/api/routers/features.py +545 -0
- sltk/api/routers/jobs.py +959 -0
- sltk/api/routers/linguistics.py +1035 -0
- sltk/api/routers/nm_annotator.py +259 -0
- sltk/api/routers/nms.py +375 -0
- sltk/api/routers/poses.py +171 -0
- sltk/api/routers/processing.py +409 -0
- sltk/api/routers/search.py +477 -0
- sltk/api/routers/segmentation.py +273 -0
- sltk/api/routers/settings.py +305 -0
- sltk/api/routers/signrep.py +856 -0
- sltk/api/routers/videos.py +216 -0
- sltk/api/routers/visualization.py +176 -0
- sltk/api/routers/workspace.py +1350 -0
- sltk/api/security.py +517 -0
- sltk/api/static/assets/index-BNaO_dUm.js +752 -0
- sltk/api/static/assets/index-CjVA5X6p.css +1 -0
- sltk/api/static/index.html +23 -0
- sltk/api/validators.py +852 -0
- sltk/cli/__init__.py +7 -0
- sltk/cli/main.py +801 -0
- sltk/config.py +447 -0
- sltk/data/__init__.py +43 -0
- sltk/data/core.py +547 -0
- sltk/data/datasets/__init__.py +134 -0
- sltk/data/datasets/asl_citizen.py +267 -0
- sltk/data/datasets/asldict.py +294 -0
- sltk/data/datasets/base.py +351 -0
- sltk/data/datasets/bobsl.py +449 -0
- sltk/data/datasets/bslcp.py +319 -0
- sltk/data/datasets/csl_daily.py +259 -0
- sltk/data/datasets/how2sign.py +280 -0
- sltk/data/datasets/phoenix.py +267 -0
- sltk/data/datasets/wlasl.py +286 -0
- sltk/data/datasets/ytsl.py +650 -0
- sltk/data/feature_discovery.py +329 -0
- sltk/data/feature_manager.py +551 -0
- sltk/data/formats.py +478 -0
- sltk/data/rotations.py +264 -0
- sltk/db/__init__.py +7 -0
- sltk/db/corpus.py +4499 -0
- sltk/db/ingest.py +135 -0
- sltk/db/queries.py +491 -0
- sltk/db/schema.py +126 -0
- sltk/embedding/__init__.py +119 -0
- sltk/embedding/pipeline.py +1520 -0
- sltk/embedding/signrep.py +326 -0
- sltk/embedding/signrep_models/__init__.py +28 -0
- sltk/embedding/signrep_models/augmentation/__init__.py +5 -0
- sltk/embedding/signrep_models/augmentation/video_transform.py +40 -0
- sltk/embedding/signrep_models/backbones/__init__.py +100 -0
- sltk/embedding/signrep_models/backbones/hfhub.py +70 -0
- sltk/embedding/signrep_models/backbones/hiera.py +581 -0
- sltk/embedding/signrep_models/backbones/hiera_clstoken.py +648 -0
- sltk/embedding/signrep_models/backbones/hiera_nodec_mae.py +281 -0
- sltk/embedding/signrep_models/backbones/hiera_utils.py +316 -0
- sltk/embedding/signrep_models/final_model.py +101 -0
- sltk/embedding/signrep_models/heads/__init__.py +9 -0
- sltk/embedding/signrep_models/heads/dict_head.py +76 -0
- sltk/embedding/similarity.py +583 -0
- sltk/exceptions.py +271 -0
- sltk/extraction/__init__.py +146 -0
- sltk/extraction/base.py +309 -0
- sltk/extraction/depth_anything_deps/__init__.py +0 -0
- sltk/extraction/depth_anything_deps/efficient_run.py +214 -0
- sltk/extraction/depth_anything_deps/utils/dc_utils.py +97 -0
- sltk/extraction/depth_anything_deps/utils/util.py +74 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/dinov2.py +415 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/dinov2_layers/__init__.py +11 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/dinov2_layers/attention.py +83 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/dinov2_layers/block.py +252 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/dinov2_layers/drop_path.py +35 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/dinov2_layers/layer_scale.py +28 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/dinov2_layers/mlp.py +41 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/dinov2_layers/patch_embed.py +89 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/dinov2_layers/swiglu_ffn.py +63 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/dpt.py +160 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/dpt_temporal.py +125 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/motion_module/attention.py +429 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/motion_module/motion_module.py +321 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/util/blocks.py +162 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/util/transform.py +158 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/video_depth.py +163 -0
- sltk/extraction/depth_anything_deps/video_depth_anything/video_depth_stream.py +161 -0
- sltk/extraction/depth_extraction.py +382 -0
- sltk/extraction/mediapipe.py +611 -0
- sltk/extraction/nlf.py +464 -0
- sltk/extraction/nlf_deps/__init__.py +16 -0
- sltk/extraction/nlf_deps/nlf_processor.py +584 -0
- sltk/extraction/rtmpose.py +627 -0
- sltk/extraction/rtmpose_deps/__init__.py +11 -0
- sltk/extraction/rtmpose_deps/filtering.py +251 -0
- sltk/extraction/rtmpose_deps/rtmpose_processor.py +148 -0
- sltk/extraction/smplfx_deps/__init__.py +0 -0
- sltk/extraction/smplfx_deps/bspline_kernel.py +489 -0
- sltk/extraction/smplfx_deps/extraction_utils.py +34 -0
- sltk/extraction/smplfx_deps/h5_adapters.py +414 -0
- sltk/extraction/smplfx_deps/h5_processing.py +251 -0
- sltk/extraction/smplfx_deps/losses.py +364 -0
- sltk/extraction/smplfx_deps/maths_utils.py +203 -0
- sltk/extraction/smplfx_deps/smpl_fx/__init__.py +31 -0
- sltk/extraction/smplfx_deps/smpl_fx/body_masks.py +424 -0
- sltk/extraction/smplfx_deps/smpl_fx/body_models.py +3141 -0
- sltk/extraction/smplfx_deps/smpl_fx/joint_names.py +320 -0
- sltk/extraction/smplfx_deps/smpl_fx/lbs.py +536 -0
- sltk/extraction/smplfx_deps/smpl_fx/teeth.py +339 -0
- sltk/extraction/smplfx_deps/smpl_fx/utils.py +132 -0
- sltk/extraction/smplfx_deps/smpl_fx/utils_3d.py +333 -0
- sltk/extraction/smplfx_deps/smpl_fx/vertex_ids.py +77 -0
- sltk/extraction/smplfx_deps/smpl_fx/vertex_joint_selector.py +80 -0
- sltk/extraction/smplfx_deps/smplx_smoother_optimizer.py +609 -0
- sltk/extraction/smplfx_deps/temporal_losses.py +130 -0
- sltk/extraction/smplfx_deps/utils.py +310 -0
- sltk/extraction/smplfx_deps/utils_smplx.py +374 -0
- sltk/extraction/smplfx_deps/vision3d.py +214 -0
- sltk/extraction/smplfx_fitting.py +691 -0
- sltk/extraction/teaser.py +500 -0
- sltk/extraction/teaser_deps/__init__.py +23 -0
- sltk/extraction/teaser_deps/flame/__init__.py +8 -0
- sltk/extraction/teaser_deps/flame/flame_model.py +462 -0
- sltk/extraction/teaser_deps/flame/lbs.py +254 -0
- sltk/extraction/teaser_deps/flame/processor.py +269 -0
- sltk/extraction/teaser_deps/teaser_encoder.py +170 -0
- sltk/extraction/teaser_deps/teaser_processor.py +371 -0
- sltk/extraction/uplift.py +532 -0
- sltk/extraction/weights.py +269 -0
- sltk/extraction/wilor.py +500 -0
- sltk/extraction/wilor_deps/__init__.py +19 -0
- sltk/extraction/wilor_deps/chumpy_stub.py +46 -0
- sltk/extraction/wilor_deps/utils.py +367 -0
- sltk/extraction/wilor_deps/vitdet_gpu.py +482 -0
- sltk/extraction/wilor_deps/wilor/__init__.py +0 -0
- sltk/extraction/wilor_deps/wilor/configs/__init__.py +126 -0
- sltk/extraction/wilor_deps/wilor/models/__init__.py +62 -0
- sltk/extraction/wilor_deps/wilor/models/backbones/__init__.py +30 -0
- sltk/extraction/wilor_deps/wilor/models/backbones/vit.py +511 -0
- sltk/extraction/wilor_deps/wilor/models/discriminator.py +98 -0
- sltk/extraction/wilor_deps/wilor/models/heads/__init__.py +1 -0
- sltk/extraction/wilor_deps/wilor/models/heads/refinement_net.py +259 -0
- sltk/extraction/wilor_deps/wilor/models/losses.py +100 -0
- sltk/extraction/wilor_deps/wilor/models/mano_wrapper.py +113 -0
- sltk/extraction/wilor_deps/wilor/models/wilor.py +445 -0
- sltk/extraction/wilor_deps/wilor/utils/__init__.py +51 -0
- sltk/extraction/wilor_deps/wilor/utils/geometry.py +135 -0
- sltk/extraction/wilor_deps/wilor/utils/mesh_renderer.py +215 -0
- sltk/extraction/wilor_deps/wilor/utils/misc.py +203 -0
- sltk/extraction/wilor_deps/wilor/utils/pose_utils.py +369 -0
- sltk/extraction/wilor_deps/wilor/utils/pylogger.py +17 -0
- sltk/extraction/wilor_deps/wilor/utils/render_openpose.py +433 -0
- sltk/extraction/wilor_deps/wilor/utils/renderer.py +461 -0
- sltk/extraction/wilor_deps/wilor/utils/rich_utils.py +109 -0
- sltk/extraction/wilor_deps/wilor/utils/skeleton_renderer.py +229 -0
- sltk/extraction/wilor_deps/wilor_processor.py +726 -0
- sltk/glossing/__init__.py +35 -0
- sltk/glossing/vocabulary.py +387 -0
- sltk/io/__init__.py +134 -0
- sltk/io/annotations.py +582 -0
- sltk/io/converters.py +1754 -0
- sltk/io/elan.py +786 -0
- sltk/io/elan_roundtrip.py +608 -0
- sltk/io/h5.py +467 -0
- sltk/io/safe_write.py +962 -0
- sltk/linguistics/__init__.py +152 -0
- sltk/linguistics/nonmanual.py +1209 -0
- sltk/linguistics/phonology.py +1392 -0
- sltk/linguistics/reliability.py +1121 -0
- sltk/llm/__init__.py +0 -0
- sltk/llm/agent.py +793 -0
- sltk/llm/tools.py +721 -0
- sltk/metrics/__init__.py +46 -0
- sltk/metrics/segmentation.py +1117 -0
- sltk/metrics/translation.py +205 -0
- sltk/nms/__init__.py +17 -0
- sltk/nms/constants.py +24 -0
- sltk/nms/detectors/__init__.py +7 -0
- sltk/nms/detectors/blinks.py +187 -0
- sltk/nms/detectors/eyes.py +307 -0
- sltk/nms/detectors/head.py +256 -0
- sltk/nms/detectors/mouth.py +143 -0
- sltk/nms/detectors/pipeline.py +188 -0
- sltk/nms/io/__init__.py +5 -0
- sltk/nms/io/exporters.py +324 -0
- sltk/nms/io/loaders.py +174 -0
- sltk/nms/io/video.py +25 -0
- sltk/nms/models.py +61 -0
- sltk/nms/overlay/__init__.py +3 -0
- sltk/nms/overlay/drawing.py +65 -0
- sltk/nms/overlay/renderer.py +420 -0
- sltk/nms/processing/__init__.py +5 -0
- sltk/nms/processing/adaptive.py +95 -0
- sltk/nms/processing/utils.py +83 -0
- sltk/nms/processing/validity.py +96 -0
- sltk/nms/runner.py +279 -0
- sltk/processing/__init__.py +50 -0
- sltk/processing/features.py +809 -0
- sltk/processing/normalization.py +356 -0
- sltk/segmentation/__init__.py +41 -0
- sltk/segmentation/_signsegmentor_v2/__init__.py +11 -0
- sltk/segmentation/_signsegmentor_v2/inference.py +110 -0
- sltk/segmentation/_signsegmentor_v2/model.py +280 -0
- sltk/segmentation/_signsegmentor_v2/postprocess.py +61 -0
- sltk/segmentation/h5_loader.py +130 -0
- sltk/segmentation/model.py +200 -0
- sltk/segmentation/output.py +350 -0
- sltk/segmentation/postprocess.py +114 -0
- sltk/segmentation/runner.py +313 -0
- sltk/segmentation/segmenter_v2.py +280 -0
- sltk/segmentation/transformer/__init__.py +15 -0
- sltk/segmentation/transformer/layers.py +391 -0
- sltk/utils/__init__.py +29 -0
- sltk/utils/research.py +547 -0
- sltk/visualisation/__init__.py +34 -0
- sltk/visualisation/analysis.py +415 -0
- sltk/visualisation/poses.py +384 -0
- sltk/visualisation/segments.py +298 -0
- sltk/visualization/__init__.py +19 -0
- sltk/visualization/pipeline.py +471 -0
- sltk/visualization/skeleton.py +596 -0
|
@@ -0,0 +1,749 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: signlangtk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Sign Language Toolkit for sign language research
|
|
5
|
+
Author: Sign Language Research Team
|
|
6
|
+
License-Expression: CC-BY-NC-4.0
|
|
7
|
+
Project-URL: Repository, https://github.com/ed-fish/sign-language-toolkit
|
|
8
|
+
Project-URL: Documentation, https://github.com/ed-fish/sign-language-toolkit#readme
|
|
9
|
+
Project-URL: Issues, https://github.com/ed-fish/sign-language-toolkit/issues
|
|
10
|
+
Keywords: sign language,computer vision,machine learning,linguistics,ELAN
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: numpy>=1.24.0
|
|
21
|
+
Requires-Dist: scipy>=1.10.0
|
|
22
|
+
Requires-Dist: h5py>=3.8.0
|
|
23
|
+
Requires-Dist: tqdm>=4.65.0
|
|
24
|
+
Requires-Dist: pyyaml>=6.0
|
|
25
|
+
Requires-Dist: click>=8.1.0
|
|
26
|
+
Requires-Dist: defusedxml>=0.7.0
|
|
27
|
+
Requires-Dist: nltk>=3.8.0
|
|
28
|
+
Requires-Dist: huggingface_hub>=0.20.0
|
|
29
|
+
Provides-Extra: mediapipe
|
|
30
|
+
Requires-Dist: mediapipe>=0.10.0; extra == "mediapipe"
|
|
31
|
+
Provides-Extra: wilor
|
|
32
|
+
Requires-Dist: torch>=2.0.0; extra == "wilor"
|
|
33
|
+
Requires-Dist: smplx>=0.1.28; extra == "wilor"
|
|
34
|
+
Requires-Dist: pytorch-lightning>=2.0.0; extra == "wilor"
|
|
35
|
+
Requires-Dist: yacs>=0.1.8; extra == "wilor"
|
|
36
|
+
Requires-Dist: ultralytics>=8.0.0; extra == "wilor"
|
|
37
|
+
Requires-Dist: timm>=0.9.0; extra == "wilor"
|
|
38
|
+
Requires-Dist: dill>=0.3.0; extra == "wilor"
|
|
39
|
+
Provides-Extra: nlf
|
|
40
|
+
Requires-Dist: torch>=2.0.0; extra == "nlf"
|
|
41
|
+
Provides-Extra: teaser
|
|
42
|
+
Requires-Dist: torch>=2.0.0; extra == "teaser"
|
|
43
|
+
Requires-Dist: ultralytics>=8.0.0; extra == "teaser"
|
|
44
|
+
Requires-Dist: timm>=0.9.0; extra == "teaser"
|
|
45
|
+
Provides-Extra: rtmpose
|
|
46
|
+
Requires-Dist: torch>=2.0.0; extra == "rtmpose"
|
|
47
|
+
Requires-Dist: mmpose>=1.1.0; extra == "rtmpose"
|
|
48
|
+
Requires-Dist: mmdet>=3.0.0; extra == "rtmpose"
|
|
49
|
+
Requires-Dist: mmengine>=0.7.0; extra == "rtmpose"
|
|
50
|
+
Requires-Dist: mmcv>=2.0.0; extra == "rtmpose"
|
|
51
|
+
Requires-Dist: openmim>=0.3.0; extra == "rtmpose"
|
|
52
|
+
Requires-Dist: decord>=0.6.0; extra == "rtmpose"
|
|
53
|
+
Provides-Extra: smplfx
|
|
54
|
+
Requires-Dist: torch>=2.0.0; extra == "smplfx"
|
|
55
|
+
Requires-Dist: smplx>=0.1.28; extra == "smplfx"
|
|
56
|
+
Requires-Dist: h5py>=3.10.0; extra == "smplfx"
|
|
57
|
+
Requires-Dist: hdf5plugin>=4.0.0; extra == "smplfx"
|
|
58
|
+
Requires-Dist: decord>=0.6.0; extra == "smplfx"
|
|
59
|
+
Provides-Extra: torch
|
|
60
|
+
Requires-Dist: torch>=2.0.0; extra == "torch"
|
|
61
|
+
Requires-Dist: torchvision>=0.15.0; extra == "torch"
|
|
62
|
+
Provides-Extra: data
|
|
63
|
+
Requires-Dist: lmdb>=1.4.0; extra == "data"
|
|
64
|
+
Requires-Dist: msgpack>=1.0.0; extra == "data"
|
|
65
|
+
Provides-Extra: metrics
|
|
66
|
+
Requires-Dist: sacrebleu>=2.3.0; extra == "metrics"
|
|
67
|
+
Requires-Dist: rouge-score>=0.1.2; extra == "metrics"
|
|
68
|
+
Provides-Extra: analysis
|
|
69
|
+
Requires-Dist: scikit-learn>=1.3.0; extra == "analysis"
|
|
70
|
+
Requires-Dist: umap-learn>=0.5.0; extra == "analysis"
|
|
71
|
+
Requires-Dist: hdbscan>=0.8.0; extra == "analysis"
|
|
72
|
+
Requires-Dist: albumentations>=1.3.0; extra == "analysis"
|
|
73
|
+
Provides-Extra: vis
|
|
74
|
+
Requires-Dist: matplotlib>=3.7.0; extra == "vis"
|
|
75
|
+
Requires-Dist: opencv-python>=4.8.0; extra == "vis"
|
|
76
|
+
Provides-Extra: api
|
|
77
|
+
Requires-Dist: fastapi>=0.109.0; extra == "api"
|
|
78
|
+
Requires-Dist: uvicorn[standard]>=0.25.0; extra == "api"
|
|
79
|
+
Requires-Dist: pydantic>=2.5.0; extra == "api"
|
|
80
|
+
Requires-Dist: python-multipart>=0.0.6; extra == "api"
|
|
81
|
+
Requires-Dist: slowapi>=0.1.9; extra == "api"
|
|
82
|
+
Requires-Dist: openai>=1.12.0; extra == "api"
|
|
83
|
+
Requires-Dist: anthropic>=0.39.0; extra == "api"
|
|
84
|
+
Provides-Extra: dev
|
|
85
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
86
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
87
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
88
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
89
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
90
|
+
Provides-Extra: docs
|
|
91
|
+
Requires-Dist: mkdocs>=1.5.0; extra == "docs"
|
|
92
|
+
Requires-Dist: mkdocs-material>=9.5.0; extra == "docs"
|
|
93
|
+
Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "docs"
|
|
94
|
+
Requires-Dist: pymdown-extensions>=10.0; extra == "docs"
|
|
95
|
+
Provides-Extra: all
|
|
96
|
+
Requires-Dist: signlangtk[analysis,api,data,mediapipe,metrics,nlf,rtmpose,smplfx,teaser,torch,vis,wilor]; extra == "all"
|
|
97
|
+
Dynamic: license-file
|
|
98
|
+
|
|
99
|
+
# Sign Language Toolkit (SLTK)
|
|
100
|
+
|
|
101
|
+
A research toolkit for sign language video analysis: workspace management, pose extraction, automatic segmentation, ELAN annotation editing, and a REST API serving a React annotation workstation.
|
|
102
|
+
|
|
103
|
+
## Installation
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
# Run these from a checkout of the repository (editable installs need the source tree).
# Core (data loading, formats, ELAN I/O)
|
|
107
|
+
pip install -e .
|
|
108
|
+
|
|
109
|
+
# With pose extraction
|
|
110
|
+
pip install -e ".[mediapipe]"
|
|
111
|
+
|
|
112
|
+
# With web API + frontend
|
|
113
|
+
pip install -e ".[api]"
|
|
114
|
+
|
|
115
|
+
# Everything
|
|
116
|
+
pip install -e ".[all]"
|
|
117
|
+
|
|
118
|
+
# Development (includes pytest, black, ruff, mypy)
|
|
119
|
+
pip install -e ".[dev]"
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Requires Python 3.10+.
|
|
123
|
+
|
|
124
|
+
## Quick Start
|
|
125
|
+
|
|
126
|
+
### Running the API
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
# Start FastAPI backend (port 8000) + Vite frontend (port 5173)
|
|
130
|
+
bash scripts/run_dev.sh
|
|
131
|
+
|
|
132
|
+
# Or run the backend only
|
|
133
|
+
uvicorn sltk.api.main:app --host 0.0.0.0 --port 8000
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Interactive docs at `http://localhost:8000/docs` (Swagger UI).
|
|
137
|
+
|
|
138
|
+
### Python Library
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from sltk.data import PoseSequence, Segment, SegmentList
|
|
142
|
+
from sltk.io import read_eaf, write_eaf
|
|
143
|
+
|
|
144
|
+
# Load poses from H5 file
|
|
145
|
+
poses = PoseSequence.load("video_wilor.h5", format="wilor", fps=25)
|
|
146
|
+
|
|
147
|
+
# Load ELAN annotations
|
|
148
|
+
segments = read_eaf("annotations.eaf", tiers=["Gloss"])
|
|
149
|
+
|
|
150
|
+
# Create segments and export to ELAN
|
|
151
|
+
new_segments = SegmentList([
|
|
152
|
+
Segment(start=0.0, end=1.5, label="HELLO", tier="Gloss"),
|
|
153
|
+
Segment(start=1.5, end=3.0, label="WORLD", tier="Gloss"),
|
|
154
|
+
])
|
|
155
|
+
write_eaf(new_segments, "output.eaf", video_path="source.mp4")
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### CLI
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
sltk convert input.npy output.h5 --from mediapipe --to wilor --fps 25
|
|
162
|
+
sltk evaluate predictions.txt references.txt --task translation
|
|
163
|
+
sltk to-elan segments.json --video source.mp4 --output annotations.eaf
|
|
164
|
+
sltk from-elan annotations.eaf --output segments.json --tier Gloss
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Processing Pipeline
|
|
170
|
+
|
|
171
|
+
SLTK provides a three-stage pipeline for processing sign language videos: **pose extraction** (video → H5), **segmentation** (H5 → sign boundaries), and **spotting** (segments → gloss labels). Each stage can be run independently.
|
|
172
|
+
|
|
173
|
+
### Overview
|
|
174
|
+
|
|
175
|
+
```
|
|
176
|
+
Video (.mp4)
|
|
177
|
+
│
|
|
178
|
+
├─► 1. Pose Extraction ──► {stem}_wilor.h5
|
|
179
|
+
│ (WiLoR hand model: MANO params, 3D keypoints)
|
|
180
|
+
│
|
|
181
|
+
└─► 2. Segmentation ──► {stem}_segments.eaf / .json
|
|
182
|
+
│ (Transformer BIO labelling: OUT/IN/BEGIN)
|
|
183
|
+
│
|
|
184
|
+
└─► 3. Spotting ──► {stem}_spotted.eaf
|
|
185
|
+
(SignRep: match segments to dictionary glosses)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Stage 1: Pose Extraction (Video → H5)
|
|
189
|
+
|
|
190
|
+
Extract hand poses from video using WiLoR. This produces an H5 file containing MANO rotation matrices and 3D keypoints per frame.
|
|
191
|
+
|
|
192
|
+
**Python:**
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
from sltk.extraction.wilor import WiLoRExtractor, WiLoRConfig
|
|
196
|
+
|
|
197
|
+
config = WiLoRConfig(
|
|
198
|
+
checkpoint_path="path/to/wilor_final.ckpt",
|
|
199
|
+
detector_path="path/to/detector.pt",
|
|
200
|
+
)
|
|
201
|
+
extractor = WiLoRExtractor(config)
|
|
202
|
+
extractor.load_model()
|
|
203
|
+
result = extractor.extract_from_video("video.mp4")
|
|
204
|
+
# Saves to video_wilor.h5
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
**API:**
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
# Start extraction job (runs in background)
|
|
211
|
+
curl -X POST http://localhost:8000/api/extraction/start \
|
|
212
|
+
-H "Content-Type: application/json" \
|
|
213
|
+
-d '{
|
|
214
|
+
"video_path": "/data/video.mp4",
|
|
215
|
+
"output_root": "/data/output",
|
|
216
|
+
"config": {"enable_wilor": true, "device": "cuda"}
|
|
217
|
+
}'
|
|
218
|
+
|
|
219
|
+
# Poll status
|
|
220
|
+
curl http://localhost:8000/api/extraction/status/{job_id}
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
**H5 file structure (WiLoR):**
|
|
224
|
+
|
|
225
|
+
```
|
|
226
|
+
video_wilor.h5
|
|
227
|
+
attrs: fps, num_frames, resolution, extractor
|
|
228
|
+
frame_idx: (num_frames, 2) # (start_idx, count) per frame
|
|
229
|
+
kpts_3d: (num_detections, 21, 3)
|
|
230
|
+
right: (num_detections,) # True = right hand
|
|
231
|
+
mano/
|
|
232
|
+
hand_pose: (num_detections, 15, 3, 3) # rotation matrices
|
|
233
|
+
global_orient: (num_detections, 1, 3, 3)
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
**Weight resolution** — model checkpoints are found in this priority order:
|
|
237
|
+
|
|
238
|
+
1. Explicit path argument
|
|
239
|
+
2. Environment variable (`SLTK_WILOR_CHECKPOINT`, `SLTK_WILOR_DETECTOR`)
|
|
240
|
+
3. Bundled at `sltk/weights/wilor/`
|
|
241
|
+
|
|
242
|
+
MediaPipe, NLF, RTMPose, and TEASER extractors are also available for body/face poses — see `sltk/extraction/`.
|
|
243
|
+
|
|
244
|
+
### Stage 2: Segmentation (H5 → Segments)
|
|
245
|
+
|
|
246
|
+
The segmenter v2 is a Transformer that reads WiLoR H5 files and predicts per-frame BIO labels (0=OUT, 1=IN, 2=BEGIN), identifying where signs start and end.
|
|
247
|
+
|
|
248
|
+
**If you already have H5 files**, this is where you start.
|
|
249
|
+
|
|
250
|
+
**Python:**
|
|
251
|
+
|
|
252
|
+
```python
|
|
253
|
+
from sltk.segmentation.runner import segment_h5
|
|
254
|
+
from sltk.segmentation.output import OutputFormat
|
|
255
|
+
|
|
256
|
+
# Segment a single H5 file → JSON output
|
|
257
|
+
segment_h5(
|
|
258
|
+
"video_wilor.h5",
|
|
259
|
+
output_path="video_segments.json",
|
|
260
|
+
output_format=OutputFormat.JSON,
|
|
261
|
+
fps=25.0,
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
# Segment a single H5 file → ELAN output
|
|
265
|
+
segment_h5(
|
|
266
|
+
"video_wilor.h5",
|
|
267
|
+
output_path="video_segments.eaf",
|
|
268
|
+
output_format=OutputFormat.ELAN,
|
|
269
|
+
fps=25.0,
|
|
270
|
+
media_path="video.mp4", # links video in the EAF file
|
|
271
|
+
)
|
|
272
|
+
|
|
273
|
+
# Segment an entire directory of H5 files
|
|
274
|
+
segment_h5(
|
|
275
|
+
"/data/poses/",
|
|
276
|
+
output_path="/data/segments/output.json",
|
|
277
|
+
output_format=OutputFormat.JSON,
|
|
278
|
+
fps=25.0,
|
|
279
|
+
)
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
**Lower-level control:**
|
|
283
|
+
|
|
284
|
+
```python
|
|
285
|
+
from sltk.segmentation.runner import get_runner
|
|
286
|
+
from sltk.segmentation.h5_loader import h5_to_features
|
|
287
|
+
from sltk.segmentation.postprocess import extract_segments
|
|
288
|
+
|
|
289
|
+
# Load features from H5 (converts MANO rotations → 192-dim features)
|
|
290
|
+
features = h5_to_features("video_wilor.h5") # shape: (num_frames, 192)
|
|
291
|
+
|
|
292
|
+
# Get the inference runner (singleton, loads checkpoint once)
|
|
293
|
+
runner = get_runner()
|
|
294
|
+
|
|
295
|
+
# Predict BIO labels
|
|
296
|
+
labels = runner.predict(features) # shape: (num_frames,) with values 0/1/2
|
|
297
|
+
|
|
298
|
+
# Extract segment boundaries as (start_frame, end_frame) tuples
|
|
299
|
+
segments = extract_segments(labels)
|
|
300
|
+
# [(12, 45), (50, 82), (90, 120), ...]
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
**API:**
|
|
304
|
+
|
|
305
|
+
```bash
|
|
306
|
+
# Segment a single H5 file
|
|
307
|
+
curl -X POST http://localhost:8000/api/segmentation/segment \
|
|
308
|
+
-H "Content-Type: application/json" \
|
|
309
|
+
-d '{"h5_path": "/data/video_wilor.h5", "fps": 25.0}'
|
|
310
|
+
|
|
311
|
+
# Segment a directory (batch)
|
|
312
|
+
curl -X POST http://localhost:8000/api/segmentation/segment/batch \
|
|
313
|
+
-H "Content-Type: application/json" \
|
|
314
|
+
-d '{
|
|
315
|
+
"directory": "/data/poses/",
|
|
316
|
+
"fps": 25.0,
|
|
317
|
+
"output_path": "/data/segments/",
|
|
318
|
+
"output_format": "json"
|
|
319
|
+
}'
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
**JSON output format:**
|
|
323
|
+
|
|
324
|
+
```json
|
|
325
|
+
{
|
|
326
|
+
"video_name": {
|
|
327
|
+
"fps": 25.0,
|
|
328
|
+
"num_frames": 3000,
|
|
329
|
+
"segments": [
|
|
330
|
+
{"start_frame": 12, "end_frame": 45, "start_sec": 0.48, "end_sec": 1.80},
|
|
331
|
+
{"start_frame": 50, "end_frame": 82, "start_sec": 2.00, "end_sec": 3.28}
|
|
332
|
+
]
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
**ELAN output:** creates a tier `{video_name}_segmentation` with each segment labelled `"SIGN"`, authored by `segmenter_v2`.
|
|
338
|
+
|
|
339
|
+
**Checkpoint resolution:** set `SLTK_SEGMENTOR_CHECKPOINT` or place `segmentor_v2.ckpt` in `sltk/weights/segmentor/`.
|
|
340
|
+
|
|
341
|
+
### Stage 3: Spotting (Segments → Gloss Labels)
|
|
342
|
+
|
|
343
|
+
Spotting uses SignRep to extract 768-dim visual features from video frames, then matches each detected segment against a **dictionary** of known sign features to produce ranked gloss predictions.
|
|
344
|
+
|
|
345
|
+
**Prerequisites:**
|
|
346
|
+
|
|
347
|
+
- A segmented video (from Stage 2) with known segment boundaries
|
|
348
|
+
- A dictionary of sign features — `.npz` files with key `best_latent`, one per sign, typically stored at `/vol/research/SignFeaturePool/features2/{dataset}/{method}/`
|
|
349
|
+
|
|
350
|
+
**Python — full pipeline:**
|
|
351
|
+
|
|
352
|
+
```python
|
|
353
|
+
from sltk.embedding.pipeline import SignRepPipeline
|
|
354
|
+
|
|
355
|
+
pipeline = SignRepPipeline()
|
|
356
|
+
|
|
357
|
+
# Step 1: Extract dense features from the full video (sliding 16-frame windows)
|
|
358
|
+
continuous = pipeline.extract_continuous("video.mp4", stride=4)
|
|
359
|
+
# continuous.features: (L, 768) L2-normalized
|
|
360
|
+
|
|
361
|
+
# Step 2: Load dictionary features
|
|
362
|
+
dictionary = pipeline.load_dictionary(
|
|
363
|
+
["/data/dictionaries/bsldict/signrep/"],
|
|
364
|
+
feature_key="best_latent",
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
# Step 3: Define segments (from Stage 2 output, or load from JSON/EAF)
|
|
368
|
+
segments = [
|
|
369
|
+
{"segment_id": 0, "start_frame": 12, "end_frame": 45},
|
|
370
|
+
{"segment_id": 1, "start_frame": 50, "end_frame": 82},
|
|
371
|
+
]
|
|
372
|
+
|
|
373
|
+
# Step 4: Spot — match each segment against dictionary
|
|
374
|
+
result = pipeline.spot(
|
|
375
|
+
features=continuous,
|
|
376
|
+
segments=segments,
|
|
377
|
+
dictionary=dictionary,
|
|
378
|
+
top_k=10,
|
|
379
|
+
segment_pooling="max", # "max", "mean", or "softmax_weighted"
|
|
380
|
+
)
|
|
381
|
+
|
|
382
|
+
# Each spotted segment has ranked gloss matches
|
|
383
|
+
for seg in result.segments:
|
|
384
|
+
    print(f"Segment {seg.start_ms}ms–{seg.end_ms}ms:")
|
|
385
|
+
    for gl in seg.top_glosses:
|
|
386
|
+
        print(f" Rank {gl['rank']}: {gl['gloss']} ({gl['similarity']:.3f})")
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
**Python — one-shot from video:**
|
|
390
|
+
|
|
391
|
+
```python
|
|
392
|
+
from sltk.embedding.pipeline import SignRepPipeline
|
|
393
|
+
|
|
394
|
+
pipeline = SignRepPipeline()
|
|
395
|
+
result = pipeline.spot_from_video(
|
|
396
|
+
video_path="video.mp4",
|
|
397
|
+
segments_json="video_segments.json", # from Stage 2
|
|
398
|
+
dictionary_dirs=["/data/dictionaries/bsldict/signrep/"],
|
|
399
|
+
top_k=20,
|
|
400
|
+
stride=4,
|
|
401
|
+
)
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
**Save results as ELAN:**
|
|
405
|
+
|
|
406
|
+
```python
|
|
407
|
+
from sltk.segmentation.output import save_spotted_elan
|
|
408
|
+
|
|
409
|
+
save_spotted_elan(
|
|
410
|
+
result,
|
|
411
|
+
output_path="video_spotted.eaf",
|
|
412
|
+
fps=25.0,
|
|
413
|
+
media_path="video.mp4",
|
|
414
|
+
)
|
|
415
|
+
```
|
|
416
|
+
|
|
417
|
+
This creates an EAF file with tiers `Rank-1` through `Rank-N` (gloss labels) and `Score-1` through `Score-N` (similarity scores), authored by `signrep_spotter`.
|
|
418
|
+
|
|
419
|
+
**API:**
|
|
420
|
+
|
|
421
|
+
```bash
|
|
422
|
+
# Extract continuous features (cached server-side for 30 min)
|
|
423
|
+
curl -X POST http://localhost:8000/api/signrep/continuous/extract \
|
|
424
|
+
-H "Content-Type: application/json" \
|
|
425
|
+
-d '{"video_path": "/data/video.mp4", "stride": 4}'
|
|
426
|
+
# Returns: {"features_id": "abc123", ...}
|
|
427
|
+
|
|
428
|
+
# Spot glosses using cached features
|
|
429
|
+
curl -X POST http://localhost:8000/api/signrep/spot \
|
|
430
|
+
-H "Content-Type: application/json" \
|
|
431
|
+
-d '{
|
|
432
|
+
"features_id": "abc123",
|
|
433
|
+
"segments": [{"segment_id": 0, "start_frame": 12, "end_frame": 45}],
|
|
434
|
+
"dictionary_dirs": ["/data/dictionaries/bsldict/signrep/"],
|
|
435
|
+
"top_k": 10
|
|
436
|
+
}'
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
**Checkpoint:** set `SLTK_SIGNREP_CHECKPOINT` or place `ckpt.pt` in `sltk/weights/signrep/`.
|
|
440
|
+
|
|
441
|
+
### End-to-End: Processing API
|
|
442
|
+
|
|
443
|
+
The processing API combines segmentation and spotting into a single background job. It expects WiLoR H5 files to already exist alongside the videos.
|
|
444
|
+
|
|
445
|
+
**API:**
|
|
446
|
+
|
|
447
|
+
```bash
|
|
448
|
+
# Segmentation only
|
|
449
|
+
curl -X POST http://localhost:8000/api/processing/submit \
|
|
450
|
+
-H "Content-Type: application/json" \
|
|
451
|
+
-d '{
|
|
452
|
+
"video_paths": ["/data/video1.mp4", "/data/video2.mp4"],
|
|
453
|
+
"type": "segments",
|
|
454
|
+
"fps": 25.0
|
|
455
|
+
}'
|
|
456
|
+
|
|
457
|
+
# Segmentation + spotting
|
|
458
|
+
curl -X POST http://localhost:8000/api/processing/submit \
|
|
459
|
+
-H "Content-Type: application/json" \
|
|
460
|
+
-d '{
|
|
461
|
+
"video_paths": ["/data/video1.mp4", "/data/video2.mp4"],
|
|
462
|
+
"type": "spots",
|
|
463
|
+
"dictionary_dirs": ["/data/dictionaries/bsldict/signrep/"],
|
|
464
|
+
"top_k": 5,
|
|
465
|
+
"fps": 25.0,
|
|
466
|
+
"workspace": "my_workspace"
|
|
467
|
+
}'
|
|
468
|
+
|
|
469
|
+
# Poll job status
|
|
470
|
+
curl http://localhost:8000/api/processing/status/{job_id}
|
|
471
|
+
|
|
472
|
+
# Download output EAF
|
|
473
|
+
curl -O http://localhost:8000/api/processing/output/{job_id}/video1_spotted.eaf
|
|
474
|
+
```
|
|
475
|
+
|
|
476
|
+
**H5 file lookup:** the processing API searches for `{stem}_wilor.h5` next to the video, in a `poses/` subdirectory, or in a `{stem}/` subdirectory. If no H5 is found, the video is skipped.
|
|
477
|
+
|
|
478
|
+
**Output files:**
|
|
479
|
+
|
|
480
|
+
| Type | Output file | Description |
|
|
481
|
+
|------|-------------|-------------|
|
|
482
|
+
| `segments` | `{stem}_segments.eaf` | Sign boundaries (SIGN labels) |
|
|
483
|
+
| `spots` | `{stem}_segments.eaf` + `{stem}_spotted.eaf` | Boundaries + ranked gloss labels |
|
|
484
|
+
|
|
485
|
+
When a `workspace` is specified, output EAF files are auto-ingested into the corpus database.
|
|
486
|
+
|
|
487
|
+
### Building a Dictionary
|
|
488
|
+
|
|
489
|
+
Before spotting, you need a dictionary of sign features. Extract one feature per isolated sign video:
|
|
490
|
+
|
|
491
|
+
```python
|
|
492
|
+
from sltk.embedding.pipeline import SignRepPipeline
|
|
493
|
+
|
|
494
|
+
pipeline = SignRepPipeline()
|
|
495
|
+
|
|
496
|
+
# Single sign video → 768-dim feature
|
|
497
|
+
result = pipeline.extract_dictionary("isolated_sign.mp4", method="middle")
|
|
498
|
+
result.save_npz("dictionary/HELLO.npz")
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
**Batch extraction via the API:**
|
|
502
|
+
|
|
503
|
+
```bash
|
|
504
|
+
curl -X POST http://localhost:8000/api/signrep/dictionary/batch/job \
|
|
505
|
+
-H "Content-Type: application/json" \
|
|
506
|
+
-d '{
|
|
507
|
+
"video_dir": "/data/isolated_signs/",
|
|
508
|
+
"output_dir": "/data/dictionary/",
|
|
509
|
+
"method": "middle"
|
|
510
|
+
}'
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
Each `.npz` file is named after the gloss (e.g., `HELLO.npz`) and contains a `best_latent` key with the 768-dim feature vector.
|
|
514
|
+
|
|
515
|
+
---
|
|
516
|
+
|
|
517
|
+
## Architecture
|
|
518
|
+
|
|
519
|
+
```
|
|
520
|
+
sltk/
|
|
521
|
+
├── api/ # FastAPI REST API (16 routers, 88+ endpoints)
|
|
522
|
+
│ ├── main.py # App init, middleware, router registration
|
|
523
|
+
│ ├── models.py # Pydantic request/response schemas
|
|
524
|
+
│ ├── routers/ # Route handlers (see API Reference below)
|
|
525
|
+
│ ├── dependencies.py # Path validation, security
|
|
526
|
+
│ └── security.py # Security headers, CORS
|
|
527
|
+
├── io/ # File I/O
|
|
528
|
+
│ ├── elan.py # ELAN .eaf read/write
|
|
529
|
+
│ ├── elan_roundtrip.py # XML-preserving ELAN editing (round-trip safe)
|
|
530
|
+
│ ├── h5.py # HDF5 pose data I/O
|
|
531
|
+
│ └── safe_write.py # Atomic file operations
|
|
532
|
+
├── extraction/ # Pose extraction (MediaPipe, WiLoR, NLF)
|
|
533
|
+
├── segmentation/ # Transformer-based sign segmentation
|
|
534
|
+
├── visualization/ # Skeleton overlay video generation
|
|
535
|
+
├── processing/ # Feature computation, normalization
|
|
536
|
+
├── analysis/ # Clustering, embeddings, statistics
|
|
537
|
+
├── data/ # Core types (PoseSequence, Segment, Sample)
|
|
538
|
+
│ └── datasets/ # Dataset loaders (BOBSL, How2Sign, BSLCP, etc.)
|
|
539
|
+
└── config.py # Configuration and environment
|
|
540
|
+
frontend/ # React/Vite annotation workstation
|
|
541
|
+
scripts/
|
|
542
|
+
└── run_dev.sh # Dev server launcher
|
|
543
|
+
```
|
|
544
|
+
|
|
545
|
+
---
|
|
546
|
+
|
|
547
|
+
## API Reference
|
|
548
|
+
|
|
549
|
+
Base URL: `http://localhost:8000`
|
|
550
|
+
|
|
551
|
+
### Workspace Management — `/api/workspace`
|
|
552
|
+
|
|
553
|
+
Multi-workspace system for organizing videos and annotation files. Workspace state is persisted to `~/.sltk/workspaces.json`.
|
|
554
|
+
|
|
555
|
+
| Method | Endpoint | Description |
|
|
556
|
+
|--------|----------|-------------|
|
|
557
|
+
| `GET` | `/list` | List all workspaces |
|
|
558
|
+
| `POST` | `/create` | Create a new workspace |
|
|
559
|
+
| `POST` | `/switch` | Switch active workspace |
|
|
560
|
+
| `POST` | `/scan` | Scan directory for videos + ELAN files |
|
|
561
|
+
| `GET` | `/status` | Current workspace status |
|
|
562
|
+
| `PUT` | `/rename` | Rename workspace |
|
|
563
|
+
| `DELETE` | `/clear` | Clear active workspace |
|
|
564
|
+
| `DELETE` | `/{name}` | Delete workspace by name |
|
|
565
|
+
| `PATCH` | `/match` | Override video-ELAN matching |
|
|
566
|
+
| `POST` | `/rescan` | Rescan for new files |
|
|
567
|
+
|
|
568
|
+
### Videos — `/api/videos`, `/api/video`
|
|
569
|
+
|
|
570
|
+
| Method | Endpoint | Description |
|
|
571
|
+
|--------|----------|-------------|
|
|
572
|
+
| `POST` | `/videos/discover` | Discover videos in directory (recursive) |
|
|
573
|
+
| `GET` | `/videos/info` | Video metadata (fps, resolution, duration) |
|
|
574
|
+
| `GET` | `/video/stream` | Stream video with HTTP Range support |
|
|
575
|
+
|
|
576
|
+
### Audio — `/api/audio`
|
|
577
|
+
|
|
578
|
+
| Method | Endpoint | Description |
|
|
579
|
+
|--------|----------|-------------|
|
|
580
|
+
| `GET` | `/waveform` | Extract waveform peaks. Params: `path`, `samples` (default 8000) |
|
|
581
|
+
|
|
582
|
+
### Extraction — `/api/extraction`
|
|
583
|
+
|
|
584
|
+
Pose extraction jobs (MediaPipe, WiLoR, NLF/SMPL-X).
|
|
585
|
+
|
|
586
|
+
| Method | Endpoint | Description |
|
|
587
|
+
|--------|----------|-------------|
|
|
588
|
+
| `GET` | `/status/{job_id}` | Poll extraction progress |
|
|
589
|
+
| `POST` | `/cancel/{job_id}` | Cancel extraction |
|
|
590
|
+
| `GET` | `/jobs` | List all extraction jobs |
|
|
591
|
+
| `GET` | `/logs/{job_id}` | Job logs (last 1000 entries) |
|
|
592
|
+
|
|
593
|
+
### Poses — `/api/poses`
|
|
594
|
+
|
|
595
|
+
| Method | Endpoint | Description |
|
|
596
|
+
|--------|----------|-------------|
|
|
597
|
+
| `GET` | `/load` | Load pose data from H5 file |
|
|
598
|
+
| `GET` | `/metadata` | Pose metadata (frames, keypoints, format) |
|
|
599
|
+
| `GET` | `/frame` | Single frame pose data |
|
|
600
|
+
| `GET` | `/statistics` | Pose statistics |
|
|
601
|
+
|
|
602
|
+
### Visualization — `/api/visualization`
|
|
603
|
+
|
|
604
|
+
Skeleton overlay video generation from H5 pose data.
|
|
605
|
+
|
|
606
|
+
| Method | Endpoint | Description |
|
|
607
|
+
|--------|----------|-------------|
|
|
608
|
+
| `POST` | `/generate` | Generate overlay. Body: `{video_path, h5_path, viz_type}` |
|
|
609
|
+
| `GET` | `/status/{job_id}` | Poll generation progress |
|
|
610
|
+
| `GET` | `/check` | Check if cached overlay exists |
|
|
611
|
+
|
|
612
|
+
`viz_type`: `"mediapipe"`, `"wilor"`, or `"nlf"`
|
|
613
|
+
|
|
614
|
+
### Datasets — `/api/datasets`
|
|
615
|
+
|
|
616
|
+
| Method | Endpoint | Description |
|
|
617
|
+
|--------|----------|-------------|
|
|
618
|
+
| `POST` | `/connect` | Register dataset connection |
|
|
619
|
+
| `GET` | `/connections` | List connected datasets |
|
|
620
|
+
| `GET` | `/list` | List available datasets |
|
|
621
|
+
| `GET` | `/{name}/videos` | Videos in dataset |
|
|
622
|
+
| `DELETE` | `/connections/{name}` | Remove connection |
|
|
623
|
+
|
|
624
|
+
### Features — `/api/features`
|
|
625
|
+
|
|
626
|
+
| Method | Endpoint | Description |
|
|
627
|
+
|--------|----------|-------------|
|
|
628
|
+
| `POST` | `/detect` | Detect features in video |
|
|
629
|
+
| `GET` | `/scan` | Scan for feature files |
|
|
630
|
+
| `GET` | `/datasets/{name}/features/summary` | Feature summary for dataset |
|
|
631
|
+
|
|
632
|
+
### Analysis — `/api/analysis`
|
|
633
|
+
|
|
634
|
+
Research-oriented endpoints for vocabulary, statistics, and linguistic analysis.
|
|
635
|
+
|
|
636
|
+
| Method | Endpoint | Description |
|
|
637
|
+
|--------|----------|-------------|
|
|
638
|
+
| `GET` | `/vocabulary` | Extract vocabulary from dataset |
|
|
639
|
+
| `POST` | `/batch/statistics` | Batch statistical analysis |
|
|
640
|
+
| `POST` | `/research/vocabulary-mapping` | Map glosses across datasets |
|
|
641
|
+
| `POST` | `/research/compare-datasets` | Compare two datasets |
|
|
642
|
+
| `POST` | `/research/find-gloss-examples` | Find gloss examples |
|
|
643
|
+
| `POST` | `/linguistic/concordance` | Gloss concordance |
|
|
644
|
+
| `POST` | `/linguistic/cooccurrence` | Co-occurrence analysis |
|
|
645
|
+
| `POST` | `/linguistic/ngrams` | N-gram frequency |
|
|
646
|
+
| `POST` | `/linguistic/duration-analysis` | Duration statistics |
|
|
647
|
+
|
|
648
|
+
### Embeddings — `/api/embeddings`
|
|
649
|
+
|
|
650
|
+
| Method | Endpoint | Description |
|
|
651
|
+
|--------|----------|-------------|
|
|
652
|
+
| `GET` | `/status/{dataset}` | Embedding generation status |
|
|
653
|
+
| `DELETE` | `/cache/{dataset}` | Clear embeddings |
|
|
654
|
+
| `GET` | `/signrep/status` | SignRep model status |
|
|
655
|
+
|
|
656
|
+
### Linguistics — `/api/linguistics`
|
|
657
|
+
|
|
658
|
+
Inter-rater reliability and phonological analysis.
|
|
659
|
+
|
|
660
|
+
| Method | Endpoint | Description |
|
|
661
|
+
|--------|----------|-------------|
|
|
662
|
+
| `POST` | `/reliability/kappa` | Cohen's kappa |
|
|
663
|
+
| `POST` | `/reliability/krippendorff` | Krippendorff's alpha |
|
|
664
|
+
| `POST` | `/reliability/boundary-agreement` | Boundary agreement |
|
|
665
|
+
| `POST` | `/reliability/confusion-matrix` | Confusion matrix |
|
|
666
|
+
| `POST` | `/phonological-form` | Extract phonological form |
|
|
667
|
+
| `POST` | `/phonological-distance` | Phonological distance |
|
|
668
|
+
|
|
669
|
+
### Jobs — `/api/jobs`
|
|
670
|
+
|
|
671
|
+
| Method | Endpoint | Description |
|
|
672
|
+
|--------|----------|-------------|
|
|
673
|
+
| `GET` | `/status` | Job system status |
|
|
674
|
+
| `GET` | `/gpu` | GPU status and memory |
|
|
675
|
+
| `GET` | `/list` | List active jobs |
|
|
676
|
+
| `POST` | `/{job_id}/cancel` | Cancel job |
|
|
677
|
+
|
|
678
|
+
### Settings — `/api/settings`
|
|
679
|
+
|
|
680
|
+
| Method | Endpoint | Description |
|
|
681
|
+
|--------|----------|-------------|
|
|
682
|
+
| `GET` | `/` | Get app settings |
|
|
683
|
+
| `POST` | `/` | Update settings |
|
|
684
|
+
| `GET` | `/info` | System info |
|
|
685
|
+
| `GET` | `/system/weights` | Model weights info |
|
|
686
|
+
|
|
687
|
+
---
|
|
688
|
+
|
|
689
|
+
## Middleware & Security
|
|
690
|
+
|
|
691
|
+
The API applies the following middleware (in order):
|
|
692
|
+
|
|
693
|
+
1. **GZip** — compresses responses >500 bytes
|
|
694
|
+
2. **Security headers** — `X-Frame-Options: DENY`, `X-Content-Type-Options: nosniff`, CSP, Permissions-Policy
|
|
695
|
+
3. **CORS** — configurable via `SLTK_CORS_ORIGINS` env var (default: `http://localhost:5173,http://localhost:3000`)
|
|
696
|
+
4. **Path validation** — whitelist check against `SLTK_ALLOWED_PATHS`, directory traversal prevention
|
|
697
|
+
|
|
698
|
+
## Configuration
|
|
699
|
+
|
|
700
|
+
Environment variables (set in `.env` or shell):
|
|
701
|
+
|
|
702
|
+
| Variable | Description | Default |
|
|
703
|
+
|----------|-------------|---------|
|
|
704
|
+
| `SLTK_CORS_ORIGINS` | Allowed CORS origins (comma-separated) | `http://localhost:5173,http://localhost:3000` |
|
|
705
|
+
| `SLTK_ALLOWED_PATHS` | Allowed filesystem paths for API access | `/vol/research,/home` |
|
|
706
|
+
| `SLTK_RESEARCH_DATA_ROOT` | Root for research data | `/vol/research` |
|
|
707
|
+
| `SLTK_DATASETS_ROOT` | Root for raw datasets | `/vol/research/datasets` |
|
|
708
|
+
| `SLTK_FEATURE_ROOT` | Root for extracted features | `/vol/research/SignFeaturePool/features2` |
|
|
709
|
+
| `SLTK_NLF_MODEL_PATH` | Path to NLF model weights | — |
|
|
710
|
+
| `SLTK_WILOR_MODEL_PATH` | Path to WiLoR model weights | — |
|
|
711
|
+
| `SLTK_SIGNREP_CHECKPOINT` | Path to SignRep checkpoint | — |
|
|
712
|
+
| `SLTK_SEGMENTOR_PATH` | Path to segmentor checkpoint | — |
|
|
713
|
+
|
|
714
|
+
## Supported Pose Formats
|
|
715
|
+
|
|
716
|
+
| Format | Joints | Description |
|
|
717
|
+
|--------|--------|-------------|
|
|
718
|
+
| **MediaPipe** | 33 body + 21x2 hands + 468 face | Holistic pose estimation |
|
|
719
|
+
| **WiLoR** | 21 per hand | MANO hand model with rotation matrices |
|
|
720
|
+
| **NLF/SMPL-X** | 55 joints | Full body with axis-angle rotations |
|
|
721
|
+
|
|
722
|
+
All formats are stored as HDF5 (`.h5`) files.
|
|
723
|
+
|
|
724
|
+
## Testing
|
|
725
|
+
|
|
726
|
+
```bash
|
|
727
|
+
# Run full suite (1429 tests)
|
|
728
|
+
pytest
|
|
729
|
+
|
|
730
|
+
# With coverage
|
|
731
|
+
pytest --cov=sltk --cov-report=html
|
|
732
|
+
|
|
733
|
+
# Specific markers
|
|
734
|
+
pytest -m api # API tests only
|
|
735
|
+
pytest -m "not slow" # Skip slow tests
|
|
736
|
+
pytest -m gpu # GPU tests only
|
|
737
|
+
```
|
|
738
|
+
|
|
739
|
+
## CI
|
|
740
|
+
|
|
741
|
+
GitHub Actions runs on every push/PR to `main`:
|
|
742
|
+
- **Lint**: black + ruff
|
|
743
|
+
- **Type check**: mypy
|
|
744
|
+
- **Tests**: pytest across Python 3.10, 3.11, 3.12 (coverage threshold: 40%)
|
|
745
|
+
- **Frontend**: npm build
|
|
746
|
+
|
|
747
|
+
## License
|
|
748
|
+
|
|
749
|
+
MIT
|