signspell 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- signspell-1.0.0/LICENSE +21 -0
- signspell-1.0.0/MANIFEST.in +3 -0
- signspell-1.0.0/PKG-INFO +116 -0
- signspell-1.0.0/README.md +83 -0
- signspell-1.0.0/pyproject.toml +48 -0
- signspell-1.0.0/setup.cfg +4 -0
- signspell-1.0.0/src/asl_alphabet/__init__.py +21 -0
- signspell-1.0.0/src/asl_alphabet/app.py +188 -0
- signspell-1.0.0/src/asl_alphabet/cli.py +46 -0
- signspell-1.0.0/src/asl_alphabet/config.py +26 -0
- signspell-1.0.0/src/asl_alphabet/models/alphabet_30_30_all_4.h5 +0 -0
- signspell-1.0.0/src/asl_alphabet/recognizer.py +114 -0
- signspell-1.0.0/src/signspell.egg-info/PKG-INFO +116 -0
- signspell-1.0.0/src/signspell.egg-info/SOURCES.txt +16 -0
- signspell-1.0.0/src/signspell.egg-info/dependency_links.txt +1 -0
- signspell-1.0.0/src/signspell.egg-info/entry_points.txt +2 -0
- signspell-1.0.0/src/signspell.egg-info/requires.txt +9 -0
- signspell-1.0.0/src/signspell.egg-info/top_level.txt +1 -0
signspell-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Sundar
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
signspell-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: signspell
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Live ASL fingerspelling alphabet recognition with a polished webcam UI.
|
|
5
|
+
Author-email: Sundar <you@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/TheMadrasTechie/signspell
|
|
8
|
+
Project-URL: Repository, https://github.com/TheMadrasTechie/signspell
|
|
9
|
+
Project-URL: Issues, https://github.com/TheMadrasTechie/signspell/issues
|
|
10
|
+
Keywords: asl,sign-language,computer-vision,mediapipe,fingerspelling,accessibility
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Education
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
20
|
+
Classifier: Topic :: Multimedia :: Video :: Capture
|
|
21
|
+
Requires-Python: <3.12,>=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy<2
|
|
25
|
+
Requires-Dist: opencv-python<4.10
|
|
26
|
+
Requires-Dist: mediapipe<0.11,>=0.10
|
|
27
|
+
Requires-Dist: tensorflow<2.16,>=2.15
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: build; extra == "dev"
|
|
30
|
+
Requires-Dist: twine; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# signspell
|
|
35
|
+
|
|
36
|
+
**Live ASL fingerspelling alphabet recognition — straight from your webcam.**
|
|
37
|
+
|
|
38
|
+
`signspell` recognises the American Sign Language manual alphabet (A–Z) in real
|
|
39
|
+
time using MediaPipe hand tracking and an LSTM model trained on 30-frame
|
|
40
|
+
keypoint sequences. It ships with a pretrained model and a polished webcam UI,
|
|
41
|
+
and it works both as a command-line tool and an importable library.
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Install
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install signspell
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
> Requires Python 3.9–3.11. A webcam is required for live recognition.
|
|
52
|
+
|
|
53
|
+
## Run it (CLI)
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
signspell # default webcam, bundled model, pro UI
|
|
57
|
+
signspell --camera 1 # pick a different camera
|
|
58
|
+
signspell --threshold 0.6 # require higher confidence
|
|
59
|
+
signspell --no-mirror # disable mirrored view
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**In-window keys:** `q` quit · `c` clear sentence · `SPACE` add a space.
|
|
63
|
+
|
|
64
|
+
## Use it (library)
|
|
65
|
+
|
|
66
|
+
Run the full UI from code:
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
import asl_alphabet as signspell  # the installed import package is named asl_alphabet
|
|
70
|
+
signspell.run()
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Or drive recognition yourself, frame by frame:
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
import cv2
|
|
77
|
+
import asl_alphabet as signspell  # the installed import package is named asl_alphabet
|
|
78
|
+
|
|
79
|
+
rec = signspell.Recognizer()
|
|
80
|
+
cap = cv2.VideoCapture(0)
|
|
81
|
+
|
|
82
|
+
while True:
|
|
83
|
+
ok, frame = cap.read()
|
|
84
|
+
if not ok:
|
|
85
|
+
break
|
|
86
|
+
letter, confidence, probs = rec.predict(frame)
|
|
87
|
+
if letter:
|
|
88
|
+
print(letter, f"{confidence:.2f}")
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The `Recognizer` keeps a rolling 30-frame buffer internally, so you just feed
|
|
92
|
+
frames and read predictions. `letter` is `None` until the buffer fills or when
|
|
93
|
+
confidence is below the threshold.
|
|
94
|
+
|
|
95
|
+
## How it works
|
|
96
|
+
|
|
97
|
+
1. **MediaPipe Holistic** extracts 21 right-hand landmarks per frame.
|
|
98
|
+
2. The last **30 frames** of `(x, y, z)` keypoints form a sequence.
|
|
99
|
+
3. An **LSTM** classifies the sequence into one of 26 letters.
|
|
100
|
+
4. A short stability window prevents flicker before a letter is committed.
|
|
101
|
+
|
|
102
|
+
## Bring your own model
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
signspell --model path/to/your_model.h5
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
signspell.Recognizer(model_path="path/to/your_model.h5")
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Your model must accept input shape `(1, 30, 63)` and output 26 class scores.
|
|
113
|
+
|
|
114
|
+
## License
|
|
115
|
+
|
|
116
|
+
MIT © Sundar
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# signspell
|
|
2
|
+
|
|
3
|
+
**Live ASL fingerspelling alphabet recognition — straight from your webcam.**
|
|
4
|
+
|
|
5
|
+
`signspell` recognises the American Sign Language manual alphabet (A–Z) in real
|
|
6
|
+
time using MediaPipe hand tracking and an LSTM model trained on 30-frame
|
|
7
|
+
keypoint sequences. It ships with a pretrained model and a polished webcam UI,
|
|
8
|
+
and it works both as a command-line tool and an importable library.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Install
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install signspell
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
> Requires Python 3.9–3.11. A webcam is required for live recognition.
|
|
19
|
+
|
|
20
|
+
## Run it (CLI)
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
signspell # default webcam, bundled model, pro UI
|
|
24
|
+
signspell --camera 1 # pick a different camera
|
|
25
|
+
signspell --threshold 0.6 # require higher confidence
|
|
26
|
+
signspell --no-mirror # disable mirrored view
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
**In-window keys:** `q` quit · `c` clear sentence · `SPACE` add a space.
|
|
30
|
+
|
|
31
|
+
## Use it (library)
|
|
32
|
+
|
|
33
|
+
Run the full UI from code:
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import asl_alphabet as signspell  # the installed import package is named asl_alphabet
|
|
37
|
+
signspell.run()
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Or drive recognition yourself, frame by frame:
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
import cv2
|
|
44
|
+
import asl_alphabet as signspell  # the installed import package is named asl_alphabet
|
|
45
|
+
|
|
46
|
+
rec = signspell.Recognizer()
|
|
47
|
+
cap = cv2.VideoCapture(0)
|
|
48
|
+
|
|
49
|
+
while True:
|
|
50
|
+
ok, frame = cap.read()
|
|
51
|
+
if not ok:
|
|
52
|
+
break
|
|
53
|
+
letter, confidence, probs = rec.predict(frame)
|
|
54
|
+
if letter:
|
|
55
|
+
print(letter, f"{confidence:.2f}")
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
The `Recognizer` keeps a rolling 30-frame buffer internally, so you just feed
|
|
59
|
+
frames and read predictions. `letter` is `None` until the buffer fills or when
|
|
60
|
+
confidence is below the threshold.
|
|
61
|
+
|
|
62
|
+
## How it works
|
|
63
|
+
|
|
64
|
+
1. **MediaPipe Holistic** extracts 21 right-hand landmarks per frame.
|
|
65
|
+
2. The last **30 frames** of `(x, y, z)` keypoints form a sequence.
|
|
66
|
+
3. An **LSTM** classifies the sequence into one of 26 letters.
|
|
67
|
+
4. A short stability window prevents flicker before a letter is committed.
|
|
68
|
+
|
|
69
|
+
## Bring your own model
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
signspell --model path/to/your_model.h5
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
signspell.Recognizer(model_path="path/to/your_model.h5")
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Your model must accept input shape `(1, 30, 63)` and output 26 class scores.
|
|
80
|
+
|
|
81
|
+
## License
|
|
82
|
+
|
|
83
|
+
MIT © Sundar
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "signspell"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Live ASL fingerspelling alphabet recognition with a polished webcam UI."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9,<3.12"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Sundar", email = "you@example.com" }]
|
|
13
|
+
keywords = ["asl", "sign-language", "computer-vision", "mediapipe", "fingerspelling", "accessibility"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Intended Audience :: Education",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.9",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Topic :: Scientific/Engineering :: Image Recognition",
|
|
24
|
+
"Topic :: Multimedia :: Video :: Capture",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"numpy<2",
|
|
28
|
+
"opencv-python<4.10",
|
|
29
|
+
"mediapipe>=0.10,<0.11",
|
|
30
|
+
"tensorflow>=2.15,<2.16",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://github.com/TheMadrasTechie/signspell"
|
|
35
|
+
Repository = "https://github.com/TheMadrasTechie/signspell"
|
|
36
|
+
Issues = "https://github.com/TheMadrasTechie/signspell/issues"
|
|
37
|
+
|
|
38
|
+
[project.scripts]
|
|
39
|
+
signspell = "asl_alphabet.cli:main"
|
|
40
|
+
|
|
41
|
+
[project.optional-dependencies]
|
|
42
|
+
dev = ["build", "twine", "pytest"]
|
|
43
|
+
|
|
44
|
+
[tool.setuptools.packages.find]
|
|
45
|
+
where = ["src"]
|
|
46
|
+
|
|
47
|
+
[tool.setuptools.package-data]
|
|
48
|
+
asl_alphabet = ["models/*.h5"]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""
|
|
2
|
+
signspell — live ASL fingerspelling alphabet recognition.
|
|
3
|
+
|
|
4
|
+
Quick start
|
|
5
|
+
-----------
|
|
6
|
+
import asl_alphabet as signspell  # the installed import package is named asl_alphabet
|
|
7
|
+
|
|
8
|
+
# Run the live recognizer with the bundled model and pro UI:
|
|
9
|
+
signspell.run()
|
|
10
|
+
|
|
11
|
+
# Or drive it yourself, frame by frame:
|
|
12
|
+
rec = signspell.Recognizer()
|
|
13
|
+
letter, confidence, probabilities = rec.predict(frame_bgr)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from .recognizer import Recognizer
|
|
17
|
+
from .app import run
|
|
18
|
+
from .config import ACTIONS, SEQUENCE_LENGTH
|
|
19
|
+
|
|
20
|
+
__version__ = "0.1.0"
|
|
21
|
+
__all__ = ["Recognizer", "run", "ACTIONS", "SEQUENCE_LENGTH", "__version__"]
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Live application: pro UI built on top of the headless Recognizer.
|
|
3
|
+
|
|
4
|
+
Public entry point: signspell.run(...) — also wired to the `signspell` CLI.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import time
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
from .config import ACTIONS, SEQUENCE_LENGTH, DEFAULT_THRESHOLD, STABILITY_WINDOW
|
|
13
|
+
from .recognizer import Recognizer
|
|
14
|
+
|
|
15
|
+
# Colour palette (BGR)
|
|
16
|
+
C_BG_PANEL = (28, 28, 32)
|
|
17
|
+
C_ACCENT = (245, 166, 35)
|
|
18
|
+
C_ACCENT_2 = (88, 214, 141)
|
|
19
|
+
C_TEXT = (240, 240, 240)
|
|
20
|
+
C_TEXT_DIM = (150, 150, 155)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _rounded_panel(img, x1, y1, x2, y2, color, alpha=0.78):
    """Blend a filled, semi-transparent rectangle onto ``img`` in place.

    The rectangle spans ``(x1, y1)`` to ``(x2, y2)`` and is painted in
    ``color``. ``alpha`` is the weight of the painted layer and ``1 - alpha``
    the weight of the existing pixels. Despite the name, the corners are
    drawn square.
    """
    import cv2

    top_left, bottom_right = (x1, y1), (x2, y2)
    painted = img.copy()
    cv2.rectangle(painted, top_left, bottom_right, color, -1)
    # The blend is written back into `img`, so nothing needs to be returned.
    cv2.addWeighted(painted, alpha, img, 1 - alpha, 0, img)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _draw_hand(image, results):
    """Overlay the right-hand skeleton from ``results`` onto ``image`` in place.

    Does nothing when ``results.right_hand_landmarks`` is empty. Connections
    are drawn as accent-coloured lines over a thicker black outline. Joints
    are drawn as outlined dots whose radius and colour depend on where the
    landmark's ``z`` falls between the smallest and largest ``z`` of the hand
    (the smallest ``z`` gets the largest dot).
    """
    import cv2
    from mediapipe.python.solutions import holistic as mp_holistic

    hand = results.right_hand_landmarks
    if not hand:
        return

    height, width = image.shape[:2]
    black = (0, 0, 0)

    # Landmark x/y are scaled by the frame size to obtain pixel positions.
    pixels = []
    depths = []
    for point in hand.landmark:
        pixels.append((int(point.x * width), int(point.y * height)))
        depths.append(point.z)

    for start, end in mp_holistic.HAND_CONNECTIONS:
        cv2.line(image, pixels[start], pixels[end], black, 5, cv2.LINE_AA)
        cv2.line(image, pixels[start], pixels[end], C_ACCENT, 2, cv2.LINE_AA)

    z_low = min(depths)
    z_span = max(depths) - z_low
    if not z_span:
        # Every landmark shares one z value: avoid dividing by zero below.
        z_span = 1e-6

    for centre, depth in zip(pixels, depths):
        weight = 1.0 - (depth - z_low) / z_span
        radius = int(3 + 5 * weight)
        fill = (int(66 + 150 * weight), int(180 + 60 * weight), 230)
        cv2.circle(image, centre, radius + 2, black, -1, cv2.LINE_AA)
        cv2.circle(image, centre, radius, fill, -1, cv2.LINE_AA)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _draw_prob_panel(image, probs):
    """Draw the "CONFIDENCE" panel listing the eight highest scores.

    A 230-pixel-wide translucent panel is blended over the right edge of
    ``image`` (modified in place). Each row shows the label taken from
    ``ACTIONS``, a bar whose length is proportional to the score, and the
    score as a percentage. The best row uses the highlight colours.
    """
    import cv2

    font = cv2.FONT_HERSHEY_SIMPLEX
    frame_h, frame_w = image.shape[:2]
    panel_w = 230
    left = frame_w - panel_w
    track_len = panel_w - 80  # length of a bar for a score of 1.0
    bar_x = left + 46

    _rounded_panel(image, left, 0, frame_w, frame_h, C_BG_PANEL, 0.72)
    cv2.putText(image, "CONFIDENCE", (left + 16, 32),
                font, 0.6, C_TEXT_DIM, 1, cv2.LINE_AA)

    best_first = np.argsort(probs)[::-1][:8]
    for row, class_idx in enumerate(best_first):
        top = 58 + 34 * row
        score = float(probs[class_idx])
        if row == 0:
            label_colour, bar_colour = C_TEXT, C_ACCENT_2
        else:
            label_colour, bar_colour = C_TEXT_DIM, C_ACCENT

        cv2.putText(image, ACTIONS[class_idx], (left + 16, top + 20),
                    font, 0.7, label_colour, 2, cv2.LINE_AA)
        # Grey track first, then the coloured fill on top of it.
        cv2.rectangle(image, (bar_x, top + 5), (bar_x + track_len, top + 22),
                      (50, 50, 55), -1)
        cv2.rectangle(image, (bar_x, top + 5),
                      (bar_x + int(score * track_len), top + 22),
                      bar_colour, -1)
        cv2.putText(image, f"{score*100:4.0f}%", (left + panel_w - 28, top + 20),
                    font, 0.4, C_TEXT_DIM, 1, cv2.LINE_AA)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _draw_sentence_bar(image, sentence, top_conf, threshold):
    """Draw the top bar: the spelled text plus a thin confidence meter.

    ``sentence`` is joined into one string ("..." when it is empty). The
    meter under the bar is filled in proportion to ``top_conf`` and switches
    to the highlight colour once ``top_conf`` exceeds ``threshold``.
    ``image`` is modified in place.
    """
    import cv2

    # The bar stops 230 px short of the right edge, the width the
    # probability panel occupies.
    bar_right = image.shape[1] - 230
    _rounded_panel(image, 0, 0, bar_right, 56, C_BG_PANEL, 0.78)

    if sentence:
        caption = ''.join(sentence)
    else:
        caption = "..."
    cv2.putText(image, caption, (16, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1.1, C_TEXT, 2, cv2.LINE_AA)

    if top_conf > threshold:
        meter_colour = C_ACCENT_2
    else:
        meter_colour = C_ACCENT
    # Grey track across the whole bar, then the filled part on top.
    cv2.rectangle(image, (0, 56), (bar_right, 60), (50, 50, 55), -1)
    cv2.rectangle(image, (0, 56), (int(bar_right * top_conf), 60),
                  meter_colour, -1)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _draw_status(image, fps, hand_present):
    """Draw the bottom-left status strip: FPS readout and a "HAND" indicator.

    The indicator dot and its label use the highlight colours when
    ``hand_present`` is true and dimmed colours otherwise. ``image`` is
    modified in place.
    """
    import cv2

    font = cv2.FONT_HERSHEY_SIMPLEX
    bottom = image.shape[0]
    baseline = bottom - 12

    _rounded_panel(image, 0, bottom - 34, 200, bottom, C_BG_PANEL, 0.7)
    cv2.putText(image, f"FPS {fps:4.1f}", (12, baseline),
                font, 0.55, C_TEXT, 1, cv2.LINE_AA)

    if hand_present:
        dot_colour, label_colour = C_ACCENT_2, C_TEXT
    else:
        dot_colour, label_colour = (80, 80, 90), C_TEXT_DIM
    cv2.circle(image, (130, bottom - 17), 7, dot_colour, -1, cv2.LINE_AA)
    cv2.putText(image, "HAND", (145, baseline),
                font, 0.45, label_colour, 1, cv2.LINE_AA)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def run(
    model_path: Optional[str] = None,
    camera: int = 0,
    threshold: float = DEFAULT_THRESHOLD,
    mirror: bool = True,
    window_name: str = "signspell — ASL Alphabet",
):
    """Launch the live recognizer with the pro UI.

    Keys: q = quit | c = clear sentence | SPACE = add a space

    Parameters
    ----------
    model_path : str, optional
        Forwarded to ``Recognizer``; ``None`` selects its default model.
    camera : int
        Index handed to ``cv2.VideoCapture``.
    threshold : float
        Forwarded to ``Recognizer`` and used to colour the confidence meter.
    mirror : bool
        Flip each frame horizontally before recognition and display.
    window_name : str
        Title of the OpenCV window.

    Raises
    ------
    RuntimeError
        If the camera cannot be opened.
    """
    from collections import deque

    import cv2

    rec = Recognizer(model_path=model_path, threshold=threshold)

    sentence = []
    # Only the last STABILITY_WINDOW votes are ever inspected, so a bounded
    # deque keeps memory flat however long the session runs.
    recent = deque(maxlen=STABILITY_WINDOW)
    probs = np.zeros(len(ACTIONS))
    prev_t = time.time()
    fps = 0.0

    cap = cv2.VideoCapture(camera)
    if not cap.isOpened():
        rec.close()
        raise RuntimeError(
            f"Could not open camera index {camera}. Try 0, 1, or 2."
        )

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if mirror:
                frame = cv2.flip(frame, 1)

            letter, _, p = rec.predict(frame)
            results = rec.last_results
            hand_present = results.right_hand_landmarks is not None
            if p is not None:
                # Keep showing the previous scores until a new vector arrives.
                probs = p

            # Draw on a copy so the overlays never touch the captured frame.
            image = frame.copy()
            _draw_hand(image, results)

            # Stabilise before committing a letter to the sentence: the top
            # class must be the same for STABILITY_WINDOW predictions in a row.
            if p is not None:
                recent.append(int(np.argmax(probs)))
                stable = len(recent) == recent.maxlen and len(set(recent)) == 1
                if stable and letter is not None:
                    if not sentence or letter != sentence[-1]:
                        sentence.append(letter)
                    if len(sentence) > 12:
                        sentence = sentence[-12:]

            # Exponentially smoothed frames-per-second estimate.
            now = time.time()
            inst = 1.0 / max(now - prev_t, 1e-6)
            fps = 0.9 * fps + 0.1 * inst if fps else inst
            prev_t = now

            top_conf = float(probs[np.argmax(probs)]) if probs.any() else 0.0
            _draw_prob_panel(image, probs)
            _draw_sentence_bar(image, sentence, top_conf, threshold)
            _draw_status(image, fps, hand_present)

            cv2.imshow(window_name, image)
            key = cv2.waitKey(10) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('c'):
                sentence = []
            elif key == ord(' '):
                sentence.append(' ')
    finally:
        # Release the camera, the window and the recognizer on every exit path.
        cap.release()
        cv2.destroyAllWindows()
        rec.close()
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Command-line interface for signspell."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
|
|
5
|
+
from . import __version__
|
|
6
|
+
from .app import run
|
|
7
|
+
from .config import DEFAULT_THRESHOLD
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def main(argv=None):
    """Parse the command-line options and start the live recognizer.

    ``argv`` is handed to ``argparse`` unchanged; ``None`` makes it read the
    process arguments. The parsed options are forwarded to ``run``.
    """
    cli = argparse.ArgumentParser(
        description="Live ASL fingerspelling alphabet recognition.",
        prog="signspell",
    )
    cli.add_argument(
        "-c", "--camera",
        default=0,
        type=int,
        help="Camera index (default: 0).",
    )
    cli.add_argument(
        "-m", "--model",
        default=None,
        help="Path to a custom .h5 model (default: bundled model).",
    )
    cli.add_argument(
        "-t", "--threshold",
        default=DEFAULT_THRESHOLD,
        type=float,
        help=f"Confidence threshold (default: {DEFAULT_THRESHOLD}).",
    )
    cli.add_argument(
        "--no-mirror",
        action="store_true",
        help="Disable the mirrored webcam view.",
    )
    cli.add_argument(
        "-V", "--version",
        action="version",
        version=f"signspell {__version__}",
    )
    options = cli.parse_args(argv)

    # The flag is negative ("--no-mirror"); run() takes the positive form.
    mirrored = not options.no_mirror
    run(
        model_path=options.model,
        camera=options.camera,
        threshold=options.threshold,
        mirror=mirrored,
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
if __name__ == "__main__":
|
|
46
|
+
main()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Shared configuration and constants."""
|
|
2
|
+
|
|
3
|
+
from importlib import resources
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
# Model input contract — must match how the model was trained.
|
|
7
|
+
ACTIONS = np.array(
|
|
8
|
+
['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
|
|
9
|
+
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
|
|
10
|
+
)
|
|
11
|
+
SEQUENCE_LENGTH = 30 # frames per prediction window
|
|
12
|
+
NUM_KEYPOINTS = 21 * 3 # right hand: 21 landmarks x (x, y, z)
|
|
13
|
+
|
|
14
|
+
DEFAULT_THRESHOLD = 0.5
|
|
15
|
+
STABILITY_WINDOW = 10 # consecutive agreeing frames before a letter sticks
|
|
16
|
+
|
|
17
|
+
# Bundled model filename (lives in asl_alphabet/models/)
|
|
18
|
+
MODEL_FILENAME = "alphabet_30_30_all_4.h5"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def default_model_path() -> str:
    """Return the filesystem path to the model bundled inside the package.

    The path is resolved once and cached on the function. If
    ``importlib.resources`` has to extract the model to a temporary file,
    that file is kept until interpreter exit so the returned path stays
    usable after this function returns.
    """
    import atexit
    from contextlib import ExitStack

    cached = getattr(default_model_path, "_cached_path", None)
    if cached is not None:
        return cached

    # Anchor on the top-level package and descend into "models": the source
    # listing ships that directory without an __init__.py.
    # NOTE(review): importlib.resources on Python 3.9 is not expected to accept
    # such a namespace package as an anchor — confirm on 3.9.
    resource = resources.files("asl_alphabet") / "models" / MODEL_FILENAME

    # as_file() may hand out a temporary copy that is removed when its context
    # exits. Returning from inside a `with` block exits that context before
    # the caller can use the path, so the context is instead kept open until
    # the interpreter shuts down.
    cleanup = ExitStack()
    atexit.register(cleanup.close)
    path = str(cleanup.enter_context(resources.as_file(resource)))

    default_model_path._cached_path = path
    return path
|
|
Binary file
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Headless recognition engine.
|
|
3
|
+
|
|
4
|
+
The Recognizer wraps MediaPipe Holistic + the trained LSTM and exposes a
|
|
5
|
+
simple frame-in / prediction-out API. It holds the rolling 30-frame buffer
|
|
6
|
+
internally so callers just feed frames.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from collections import deque
|
|
10
|
+
from typing import Optional, Tuple
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
from .config import (
|
|
15
|
+
ACTIONS,
|
|
16
|
+
SEQUENCE_LENGTH,
|
|
17
|
+
NUM_KEYPOINTS,
|
|
18
|
+
DEFAULT_THRESHOLD,
|
|
19
|
+
default_model_path,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Recognizer:
    """Live ASL alphabet recognizer.

    Wraps a MediaPipe Holistic instance and a Keras model behind a
    frame-in / prediction-out API, keeping the rolling window of the last
    ``SEQUENCE_LENGTH`` keypoint vectors internally. Usable as a context
    manager; leaving the ``with`` block calls :meth:`close`.

    Parameters
    ----------
    model_path : str, optional
        Path to a Keras .h5 model. Defaults to the bundled model.
    threshold : float
        Minimum softmax confidence for a prediction to count as a letter.
    min_detection_confidence, min_tracking_confidence : float
        Passed straight to MediaPipe Holistic.
    """

    def __init__(
        self,
        model_path: Optional[str] = None,
        threshold: float = DEFAULT_THRESHOLD,
        min_detection_confidence: float = 0.5,
        min_tracking_confidence: float = 0.5,
    ):
        # Imported lazily so a missing heavy dependency only surfaces when a
        # Recognizer is actually created.
        import mediapipe as mp
        from tensorflow.keras.models import load_model

        self.threshold = threshold
        self._model = load_model(model_path or default_model_path())

        self._mp_holistic = mp.solutions.holistic
        self._holistic = self._mp_holistic.Holistic(
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence,
        )
        # A bounded deque drops the oldest frame by itself once it is full.
        self._sequence = deque(maxlen=SEQUENCE_LENGTH)
        self._last_results = None
        self._closed = False

    # -- public API --------------------------------------------------------
    def process(self, frame_bgr):
        """Run MediaPipe on a BGR frame; returns the raw results object.

        The results are also stored and exposed through ``last_results``.
        """
        import cv2

        image_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        # Mark the converted copy read-only before handing it to MediaPipe.
        image_rgb.flags.writeable = False
        results = self._holistic.process(image_rgb)
        self._last_results = results
        return results

    def predict(
        self, frame_bgr
    ) -> Tuple[Optional[str], float, Optional[np.ndarray]]:
        """Feed one frame, return (letter, confidence, full_prob_vector).

        Before the rolling buffer holds ``SEQUENCE_LENGTH`` frames the result
        is ``(None, 0.0, None)``. Afterwards ``full_prob_vector`` is the
        model's output for the current window, ``confidence`` is its largest
        entry, and ``letter`` is the matching entry of ``ACTIONS``, or None
        when ``confidence`` is below ``threshold``.
        """
        results = self.process(frame_bgr)
        self._sequence.append(self._extract_keypoints(results))

        if len(self._sequence) < SEQUENCE_LENGTH:
            return None, 0.0, None

        # The model receives a batch of one window; [0] unwraps that batch.
        probs = self._model.predict(
            np.expand_dims(np.array(self._sequence), axis=0), verbose=0
        )[0]
        idx = int(np.argmax(probs))
        conf = float(probs[idx])
        letter = ACTIONS[idx] if conf >= self.threshold else None
        return letter, conf, probs

    @property
    def last_results(self):
        """MediaPipe results from the most recent process()/predict() call."""
        return self._last_results

    def close(self):
        """Release the MediaPipe Holistic instance; safe to call repeatedly.

        Only the first call closes the underlying object, so an explicit
        ``close()`` inside a ``with`` block does not close it a second time.
        NOTE(review): MediaPipe solution objects are understood to raise when
        closed twice — confirm against the installed version.
        """
        if self._closed:
            return
        self._closed = True
        self._holistic.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        # Returns None, so exceptions raised inside the block propagate.
        self.close()

    # -- internals ---------------------------------------------------------
    @staticmethod
    def _extract_keypoints(results) -> np.ndarray:
        """Flatten the right-hand landmarks into one (x, y, z, ...) vector.

        Returns ``NUM_KEYPOINTS`` zeros when no right hand was detected.
        """
        if results.right_hand_landmarks:
            return np.array(
                [[r.x, r.y, r.z] for r in results.right_hand_landmarks.landmark]
            ).flatten()
        return np.zeros(NUM_KEYPOINTS)
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: signspell
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Live ASL fingerspelling alphabet recognition with a polished webcam UI.
|
|
5
|
+
Author-email: Sundar <you@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/TheMadrasTechie/signspell
|
|
8
|
+
Project-URL: Repository, https://github.com/TheMadrasTechie/signspell
|
|
9
|
+
Project-URL: Issues, https://github.com/TheMadrasTechie/signspell/issues
|
|
10
|
+
Keywords: asl,sign-language,computer-vision,mediapipe,fingerspelling,accessibility
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Education
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
20
|
+
Classifier: Topic :: Multimedia :: Video :: Capture
|
|
21
|
+
Requires-Python: <3.12,>=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy<2
|
|
25
|
+
Requires-Dist: opencv-python<4.10
|
|
26
|
+
Requires-Dist: mediapipe<0.11,>=0.10
|
|
27
|
+
Requires-Dist: tensorflow<2.16,>=2.15
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: build; extra == "dev"
|
|
30
|
+
Requires-Dist: twine; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# signspell
|
|
35
|
+
|
|
36
|
+
**Live ASL fingerspelling alphabet recognition — straight from your webcam.**
|
|
37
|
+
|
|
38
|
+
`signspell` recognises the American Sign Language manual alphabet (A–Z) in real
|
|
39
|
+
time using MediaPipe hand tracking and an LSTM model trained on 30-frame
|
|
40
|
+
keypoint sequences. It ships with a pretrained model and a polished webcam UI,
|
|
41
|
+
and it works both as a command-line tool and an importable library.
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Install
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install signspell
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
> Requires Python 3.9–3.11. A webcam is required for live recognition.
|
|
52
|
+
|
|
53
|
+
## Run it (CLI)
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
signspell # default webcam, bundled model, pro UI
|
|
57
|
+
signspell --camera 1 # pick a different camera
|
|
58
|
+
signspell --threshold 0.6 # require higher confidence
|
|
59
|
+
signspell --no-mirror # disable mirrored view
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**In-window keys:** `q` quit · `c` clear sentence · `SPACE` add a space.
|
|
63
|
+
|
|
64
|
+
## Use it (library)
|
|
65
|
+
|
|
66
|
+
Run the full UI from code:
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
import asl_alphabet as signspell  # the installed import package is named asl_alphabet
|
|
70
|
+
signspell.run()
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Or drive recognition yourself, frame by frame:
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
import cv2
|
|
77
|
+
import asl_alphabet as signspell  # the installed import package is named asl_alphabet
|
|
78
|
+
|
|
79
|
+
rec = signspell.Recognizer()
|
|
80
|
+
cap = cv2.VideoCapture(0)
|
|
81
|
+
|
|
82
|
+
while True:
|
|
83
|
+
ok, frame = cap.read()
|
|
84
|
+
if not ok:
|
|
85
|
+
break
|
|
86
|
+
letter, confidence, probs = rec.predict(frame)
|
|
87
|
+
if letter:
|
|
88
|
+
print(letter, f"{confidence:.2f}")
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The `Recognizer` keeps a rolling 30-frame buffer internally, so you just feed
|
|
92
|
+
frames and read predictions. `letter` is `None` until the buffer fills or when
|
|
93
|
+
confidence is below the threshold.
|
|
94
|
+
|
|
95
|
+
## How it works
|
|
96
|
+
|
|
97
|
+
1. **MediaPipe Holistic** extracts 21 right-hand landmarks per frame.
|
|
98
|
+
2. The last **30 frames** of `(x, y, z)` keypoints form a sequence.
|
|
99
|
+
3. An **LSTM** classifies the sequence into one of 26 letters.
|
|
100
|
+
4. A short stability window prevents flicker before a letter is committed.
|
|
101
|
+
|
|
102
|
+
## Bring your own model
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
signspell --model path/to/your_model.h5
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
signspell.Recognizer(model_path="path/to/your_model.h5")
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Your model must accept input shape `(1, 30, 63)` and output 26 class scores.
|
|
113
|
+
|
|
114
|
+
## License
|
|
115
|
+
|
|
116
|
+
MIT © Sundar
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
MANIFEST.in
|
|
3
|
+
README.md
|
|
4
|
+
pyproject.toml
|
|
5
|
+
src/asl_alphabet/__init__.py
|
|
6
|
+
src/asl_alphabet/app.py
|
|
7
|
+
src/asl_alphabet/cli.py
|
|
8
|
+
src/asl_alphabet/config.py
|
|
9
|
+
src/asl_alphabet/recognizer.py
|
|
10
|
+
src/asl_alphabet/models/alphabet_30_30_all_4.h5
|
|
11
|
+
src/signspell.egg-info/PKG-INFO
|
|
12
|
+
src/signspell.egg-info/SOURCES.txt
|
|
13
|
+
src/signspell.egg-info/dependency_links.txt
|
|
14
|
+
src/signspell.egg-info/entry_points.txt
|
|
15
|
+
src/signspell.egg-info/requires.txt
|
|
16
|
+
src/signspell.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
asl_alphabet
|