sip-lab 1.23.0 → 1.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/binding.gyp +4 -0
- package/build_deps.sh +19 -0
- package/index.js +18 -0
- package/package.json +3 -2
- package/pocketsphinx/model/CMakeLists.txt +3 -0
- package/pocketsphinx/model/en-us/cmudict-en-us.dict +134782 -0
- package/pocketsphinx/model/en-us/en-us/README +34 -0
- package/pocketsphinx/model/en-us/en-us/feat.params +12 -0
- package/pocketsphinx/model/en-us/en-us/mdef +0 -0
- package/pocketsphinx/model/en-us/en-us/means +0 -0
- package/pocketsphinx/model/en-us/en-us/noisedict +5 -0
- package/pocketsphinx/model/en-us/en-us/sendump +0 -0
- package/pocketsphinx/model/en-us/en-us/transition_matrices +0 -0
- package/pocketsphinx/model/en-us/en-us/variances +0 -0
- package/pocketsphinx/model/en-us/en-us-phone.lm.bin +0 -0
- package/pocketsphinx/model/en-us/en-us.lm.bin +0 -0
- package/prebuilds/linux-x64/sip-lab.node +0 -0
- package/samples/artifacts/hello_good_morning.wav +0 -0
- package/samples/play_wav_and_speech_recog.bad_transcript.pcmu8000.js +182 -0
- package/samples/speech_synth_and_recog.speex16000.js +186 -0
- package/samples/tcp_and_extra_headers.js +44 -1
- package/src/addon.cpp +37 -0
- package/src/event_templates.cpp +6 -0
- package/src/event_templates.hpp +2 -0
- package/src/pjmedia/include/pjmedia/pocketsphinx_port.h +19 -0
- package/src/pjmedia/src/pjmedia/flite_port.c +4 -7
- package/src/pjmedia/src/pjmedia/pocketsphinx_port.c +169 -46
- package/src/sip.cpp +398 -273
- package/src/sip.hpp +3 -0
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
19
|
*/
|
|
20
20
|
|
|
21
|
-
#include <
|
|
21
|
+
#include <pocketsphinx_port.h>
|
|
22
22
|
#include <pjmedia/errno.h>
|
|
23
23
|
#include <pjmedia/port.h>
|
|
24
24
|
#include <pj/assert.h>
|
|
@@ -44,43 +44,31 @@ struct pocketsphinx_t
|
|
|
44
44
|
{
|
|
45
45
|
struct pjmedia_port base;
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
/*
|
|
52
|
-
* Register a callback to be called when we get translation
|
|
53
|
-
*/
|
|
54
|
-
PJ_DEF(pj_status_t) pjmedia_pocketsphinx_port_set_speech_cb(pjmedia_port *port,
|
|
55
|
-
void *user_data,
|
|
56
|
-
void (*cb)(pjmedia_port *port,
|
|
57
|
-
void *usr_data, char *transcript))
|
|
58
|
-
{
|
|
59
|
-
struct pocketsphinx_t *flite;
|
|
47
|
+
ps_decoder_t *decoder;
|
|
48
|
+
ps_config_t *config;
|
|
49
|
+
ps_endpointer_t *ep;
|
|
60
50
|
|
|
61
|
-
|
|
62
|
-
|
|
51
|
+
unsigned in_spf;
|
|
52
|
+
unsigned out_spf;
|
|
63
53
|
|
|
64
|
-
|
|
65
|
-
|
|
54
|
+
short *samples;
|
|
55
|
+
unsigned sample_count;
|
|
66
56
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
pocketsphinx->cb = cb;
|
|
71
|
-
|
|
72
|
-
return PJ_SUCCESS;
|
|
73
|
-
}
|
|
57
|
+
pj_bool_t subscribed;
|
|
58
|
+
void (*cb)(pjmedia_port*, void*, char*);
|
|
59
|
+
void *cb_user_data;
|
|
74
60
|
|
|
61
|
+
char transcript[4096];
|
|
62
|
+
};
|
|
75
63
|
|
|
76
64
|
static pj_status_t speech_on_event(pjmedia_event *event,
|
|
77
65
|
void *user_data)
|
|
78
66
|
{
|
|
79
|
-
struct pocketsphinx_t *
|
|
67
|
+
struct pocketsphinx_t *port = (struct pocketsphinx_t*)user_data;
|
|
80
68
|
|
|
81
69
|
if (event->type == PJMEDIA_EVENT_CALLBACK) {
|
|
82
|
-
if (
|
|
83
|
-
(*
|
|
70
|
+
if (port->cb)
|
|
71
|
+
(*port->cb)(&port->base, port->cb_user_data, port->transcript);
|
|
84
72
|
}
|
|
85
73
|
|
|
86
74
|
return PJ_SUCCESS;
|
|
@@ -91,52 +79,176 @@ PJ_DEF(pj_status_t) pjmedia_pocketsphinx_port_create( pj_pool_t *pool,
|
|
|
91
79
|
unsigned channel_count,
|
|
92
80
|
unsigned samples_per_frame,
|
|
93
81
|
unsigned bits_per_sample,
|
|
82
|
+
void (*cb)(pjmedia_port*, void *user_data, char *transcript),
|
|
83
|
+
void *cb_user_data,
|
|
94
84
|
pjmedia_port **p_port)
|
|
95
85
|
{
|
|
96
|
-
struct
|
|
86
|
+
struct pocketsphinx_t *port;
|
|
97
87
|
const pj_str_t name = pj_str("pocketsphinx");
|
|
98
88
|
|
|
99
|
-
PJ_ASSERT_RETURN(pool && clock_rate && channel_count &&
|
|
89
|
+
PJ_ASSERT_RETURN(pool && clock_rate && channel_count == 1 &&
|
|
100
90
|
samples_per_frame && bits_per_sample == 16 &&
|
|
101
91
|
p_port != NULL, PJ_EINVAL);
|
|
102
92
|
|
|
103
93
|
PJ_ASSERT_RETURN(pool && p_port, PJ_EINVAL);
|
|
104
94
|
|
|
105
|
-
|
|
95
|
+
port = PJ_POOL_ZALLOC_T(pool, struct pocketsphinx_t);
|
|
106
96
|
PJ_ASSERT_RETURN(pool != NULL, PJ_ENOMEM);
|
|
107
97
|
|
|
108
|
-
pjmedia_port_info_init(&
|
|
98
|
+
pjmedia_port_info_init(&port->base.info, &name, SIGNATURE, clock_rate,
|
|
109
99
|
channel_count, bits_per_sample, samples_per_frame);
|
|
110
100
|
|
|
111
|
-
|
|
112
|
-
|
|
101
|
+
port->base.put_frame = &pocketsphinx_put_frame;
|
|
102
|
+
port->base.on_destroy = &pocketsphinx_on_destroy;
|
|
113
103
|
|
|
114
|
-
|
|
115
|
-
|
|
104
|
+
port->cb = cb;
|
|
105
|
+
port->cb_user_data = cb_user_data;
|
|
116
106
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
107
|
+
port->config = ps_config_init(NULL);
|
|
108
|
+
ps_default_search_args(port->config);
|
|
109
|
+
|
|
110
|
+
if ((port->decoder = ps_init(port->config)) == NULL) {
|
|
111
|
+
TRACE_((THIS_FILE, "pocketsphinx port: decoder init failed\n"));
|
|
112
|
+
return !PJ_SUCCESS;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
if ((port->ep = ps_endpointer_init(0, 0.0, 0, clock_rate, 0)) == NULL) {
|
|
116
|
+
TRACE_((THIS_FILE, "pocketsphinx port: endpointer init failed\n"));
|
|
117
|
+
return !PJ_SUCCESS;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
port->in_spf = samples_per_frame;
|
|
121
|
+
port->out_spf = ps_endpointer_frame_size(port->ep);
|
|
121
122
|
|
|
122
|
-
|
|
123
|
+
port->samples = (short*) pj_pool_alloc(pool, port->out_spf * sizeof(short));
|
|
124
|
+
if (port->samples == NULL) {
|
|
125
|
+
TRACE_(("Failed to allocate buffer for samples\n"));
|
|
126
|
+
return !PJ_SUCCESS;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
TRACE_((THIS_FILE, "pocketsphinx port created: %u/%u/%u/%u", clock_rate,
|
|
123
130
|
channel_count, samples_per_frame, bits_per_sample));
|
|
124
131
|
|
|
125
|
-
|
|
132
|
+
printf("pocketsphinx_create in_spf=%i out_spf=%i\n", port->in_spf, port->out_spf);
|
|
133
|
+
*p_port = &port->base;
|
|
126
134
|
return PJ_SUCCESS;
|
|
127
135
|
}
|
|
128
136
|
|
|
137
|
+
void feed(struct pocketsphinx_t *port, short *frame) {
|
|
138
|
+
const int16 *speech;
|
|
139
|
+
int prev_in_speech = ps_endpointer_in_speech(port->ep);
|
|
140
|
+
speech = ps_endpointer_process(port->ep, frame);
|
|
141
|
+
if (speech != NULL) {
|
|
142
|
+
const char *hyp;
|
|
143
|
+
if (!prev_in_speech) {
|
|
144
|
+
printf("pocketsphinx speech start at %.2f\n", ps_endpointer_speech_start(port->ep));
|
|
145
|
+
ps_start_utt(port->decoder);
|
|
146
|
+
}
|
|
147
|
+
if (ps_process_raw(port->decoder, speech, port->out_spf, FALSE, FALSE) < 0) {
|
|
148
|
+
printf("pocketsphinx ps_process_raw() failed\n");
|
|
149
|
+
return;
|
|
150
|
+
}
|
|
151
|
+
if ((hyp = ps_get_hyp(port->decoder, NULL)) != NULL) {
|
|
152
|
+
//printf("pocketsphinx partial result: %s\n", hyp);
|
|
153
|
+
}
|
|
154
|
+
if (!ps_endpointer_in_speech(port->ep)) {
|
|
155
|
+
printf("Speech end at %.2f\n", ps_endpointer_speech_end(port->ep));
|
|
156
|
+
ps_end_utt(port->decoder);
|
|
157
|
+
if ((hyp = ps_get_hyp(port->decoder, NULL)) != NULL) {
|
|
158
|
+
printf("pocketsphinx speech: %s\n", hyp);
|
|
159
|
+
if(strlen(hyp) == 0) return;
|
|
160
|
+
|
|
161
|
+
strncpy(port->transcript, hyp, sizeof(port->transcript) - 1);
|
|
162
|
+
|
|
163
|
+
// Ensure the destination string is null-terminated
|
|
164
|
+
port->transcript[sizeof(port->transcript) - 1] = '\0';
|
|
165
|
+
|
|
166
|
+
if(port->cb) {
|
|
167
|
+
if (!port->subscribed) {
|
|
168
|
+
pj_status_t status = pjmedia_event_subscribe(NULL, &speech_on_event,
|
|
169
|
+
port, port);
|
|
170
|
+
port->subscribed = (status == PJ_SUCCESS)? PJ_TRUE:
|
|
171
|
+
PJ_FALSE;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
if (port->subscribed) {
|
|
175
|
+
pjmedia_event event;
|
|
176
|
+
|
|
177
|
+
pjmedia_event_init(&event, PJMEDIA_EVENT_CALLBACK,
|
|
178
|
+
NULL, port);
|
|
179
|
+
pjmedia_event_publish(NULL, port, &event,
|
|
180
|
+
PJMEDIA_EVENT_PUBLISH_POST_EVENT);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
/*
 * Complete the partially filled staging buffer from the start of `frame`,
 * feed the now full buffer to the recognizer and reset the fill level.
 *
 * Returns the number of samples taken from `frame`. The frame is not
 * checked for length here, so the caller has to make sure it holds at
 * least that many samples.
 */
unsigned feed_one(struct pocketsphinx_t *port, pjmedia_frame *frame)
{
    /* Samples still missing before the staging buffer holds out_spf. */
    const unsigned missing = port->out_spf - port->sample_count;
    short *tail = &port->samples[port->sample_count];

    memcpy(tail, frame->buf, missing * sizeof(short));
    feed(port, port->samples);
    port->sample_count = 0;

    return missing;
}
|
|
195
|
+
|
|
196
|
+
/*
 * Feed a frame that can hold more than one endpointer frame.
 *
 * Order of work:
 *   1. If the staging buffer already holds samples, top it up from the
 *      start of `frame` and feed it (via feed_one()).
 *   2. Feed every complete out_spf-sized chunk straight from `frame`.
 *   3. Stash whatever is left in the staging buffer for the next call.
 *
 * port   Recognizer state; port->samples / port->sample_count form the
 *        staging buffer.
 * frame  Audio frame; frame->size is read as a byte count of 16-bit samples.
 */
void feed_all(struct pocketsphinx_t *port, pjmedia_frame *frame) {
    /* Walk the payload as samples: frame->buf is untyped, so it must be
     * converted before any pointer arithmetic is done on it. */
    short *cursor = (short *)frame->buf;
    unsigned remaining = (unsigned)(frame->size / sizeof(short));

    if (port->sample_count > 0) {
        unsigned missing = port->out_spf - port->sample_count;
        unsigned used;

        /* Frame too short to complete the staged chunk: just append it. */
        if (remaining < missing) {
            memcpy(port->samples + port->sample_count, cursor,
                   remaining * sizeof(short));
            port->sample_count += remaining;
            return;
        }

        used = feed_one(port, frame);
        cursor += used;
        remaining -= used;
    }

    /* Advance by exactly one endpointer frame per iteration. */
    while (remaining >= port->out_spf) {
        feed(port, cursor);
        cursor += port->out_spf;
        remaining -= port->out_spf;
    }

    /* Fewer than out_spf samples remain; keep them for the next call. */
    if (remaining > 0) {
        memcpy(port->samples, cursor, remaining * sizeof(short));
        port->sample_count = remaining;
    }
}
|
|
218
|
+
|
|
129
219
|
static pj_status_t pocketsphinx_put_frame(pjmedia_port *this_port,
|
|
130
220
|
pjmedia_frame *frame)
|
|
131
221
|
{
|
|
132
222
|
if(frame->type != PJMEDIA_FRAME_TYPE_AUDIO) return PJ_SUCCESS;
|
|
133
223
|
|
|
134
|
-
struct
|
|
135
|
-
dtmf_rx(&dport->state, (const pj_int16_t*)frame->buf,
|
|
136
|
-
PJMEDIA_PIA_SPF(&dport->base.info));
|
|
224
|
+
struct pocketsphinx_t *port = (struct pocketsphinx_t*) this_port;
|
|
137
225
|
|
|
138
|
-
|
|
226
|
+
if(port->in_spf == port->out_spf) {
|
|
227
|
+
//printf("feed\n");
|
|
228
|
+
feed(port, (short*)frame->buf);
|
|
229
|
+
return;
|
|
230
|
+
}
|
|
139
231
|
|
|
232
|
+
if(port->in_spf > port->out_spf) {
|
|
233
|
+
//printf("feed_all\n");
|
|
234
|
+
feed_all(port, frame);
|
|
235
|
+
return;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
unsigned samples = frame->size / 2;
|
|
239
|
+
if(samples + port->sample_count >= port->out_spf) {
|
|
240
|
+
// enough to feed once
|
|
241
|
+
//printf("feed_one\n");
|
|
242
|
+
feed_one(port, frame);
|
|
243
|
+
return;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
// not enough to feed.
|
|
247
|
+
//printf("not enough to feed\n");
|
|
248
|
+
memcpy((short*)port->samples + port->sample_count, frame->buf, samples * sizeof(short));
|
|
249
|
+
port->sample_count += samples;
|
|
250
|
+
|
|
251
|
+
return PJ_SUCCESS;
|
|
140
252
|
}
|
|
141
253
|
|
|
142
254
|
/*
|
|
@@ -144,6 +256,17 @@ static pj_status_t pocketsphinx_put_frame(pjmedia_port *this_port,
|
|
|
144
256
|
*/
|
|
145
257
|
static pj_status_t pocketsphinx_on_destroy(pjmedia_port *this_port)
{
    struct pocketsphinx_t *self = (struct pocketsphinx_t *)this_port;

    /* Release the recognizer objects: endpointer, decoder, then config. */
    ps_endpointer_free(self->ep);
    ps_free(self->decoder);
    ps_config_free(self->config);

    /* Nothing more to do unless feed() subscribed to the event manager. */
    if (!self->subscribed)
        return PJ_SUCCESS;

    pjmedia_event_unsubscribe(NULL, &speech_on_event, self, self);
    self->subscribed = PJ_FALSE;

    return PJ_SUCCESS;
}
|
|
149
272
|
|