sip-lab 1.23.0 → 1.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/binding.gyp +4 -0
- package/build_deps.sh +19 -0
- package/index.js +18 -0
- package/package.json +4 -2
- package/pocketsphinx/model/CMakeLists.txt +3 -0
- package/pocketsphinx/model/en-us/cmudict-en-us.dict +134782 -0
- package/pocketsphinx/model/en-us/en-us/README +34 -0
- package/pocketsphinx/model/en-us/en-us/feat.params +12 -0
- package/pocketsphinx/model/en-us/en-us/mdef +0 -0
- package/pocketsphinx/model/en-us/en-us/means +0 -0
- package/pocketsphinx/model/en-us/en-us/noisedict +5 -0
- package/pocketsphinx/model/en-us/en-us/sendump +0 -0
- package/pocketsphinx/model/en-us/en-us/transition_matrices +0 -0
- package/pocketsphinx/model/en-us/en-us/variances +0 -0
- package/pocketsphinx/model/en-us/en-us-phone.lm.bin +0 -0
- package/pocketsphinx/model/en-us/en-us.lm.bin +0 -0
- package/prebuilds/linux-x64/sip-lab.node +0 -0
- package/runtests +80 -0
- package/samples/artifacts/hello_good_morning.wav +0 -0
- package/samples/play_wav_and_speech_recog.bad_transcript.pcmu8000.js +182 -0
- package/samples/speech_synth_and_recog.speex16000.js +186 -0
- package/samples/tcp_and_extra_headers.js +44 -1
- package/src/addon.cpp +37 -0
- package/src/event_templates.cpp +6 -0
- package/src/event_templates.hpp +2 -0
- package/src/pjmedia/include/pjmedia/pocketsphinx_port.h +19 -0
- package/src/pjmedia/src/pjmedia/flite_port.c +4 -7
- package/src/pjmedia/src/pjmedia/pocketsphinx_port.c +169 -46
- package/src/sip.cpp +464 -293
- package/src/sip.hpp +3 -0
|
@@ -143,10 +143,7 @@ PJ_DEF(pj_status_t) pjmedia_flite_port_create( pj_pool_t *pool,
|
|
|
143
143
|
if (!strcasecmp(voice, "awb")) {
|
|
144
144
|
flite->v = globals.awb;
|
|
145
145
|
} else if (!strcasecmp(voice, "kal")) {
|
|
146
|
-
|
|
147
|
-
* so kal talks a little bit too fast ...
|
|
148
|
-
* for now: "symlink" kal to kal16
|
|
149
|
-
*/ flite->v = globals.kal16;
|
|
146
|
+
flite->v = globals.kal; // this uses SamplingRate of 8000. All others use 16000
|
|
150
147
|
} else if (!strcasecmp(voice, "rms")) {
|
|
151
148
|
flite->v = globals.rms;
|
|
152
149
|
} else if (!strcasecmp(voice, "slt")) {
|
|
@@ -194,12 +191,12 @@ static pj_status_t flite_get_frame(pjmedia_port *port,
|
|
|
194
191
|
struct flite_t *flite = (struct flite_t*)port;
|
|
195
192
|
|
|
196
193
|
if(!flite->w) {
|
|
197
|
-
printf("flite no data\n");
|
|
194
|
+
//printf("flite no data\n");
|
|
198
195
|
frame->type = PJMEDIA_FRAME_TYPE_NONE;
|
|
199
196
|
return PJ_SUCCESS;
|
|
200
197
|
}
|
|
201
198
|
|
|
202
|
-
printf("written_samples=%i num_samples=%i\n", flite->written_samples, flite->w->num_samples);
|
|
199
|
+
//printf("written_samples=%i num_samples=%i\n", flite->written_samples, flite->w->num_samples);
|
|
203
200
|
if (flite->written_samples + PJMEDIA_PIA_SPF(&port->info) > (unsigned)flite->w->num_samples) {
|
|
204
201
|
printf("flite end of speech\n");
|
|
205
202
|
|
|
@@ -235,7 +232,7 @@ static pj_status_t flite_get_frame(pjmedia_port *port,
|
|
|
235
232
|
memcpy(frame->buf, flite->w->samples + flite->written_samples, PJMEDIA_PIA_SPF(&port->info)*2);
|
|
236
233
|
flite->written_samples += PJMEDIA_PIA_SPF(&port->info);
|
|
237
234
|
frame->type = PJMEDIA_FRAME_TYPE_AUDIO;
|
|
238
|
-
printf("flite data written samples=%i\n", PJMEDIA_PIA_SPF(&port->info));
|
|
235
|
+
//printf("flite data written samples=%i\n", PJMEDIA_PIA_SPF(&port->info));
|
|
239
236
|
|
|
240
237
|
return PJ_SUCCESS;
|
|
241
238
|
}
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
19
|
*/
|
|
20
20
|
|
|
21
|
-
#include <
|
|
21
|
+
#include <pocketsphinx_port.h>
|
|
22
22
|
#include <pjmedia/errno.h>
|
|
23
23
|
#include <pjmedia/port.h>
|
|
24
24
|
#include <pj/assert.h>
|
|
@@ -44,43 +44,31 @@ struct pocketsphinx_t
|
|
|
44
44
|
{
|
|
45
45
|
struct pjmedia_port base;
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
/*
|
|
52
|
-
* Register a callback to be called when we get translation
|
|
53
|
-
*/
|
|
54
|
-
PJ_DEF(pj_status_t) pjmedia_pocketsphinx_port_set_speech_cb(pjmedia_port *port,
|
|
55
|
-
void *user_data,
|
|
56
|
-
void (*cb)(pjmedia_port *port,
|
|
57
|
-
void *usr_data, char *transcript))
|
|
58
|
-
{
|
|
59
|
-
struct pocketsphinx_t *flite;
|
|
47
|
+
ps_decoder_t *decoder;
|
|
48
|
+
ps_config_t *config;
|
|
49
|
+
ps_endpointer_t *ep;
|
|
60
50
|
|
|
61
|
-
|
|
62
|
-
|
|
51
|
+
unsigned in_spf;
|
|
52
|
+
unsigned out_spf;
|
|
63
53
|
|
|
64
|
-
|
|
65
|
-
|
|
54
|
+
short *samples;
|
|
55
|
+
unsigned sample_count;
|
|
66
56
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
pocketsphinx->cb = cb;
|
|
71
|
-
|
|
72
|
-
return PJ_SUCCESS;
|
|
73
|
-
}
|
|
57
|
+
pj_bool_t subscribed;
|
|
58
|
+
void (*cb)(pjmedia_port*, void*, char*);
|
|
59
|
+
void *cb_user_data;
|
|
74
60
|
|
|
61
|
+
char transcript[4096];
|
|
62
|
+
};
|
|
75
63
|
|
|
76
64
|
static pj_status_t speech_on_event(pjmedia_event *event,
|
|
77
65
|
void *user_data)
|
|
78
66
|
{
|
|
79
|
-
struct pocketsphinx_t *
|
|
67
|
+
struct pocketsphinx_t *port = (struct pocketsphinx_t*)user_data;
|
|
80
68
|
|
|
81
69
|
if (event->type == PJMEDIA_EVENT_CALLBACK) {
|
|
82
|
-
if (
|
|
83
|
-
(*
|
|
70
|
+
if (port->cb)
|
|
71
|
+
(*port->cb)(&port->base, port->cb_user_data, port->transcript);
|
|
84
72
|
}
|
|
85
73
|
|
|
86
74
|
return PJ_SUCCESS;
|
|
@@ -91,52 +79,176 @@ PJ_DEF(pj_status_t) pjmedia_pocketsphinx_port_create( pj_pool_t *pool,
|
|
|
91
79
|
unsigned channel_count,
|
|
92
80
|
unsigned samples_per_frame,
|
|
93
81
|
unsigned bits_per_sample,
|
|
82
|
+
void (*cb)(pjmedia_port*, void *user_data, char *transcript),
|
|
83
|
+
void *cb_user_data,
|
|
94
84
|
pjmedia_port **p_port)
|
|
95
85
|
{
|
|
96
|
-
struct
|
|
86
|
+
struct pocketsphinx_t *port;
|
|
97
87
|
const pj_str_t name = pj_str("pocketsphinx");
|
|
98
88
|
|
|
99
|
-
PJ_ASSERT_RETURN(pool && clock_rate && channel_count &&
|
|
89
|
+
PJ_ASSERT_RETURN(pool && clock_rate && channel_count == 1 &&
|
|
100
90
|
samples_per_frame && bits_per_sample == 16 &&
|
|
101
91
|
p_port != NULL, PJ_EINVAL);
|
|
102
92
|
|
|
103
93
|
PJ_ASSERT_RETURN(pool && p_port, PJ_EINVAL);
|
|
104
94
|
|
|
105
|
-
|
|
95
|
+
port = PJ_POOL_ZALLOC_T(pool, struct pocketsphinx_t);
|
|
106
96
|
PJ_ASSERT_RETURN(pool != NULL, PJ_ENOMEM);
|
|
107
97
|
|
|
108
|
-
pjmedia_port_info_init(&
|
|
98
|
+
pjmedia_port_info_init(&port->base.info, &name, SIGNATURE, clock_rate,
|
|
109
99
|
channel_count, bits_per_sample, samples_per_frame);
|
|
110
100
|
|
|
111
|
-
|
|
112
|
-
|
|
101
|
+
port->base.put_frame = &pocketsphinx_put_frame;
|
|
102
|
+
port->base.on_destroy = &pocketsphinx_on_destroy;
|
|
113
103
|
|
|
114
|
-
|
|
115
|
-
|
|
104
|
+
port->cb = cb;
|
|
105
|
+
port->cb_user_data = cb_user_data;
|
|
116
106
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
107
|
+
port->config = ps_config_init(NULL);
|
|
108
|
+
ps_default_search_args(port->config);
|
|
109
|
+
|
|
110
|
+
if ((port->decoder = ps_init(port->config)) == NULL) {
|
|
111
|
+
TRACE_((THIS_FILE, "pocketsphinx port: decoder init failed\n"));
|
|
112
|
+
return !PJ_SUCCESS;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
if ((port->ep = ps_endpointer_init(0, 0.0, 0, clock_rate, 0)) == NULL) {
|
|
116
|
+
TRACE_((THIS_FILE, "pocketsphinx port: endpointer init failed\n"));
|
|
117
|
+
return !PJ_SUCCESS;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
port->in_spf = samples_per_frame;
|
|
121
|
+
port->out_spf = ps_endpointer_frame_size(port->ep);
|
|
121
122
|
|
|
122
|
-
|
|
123
|
+
port->samples = (short*) pj_pool_alloc(pool, port->out_spf * sizeof(short));
|
|
124
|
+
if (port->samples == NULL) {
|
|
125
|
+
TRACE_(("Failed to allocate buffer for samples\n"));
|
|
126
|
+
return !PJ_SUCCESS;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
TRACE_((THIS_FILE, "pocketsphinx port created: %u/%u/%u/%u", clock_rate,
|
|
123
130
|
channel_count, samples_per_frame, bits_per_sample));
|
|
124
131
|
|
|
125
|
-
|
|
132
|
+
printf("pocketsphinx_create in_spf=%i out_spf=%i\n", port->in_spf, port->out_spf);
|
|
133
|
+
*p_port = &port->base;
|
|
126
134
|
return PJ_SUCCESS;
|
|
127
135
|
}
|
|
128
136
|
|
|
137
|
+
void feed(struct pocketsphinx_t *port, short *frame) {
|
|
138
|
+
const int16 *speech;
|
|
139
|
+
int prev_in_speech = ps_endpointer_in_speech(port->ep);
|
|
140
|
+
speech = ps_endpointer_process(port->ep, frame);
|
|
141
|
+
if (speech != NULL) {
|
|
142
|
+
const char *hyp;
|
|
143
|
+
if (!prev_in_speech) {
|
|
144
|
+
printf("pocketsphinx speech start at %.2f\n", ps_endpointer_speech_start(port->ep));
|
|
145
|
+
ps_start_utt(port->decoder);
|
|
146
|
+
}
|
|
147
|
+
if (ps_process_raw(port->decoder, speech, port->out_spf, FALSE, FALSE) < 0) {
|
|
148
|
+
printf("pocketsphinx ps_process_raw() failed\n");
|
|
149
|
+
return;
|
|
150
|
+
}
|
|
151
|
+
if ((hyp = ps_get_hyp(port->decoder, NULL)) != NULL) {
|
|
152
|
+
//printf("pocketsphinx partial result: %s\n", hyp);
|
|
153
|
+
}
|
|
154
|
+
if (!ps_endpointer_in_speech(port->ep)) {
|
|
155
|
+
printf("Speech end at %.2f\n", ps_endpointer_speech_end(port->ep));
|
|
156
|
+
ps_end_utt(port->decoder);
|
|
157
|
+
if ((hyp = ps_get_hyp(port->decoder, NULL)) != NULL) {
|
|
158
|
+
printf("pocketsphinx speech: %s\n", hyp);
|
|
159
|
+
if(strlen(hyp) == 0) return;
|
|
160
|
+
|
|
161
|
+
strncpy(port->transcript, hyp, sizeof(port->transcript) - 1);
|
|
162
|
+
|
|
163
|
+
// Ensure the destination string is null-terminated
|
|
164
|
+
port->transcript[sizeof(port->transcript) - 1] = '\0';
|
|
165
|
+
|
|
166
|
+
if(port->cb) {
|
|
167
|
+
if (!port->subscribed) {
|
|
168
|
+
pj_status_t status = pjmedia_event_subscribe(NULL, &speech_on_event,
|
|
169
|
+
port, port);
|
|
170
|
+
port->subscribed = (status == PJ_SUCCESS)? PJ_TRUE:
|
|
171
|
+
PJ_FALSE;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
if (port->subscribed) {
|
|
175
|
+
pjmedia_event event;
|
|
176
|
+
|
|
177
|
+
pjmedia_event_init(&event, PJMEDIA_EVENT_CALLBACK,
|
|
178
|
+
NULL, port);
|
|
179
|
+
pjmedia_event_publish(NULL, port, &event,
|
|
180
|
+
PJMEDIA_EVENT_PUBLISH_POST_EVENT);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
unsigned feed_one(struct pocketsphinx_t *port, pjmedia_frame *frame){
|
|
189
|
+
unsigned used_samples = port->out_spf - port->sample_count;
|
|
190
|
+
memcpy((short*)port->samples + port->sample_count, frame->buf, used_samples * sizeof(short));
|
|
191
|
+
feed(port, port->samples);
|
|
192
|
+
port->sample_count = 0;
|
|
193
|
+
return used_samples;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
void feed_all(struct pocketsphinx_t *port, pjmedia_frame *frame) {
|
|
197
|
+
unsigned samples = frame->size / 2;
|
|
198
|
+
unsigned used_samples = 0;
|
|
199
|
+
if(port->sample_count > 0) {
|
|
200
|
+
used_samples = feed_one(port, frame);
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
short *out_frame = frame->buf + used_samples;
|
|
204
|
+
samples -= used_samples;
|
|
205
|
+
unsigned count = 0;
|
|
206
|
+
while(samples >= port->out_spf) {
|
|
207
|
+
feed(port, out_frame);
|
|
208
|
+
count++;
|
|
209
|
+
out_frame += (count * port->out_spf);
|
|
210
|
+
samples -= port->out_spf;
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
if(samples) {
|
|
214
|
+
memcpy(port->samples, out_frame, samples * sizeof(short));
|
|
215
|
+
port->sample_count = samples;
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
|
|
129
219
|
static pj_status_t pocketsphinx_put_frame(pjmedia_port *this_port,
|
|
130
220
|
pjmedia_frame *frame)
|
|
131
221
|
{
|
|
132
222
|
if(frame->type != PJMEDIA_FRAME_TYPE_AUDIO) return PJ_SUCCESS;
|
|
133
223
|
|
|
134
|
-
struct
|
|
135
|
-
dtmf_rx(&dport->state, (const pj_int16_t*)frame->buf,
|
|
136
|
-
PJMEDIA_PIA_SPF(&dport->base.info));
|
|
224
|
+
struct pocketsphinx_t *port = (struct pocketsphinx_t*) this_port;
|
|
137
225
|
|
|
138
|
-
|
|
226
|
+
if(port->in_spf == port->out_spf) {
|
|
227
|
+
//printf("feed\n");
|
|
228
|
+
feed(port, (short*)frame->buf);
|
|
229
|
+
return;
|
|
230
|
+
}
|
|
139
231
|
|
|
232
|
+
if(port->in_spf > port->out_spf) {
|
|
233
|
+
//printf("feed_all\n");
|
|
234
|
+
feed_all(port, frame);
|
|
235
|
+
return;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
unsigned samples = frame->size / 2;
|
|
239
|
+
if(samples + port->sample_count >= port->out_spf) {
|
|
240
|
+
// enough to feed once
|
|
241
|
+
//printf("feed_one\n");
|
|
242
|
+
feed_one(port, frame);
|
|
243
|
+
return;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
// not enough to feed.
|
|
247
|
+
//printf("not enough to feed\n");
|
|
248
|
+
memcpy((short*)port->samples + port->sample_count, frame->buf, samples * sizeof(short));
|
|
249
|
+
port->sample_count += samples;
|
|
250
|
+
|
|
251
|
+
return PJ_SUCCESS;
|
|
140
252
|
}
|
|
141
253
|
|
|
142
254
|
/*
|
|
@@ -144,6 +256,17 @@ static pj_status_t pocketsphinx_put_frame(pjmedia_port *this_port,
|
|
|
144
256
|
*/
|
|
145
257
|
static pj_status_t pocketsphinx_on_destroy(pjmedia_port *this_port)
|
|
146
258
|
{
|
|
259
|
+
struct pocketsphinx_t *port = (struct pocketsphinx_t*) this_port;
|
|
260
|
+
|
|
261
|
+
ps_endpointer_free(port->ep);
|
|
262
|
+
ps_free(port->decoder);
|
|
263
|
+
ps_config_free(port->config);
|
|
264
|
+
|
|
265
|
+
if (port->subscribed) {
|
|
266
|
+
pjmedia_event_unsubscribe(NULL, &speech_on_event, port, port);
|
|
267
|
+
port->subscribed = PJ_FALSE;
|
|
268
|
+
}
|
|
269
|
+
|
|
147
270
|
return PJ_SUCCESS;
|
|
148
271
|
}
|
|
149
272
|
|