sip-lab 1.22.0 → 1.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/binding.gyp +4 -0
- package/build_deps.sh +21 -1
- package/index.js +19 -0
- package/package.json +3 -2
- package/pocketsphinx/model/CMakeLists.txt +3 -0
- package/pocketsphinx/model/en-us/cmudict-en-us.dict +134782 -0
- package/pocketsphinx/model/en-us/en-us/README +34 -0
- package/pocketsphinx/model/en-us/en-us/feat.params +12 -0
- package/pocketsphinx/model/en-us/en-us/mdef +0 -0
- package/pocketsphinx/model/en-us/en-us/means +0 -0
- package/pocketsphinx/model/en-us/en-us/noisedict +5 -0
- package/pocketsphinx/model/en-us/en-us/sendump +0 -0
- package/pocketsphinx/model/en-us/en-us/transition_matrices +0 -0
- package/pocketsphinx/model/en-us/en-us/variances +0 -0
- package/pocketsphinx/model/en-us/en-us-phone.lm.bin +0 -0
- package/pocketsphinx/model/en-us/en-us.lm.bin +0 -0
- package/prebuilds/linux-x64/sip-lab.node +0 -0
- package/samples/artifacts/hello_good_morning.wav +0 -0
- package/samples/play_wav_and_speech_recog.bad_transcript.pcmu8000.js +182 -0
- package/samples/speech_synth_and_recog.speex16000.js +186 -0
- package/samples/start_play_wav_with_end_of_file_event.js +269 -0
- package/samples/start_play_wav_with_no_loop.js +257 -0
- package/samples/tcp_and_extra_headers.js +47 -1
- package/samples/text_to_speech.js +22 -3
- package/src/addon.cpp +72 -0
- package/src/event_templates.cpp +20 -7
- package/src/event_templates.hpp +6 -0
- package/src/pjmedia/include/pjmedia/flite_port.h +10 -4
- package/src/pjmedia/include/pjmedia/pocketsphinx_port.h +19 -0
- package/src/pjmedia/src/pjmedia/flite_port.c +91 -25
- package/src/pjmedia/src/pjmedia/pocketsphinx_port.c +273 -0
- package/src/sip.cpp +707 -507
- package/src/sip.hpp +5 -0
package/src/addon.cpp
CHANGED
|
@@ -603,6 +603,41 @@ Napi::Value call_start_speech_synth(const Napi::CallbackInfo &info) {
|
|
|
603
603
|
return env.Null();
|
|
604
604
|
}
|
|
605
605
|
|
|
606
|
+
/**
 * N-API binding: start speech recognition on a call.
 *
 * Expects exactly two JavaScript arguments: a numeric call_id and a string
 * holding the JSON-encoded parameters. Invalid arguments, or a non-zero
 * result from pjw_call_start_speech_recog(), raise a JavaScript exception.
 * The function always returns JS null.
 */
Napi::Value call_start_speech_recog(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  // Arity is validated first so the indexing below is always in range.
  if (info.Length() != 2) {
    Napi::Error::New(env,
                     "Wrong number of arguments. Expected: call_id, params.")
        .ThrowAsJavaScriptException();
    return env.Null();
  }

  const Napi::Value id_arg = info[0];
  if (!id_arg.IsNumber()) {
    Napi::TypeError::New(env, "call_id must be number.")
        .ThrowAsJavaScriptException();
    return env.Null();
  }

  const Napi::Value params_arg = info[1];
  if (!params_arg.IsString()) {
    Napi::TypeError::New(env, "params must be a JSON string.")
        .ThrowAsJavaScriptException();
    return env.Null();
  }

  const int call_id = id_arg.As<Napi::Number>().Int32Value();
  const string params_json = params_arg.As<Napi::String>().Utf8Value();

  // A non-zero status means failure; the error text comes from pjw_get_error().
  if (pjw_call_start_speech_recog(call_id, params_json.c_str()) != 0) {
    Napi::Error::New(env, pjw_get_error()).ThrowAsJavaScriptException();
  }

  return env.Null();
}
|
|
639
|
+
|
|
640
|
+
|
|
606
641
|
Napi::Value call_stop_record_wav(const Napi::CallbackInfo &info) {
|
|
607
642
|
Napi::Env env = info.Env();
|
|
608
643
|
|
|
@@ -704,6 +739,40 @@ Napi::Value call_stop_fax(const Napi::CallbackInfo &info) {
|
|
|
704
739
|
return env.Null();
|
|
705
740
|
}
|
|
706
741
|
|
|
742
|
+
/**
 * N-API binding: stop speech synthesis on a call.
 *
 * Expects exactly two JavaScript arguments: a numeric call_id and a string
 * holding the JSON-encoded parameters. Invalid arguments, or a non-zero
 * result from pjw_call_stop_speech_synth(), raise a JavaScript exception.
 * The function always returns JS null.
 */
Napi::Value call_stop_speech_synth(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  // Two arguments are required, so the message must name both of them
  // (it previously claimed only call_id was expected).
  if (info.Length() != 2) {
    Napi::Error::New(env,
                     "Wrong number of arguments. Expected: call_id, params.")
        .ThrowAsJavaScriptException();
    return env.Null();
  }

  if (!info[0].IsNumber()) {
    Napi::TypeError::New(env, "call_id must be number.")
        .ThrowAsJavaScriptException();
    return env.Null();
  }
  int call_id = info[0].As<Napi::Number>().Int32Value();

  if (!info[1].IsString()) {
    Napi::TypeError::New(env, "params must be a JSON string.")
        .ThrowAsJavaScriptException();
    return env.Null();
  }
  const string json = info[1].As<Napi::String>().Utf8Value();

  int res = pjw_call_stop_speech_synth(call_id, json.c_str());

  // A non-zero status means failure; the error text comes from pjw_get_error().
  if (res != 0) {
    Napi::Error::New(env, pjw_get_error()).ThrowAsJavaScriptException();
    return env.Null();
  }

  return env.Null();
}
|
|
774
|
+
|
|
775
|
+
|
|
707
776
|
Napi::Value call_get_stream_stat(const Napi::CallbackInfo &info) {
|
|
708
777
|
Napi::Env env = info.Env();
|
|
709
778
|
|
|
@@ -1302,11 +1371,14 @@ Napi::Object init(Napi::Env env, Napi::Object exports) {
|
|
|
1302
1371
|
|
|
1303
1372
|
exports.Set("call_start_speech_synth", Napi::Function::New(env, call_start_speech_synth));
|
|
1304
1373
|
|
|
1374
|
+
exports.Set("call_start_speech_recog", Napi::Function::New(env, call_start_speech_recog));
|
|
1375
|
+
|
|
1305
1376
|
exports.Set("call_stop_record_wav",
|
|
1306
1377
|
Napi::Function::New(env, call_stop_record_wav));
|
|
1307
1378
|
exports.Set("call_stop_play_wav",
|
|
1308
1379
|
Napi::Function::New(env, call_stop_play_wav));
|
|
1309
1380
|
exports.Set("call_stop_fax", Napi::Function::New(env, call_stop_fax));
|
|
1381
|
+
exports.Set("call_stop_speech_synth", Napi::Function::New(env, call_stop_speech_synth));
|
|
1310
1382
|
exports.Set("call_get_stream_stat",
|
|
1311
1383
|
Napi::Function::New(env, call_get_stream_stat));
|
|
1312
1384
|
// exports.Set("call_refer", Napi::Function::New(env, call_refer));
|
package/src/event_templates.cpp
CHANGED
|
@@ -49,14 +49,9 @@ int make_evt_dtmf(char *dest, int size, long call_id, int digits_len,
|
|
|
49
49
|
|
|
50
50
|
int make_evt_call_ended(char *dest, int size, long call_id, int sip_msg_len,
|
|
51
51
|
const char *sip_msg) {
|
|
52
|
-
printf("make_evt_call_ended sip_msg_len=%i sip_msg=%
|
|
52
|
+
printf("make_evt_call_ended sip_msg_len=%i sip_msg=%p\n", sip_msg_len,
|
|
53
53
|
sip_msg);
|
|
54
|
-
if (
|
|
55
|
-
// received invalid pointer to sip_msg so do not add the message to the
|
|
56
|
-
// event
|
|
57
|
-
return snprintf(dest, size, "{\"event\": \"call_ended\", \"call_id\": %ld}",
|
|
58
|
-
call_id);
|
|
59
|
-
} else if (sip_msg_len > 500 && sip_msg_len < 2000 && sip_msg) {
|
|
54
|
+
if (sip_msg_len > 500 && sip_msg_len < 2000 && sip_msg) {
|
|
60
55
|
/* sip_msg_len sometimes show up as a large value like sip_msg_len=11560297
|
|
61
56
|
* which seems to be a bug in pjsip */
|
|
62
57
|
return snprintf(dest, size,
|
|
@@ -104,6 +99,24 @@ int make_evt_fax_result(char *dest, int size, long call_id, int result) {
|
|
|
104
99
|
result);
|
|
105
100
|
}
|
|
106
101
|
|
|
102
|
+
/*
 * Formats the JSON "end_of_file" event for call_id into dest (at most size
 * bytes, NUL included) and returns snprintf()'s result, i.e. the length the
 * complete event would have.
 */
int make_evt_end_of_file(char *dest, int size, long call_id) {
  static const char kEventFormat[] =
      "{\"event\": \"end_of_file\", \"call_id\": %ld}";

  const int written = snprintf(dest, size, kEventFormat, call_id);
  return written;
}
|
|
107
|
+
|
|
108
|
+
/*
 * Formats the JSON "end_of_speech" event for call_id into dest (at most size
 * bytes, NUL included) and returns snprintf()'s result, i.e. the length the
 * complete event would have.
 */
int make_evt_end_of_speech(char *dest, int size, long call_id) {
  static const char kEventFormat[] =
      "{\"event\": \"end_of_speech\", \"call_id\": %ld}";

  const int written = snprintf(dest, size, kEventFormat, call_id);
  return written;
}
|
|
113
|
+
|
|
114
|
+
/*
 * Stores c at logical offset *len of dest when it still fits (one byte is
 * always kept free for the terminating NUL) and advances *len regardless, so
 * *len ends up as the untruncated length, like snprintf()'s return value.
 */
static void evt_put_char(char *dest, int size, int *len, char c) {
  if (dest && size > 0 && *len < size - 1) {
    dest[*len] = c;
  }
  ++*len;
}

/*
 * Formats the JSON "speech_transcript" event for call_id into dest.
 *
 * The transcript is embedded as a JSON string: '"' and '\\' are
 * backslash-escaped and control characters (< 0x20) are written as \u00XX, so
 * the event stays valid JSON whatever the recognizer produced. A NULL
 * transcript is emitted as an empty string.
 *
 * At most size bytes (NUL included) are written. Returns the length the
 * complete event would have (same contract as snprintf), or a negative value
 * if formatting the fixed prefix failed.
 */
int make_evt_speech_transcript(char *dest, int size, long call_id, char* transcript) {
  static const char kHex[] = "0123456789abcdef";
  const char *text = transcript ? transcript : "";

  int len = snprintf(
      dest, size,
      "{\"event\": \"speech_transcript\", \"call_id\": %ld, \"transcript\": \"",
      call_id);
  if (len < 0) {
    return len;
  }

  for (const char *p = text; *p != '\0'; ++p) {
    const unsigned char c = (unsigned char)*p;
    if (c == '"' || c == '\\') {
      evt_put_char(dest, size, &len, '\\');
      evt_put_char(dest, size, &len, (char)c);
    } else if (c < 0x20) {
      evt_put_char(dest, size, &len, '\\');
      evt_put_char(dest, size, &len, 'u');
      evt_put_char(dest, size, &len, '0');
      evt_put_char(dest, size, &len, '0');
      evt_put_char(dest, size, &len, kHex[c >> 4]);
      evt_put_char(dest, size, &len, kHex[c & 0x0f]);
    } else {
      /* Bytes >= 0x80 pass through untouched (multi-byte UTF-8 sequences). */
      evt_put_char(dest, size, &len, (char)c);
    }
  }

  evt_put_char(dest, size, &len, '"');
  evt_put_char(dest, size, &len, '}');

  /* Terminate at the end of the text, or at the last byte when truncated. */
  if (dest && size > 0) {
    dest[len < size ? len : size - 1] = '\0';
  }
  return len;
}
|
|
119
|
+
|
|
107
120
|
int make_evt_tcp_msg(char *dest, int size, long call_id, const char *protocol, char *data, int data_len) {
|
|
108
121
|
return snprintf(
|
|
109
122
|
dest, size,
|
package/src/event_templates.hpp
CHANGED
|
@@ -34,6 +34,12 @@ int make_evt_registration_status(char *dest, int size, long account_id,
|
|
|
34
34
|
|
|
35
35
|
int make_evt_fax_result(char *dest, int size, long call_id, int result);
|
|
36
36
|
|
|
37
|
+
int make_evt_end_of_file(char *dest, int size, long call_id);
|
|
38
|
+
|
|
39
|
+
int make_evt_end_of_speech(char *dest, int size, long call_id);
|
|
40
|
+
|
|
41
|
+
int make_evt_speech_transcript(char *dest, int size, long call_id, char* transcript);
|
|
42
|
+
|
|
37
43
|
int make_evt_tcp_msg(char *dest, int size, long call_id, const char *protocol, char *data, int data_len);
|
|
38
44
|
|
|
39
45
|
#endif
|
|
package/src/pjmedia/include/pjmedia/flite_port.h
CHANGED
|
@@ -5,18 +5,24 @@
|
|
|
5
5
|
|
|
6
6
|
PJ_BEGIN_DECL
|
|
7
7
|
|
|
8
|
+
enum pjmedia_filte_option
|
|
9
|
+
{
|
|
10
|
+
PJMEDIA_SPEECH_NO_LOOP = 1
|
|
11
|
+
};
|
|
12
|
+
|
|
8
13
|
PJ_DEF(pj_status_t) pjmedia_flite_port_create( pj_pool_t *pool,
|
|
9
14
|
unsigned clock_rate,
|
|
10
15
|
unsigned channel_count,
|
|
11
16
|
unsigned samples_per_frame,
|
|
12
17
|
unsigned bits_per_sample,
|
|
13
|
-
void (*cb)(pjmedia_port*,
|
|
14
|
-
void *user_data,
|
|
15
|
-
int result),
|
|
16
|
-
void *user_data,
|
|
17
18
|
const char *voice,
|
|
18
19
|
pjmedia_port **p_port);
|
|
19
20
|
|
|
21
|
+
PJ_DEF(pj_status_t) pjmedia_flite_port_set_eof_cb(pjmedia_port *port,
|
|
22
|
+
void *user_data,
|
|
23
|
+
void (*cb)(pjmedia_port *port,
|
|
24
|
+
void *usr_data));
|
|
25
|
+
|
|
20
26
|
PJ_DEF(pj_status_t) pjmedia_flite_port_speak( pjmedia_port *port,
|
|
21
27
|
const char *text,
|
|
22
28
|
unsigned options);
|
|
package/src/pjmedia/include/pjmedia/pocketsphinx_port.h
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#ifndef __POCKETSPHINX_PORT_H__
|
|
2
|
+
#define __POCKETSPHINX_PORT_H__
|
|
3
|
+
|
|
4
|
+
#include <pjmedia/port.h>
|
|
5
|
+
|
|
6
|
+
PJ_BEGIN_DECL
|
|
7
|
+
|
|
8
|
+
/**
 * Create a media port that feeds the audio it receives to PocketSphinx.
 *
 * This is a prototype, so it is declared with PJ_DECL; PJ_DEF is reserved for
 * the function definition in pocketsphinx_port.c.
 *
 * @param pool              Pool the port is allocated from.
 * @param clock_rate        Sampling rate of the incoming audio.
 * @param channel_count     Number of channels; the implementation accepts 1.
 * @param samples_per_frame Samples delivered per put_frame() call.
 * @param bits_per_sample   Bits per sample; the implementation accepts 16.
 * @param cb                Callback invoked with the recognized transcript.
 * @param cb_user_data      Opaque pointer handed back to cb.
 * @param p_port            Receives the created port.
 *
 * @return PJ_SUCCESS on success, or an error status otherwise.
 */
PJ_DECL(pj_status_t) pjmedia_pocketsphinx_port_create( pj_pool_t *pool,
                         unsigned clock_rate,
                         unsigned channel_count,
                         unsigned samples_per_frame,
                         unsigned bits_per_sample,
                         void (*cb)(pjmedia_port*, void *user_data, char *transcript),
                         void *cb_user_data,
                         pjmedia_port **p_port);
|
|
16
|
+
|
|
17
|
+
PJ_END_DECL
|
|
18
|
+
|
|
19
|
+
#endif /* __POCKETSPHINX_PORT_H__ */
|
|
package/src/pjmedia/src/pjmedia/flite_port.c
CHANGED
|
@@ -53,26 +53,62 @@ static struct {
|
|
|
53
53
|
|
|
54
54
|
struct flite_t {
|
|
55
55
|
struct pjmedia_port base;
|
|
56
|
-
|
|
57
|
-
|
|
56
|
+
unsigned options;
|
|
57
|
+
|
|
58
58
|
cst_voice *v;
|
|
59
|
-
|
|
59
|
+
unsigned written_samples;
|
|
60
60
|
cst_wave *w;
|
|
61
|
-
|
|
61
|
+
|
|
62
|
+
pj_bool_t subscribed;
|
|
63
|
+
void (*cb)(pjmedia_port*, void*);
|
|
62
64
|
};
|
|
63
65
|
|
|
64
66
|
#define free_wave(w) if (w) {delete_wave(w) ; w = NULL; }
|
|
65
67
|
#define FLITE_BLOCK_SIZE 1024 * 32
|
|
66
68
|
|
|
69
|
+
/*
|
|
70
|
+
* Register a callback to be called when we reach the end of speech
|
|
71
|
+
*/
|
|
72
|
+
PJ_DEF(pj_status_t) pjmedia_flite_port_set_eof_cb(pjmedia_port *port,
|
|
73
|
+
void *user_data,
|
|
74
|
+
void (*cb)(pjmedia_port *port,
|
|
75
|
+
void *usr_data))
|
|
76
|
+
{
|
|
77
|
+
struct flite_t *flite;
|
|
78
|
+
|
|
79
|
+
/* Sanity check */
|
|
80
|
+
PJ_ASSERT_RETURN(port, -PJ_EINVAL);
|
|
81
|
+
|
|
82
|
+
/* Check that this is really a flite port */
|
|
83
|
+
PJ_ASSERT_RETURN(port->info.signature == SIGNATURE, -PJ_EINVALIDOP);
|
|
84
|
+
|
|
85
|
+
flite = (struct flite_t*) port;
|
|
86
|
+
|
|
87
|
+
flite->base.port_data.pdata = user_data;
|
|
88
|
+
flite->cb = cb;
|
|
89
|
+
|
|
90
|
+
return PJ_SUCCESS;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
/*
 * Event-manager handler for the flite port: on PJMEDIA_EVENT_CALLBACK it
 * invokes the registered callback, if any, with the port and its stored user
 * data. user_data is the flite_t passed at subscription. Always returns
 * PJ_SUCCESS.
 */
static pj_status_t speech_on_event(pjmedia_event *event,
                                   void *user_data)
{
    struct flite_t *self = (struct flite_t*)user_data;

    /* Nothing to do for other event types or when no callback is set. */
    if (event->type != PJMEDIA_EVENT_CALLBACK || !self->cb)
        return PJ_SUCCESS;

    (*self->cb)(&self->base, self->base.port_data.pdata);
    return PJ_SUCCESS;
}
|
|
106
|
+
|
|
67
107
|
PJ_DEF(pj_status_t) pjmedia_flite_port_create( pj_pool_t *pool,
|
|
68
108
|
unsigned clock_rate,
|
|
69
109
|
unsigned channel_count,
|
|
70
110
|
unsigned samples_per_frame,
|
|
71
111
|
unsigned bits_per_sample,
|
|
72
|
-
void (*cb)(pjmedia_port*,
|
|
73
|
-
void *user_data,
|
|
74
|
-
int result),
|
|
75
|
-
void *user_data,
|
|
76
112
|
const char *voice,
|
|
77
113
|
pjmedia_port **p_port)
|
|
78
114
|
{
|
|
@@ -107,10 +143,7 @@ PJ_DEF(pj_status_t) pjmedia_flite_port_create( pj_pool_t *pool,
|
|
|
107
143
|
if (!strcasecmp(voice, "awb")) {
|
|
108
144
|
flite->v = globals.awb;
|
|
109
145
|
} else if (!strcasecmp(voice, "kal")) {
|
|
110
|
-
|
|
111
|
-
* so kal talks a little bit too fast ...
|
|
112
|
-
* for now: "symlink" kal to kal16
|
|
113
|
-
*/ flite->v = globals.kal16;
|
|
146
|
+
flite->v = globals.kal; // this uses SamplingRate of 8000. All others use 16000
|
|
114
147
|
} else if (!strcasecmp(voice, "rms")) {
|
|
115
148
|
flite->v = globals.rms;
|
|
116
149
|
} else if (!strcasecmp(voice, "slt")) {
|
|
@@ -122,9 +155,6 @@ PJ_DEF(pj_status_t) pjmedia_flite_port_create( pj_pool_t *pool,
|
|
|
122
155
|
return 0;
|
|
123
156
|
}
|
|
124
157
|
|
|
125
|
-
flite->flite_cb = cb;
|
|
126
|
-
flite->flite_cb_user_data = user_data;
|
|
127
|
-
|
|
128
158
|
TRACE_((THIS_FILE, "flite_device created: %u/%u/%u/%u", clock_rate,
|
|
129
159
|
channel_count, samples_per_frame, bits_per_sample));
|
|
130
160
|
|
|
@@ -140,9 +170,12 @@ PJ_DEF(pj_status_t) pjmedia_flite_port_speak( pjmedia_port *port,
|
|
|
140
170
|
free_wave(flite->w);
|
|
141
171
|
}
|
|
142
172
|
|
|
173
|
+
flite->options = options;
|
|
174
|
+
|
|
143
175
|
flite->w = flite_text_to_wave(text, flite->v);
|
|
144
|
-
if (flite->w->sample_rate != PJMEDIA_PIA_SRATE(&port->info)) {
|
|
145
|
-
|
|
176
|
+
if ((unsigned)flite->w->sample_rate != PJMEDIA_PIA_SRATE(&port->info)) {
|
|
177
|
+
printf("resampling from %i to %i\n", flite->w->sample_rate, PJMEDIA_PIA_SRATE(&port->info));
|
|
178
|
+
cst_wave_resample(flite->w, PJMEDIA_PIA_SRATE(&port->info));
|
|
146
179
|
}
|
|
147
180
|
flite->written_samples = 0;
|
|
148
181
|
|
|
@@ -158,23 +191,48 @@ static pj_status_t flite_get_frame(pjmedia_port *port,
|
|
|
158
191
|
struct flite_t *flite = (struct flite_t*)port;
|
|
159
192
|
|
|
160
193
|
if(!flite->w) {
|
|
161
|
-
printf("flite no data\n");
|
|
194
|
+
//printf("flite no data\n");
|
|
162
195
|
frame->type = PJMEDIA_FRAME_TYPE_NONE;
|
|
163
196
|
return PJ_SUCCESS;
|
|
164
197
|
}
|
|
165
198
|
|
|
166
|
-
printf("written_samples=%i num_samples=%i\n", flite->written_samples, flite->w->num_samples);
|
|
167
|
-
if (flite->written_samples + PJMEDIA_PIA_SPF(&port->info) > flite->w->num_samples) {
|
|
168
|
-
printf("flite
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
199
|
+
//printf("written_samples=%i num_samples=%i\n", flite->written_samples, flite->w->num_samples);
|
|
200
|
+
if (flite->written_samples + PJMEDIA_PIA_SPF(&port->info) > (unsigned)flite->w->num_samples) {
|
|
201
|
+
printf("flite end of speech\n");
|
|
202
|
+
|
|
203
|
+
if(flite->cb) {
|
|
204
|
+
if (!flite->subscribed) {
|
|
205
|
+
pj_status_t status = pjmedia_event_subscribe(NULL, &speech_on_event,
|
|
206
|
+
flite, flite);
|
|
207
|
+
flite->subscribed = (status == PJ_SUCCESS)? PJ_TRUE:
|
|
208
|
+
PJ_FALSE;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
if (flite->subscribed) {
|
|
212
|
+
pjmedia_event event;
|
|
213
|
+
|
|
214
|
+
pjmedia_event_init(&event, PJMEDIA_EVENT_CALLBACK,
|
|
215
|
+
NULL, flite);
|
|
216
|
+
pjmedia_event_publish(NULL, flite, &event,
|
|
217
|
+
PJMEDIA_EVENT_PUBLISH_POST_EVENT);
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
pj_bool_t no_loop = (flite->options & PJMEDIA_SPEECH_NO_LOOP);
|
|
222
|
+
|
|
223
|
+
if(no_loop) {
|
|
224
|
+
free_wave(flite->w);
|
|
225
|
+
frame->type = PJMEDIA_FRAME_TYPE_NONE;
|
|
226
|
+
return PJ_SUCCESS;
|
|
227
|
+
} else {
|
|
228
|
+
flite->written_samples = 0;
|
|
229
|
+
}
|
|
172
230
|
}
|
|
173
231
|
|
|
174
232
|
memcpy(frame->buf, flite->w->samples + flite->written_samples, PJMEDIA_PIA_SPF(&port->info)*2);
|
|
175
233
|
flite->written_samples += PJMEDIA_PIA_SPF(&port->info);
|
|
176
234
|
frame->type = PJMEDIA_FRAME_TYPE_AUDIO;
|
|
177
|
-
printf("flite data written samples=%i\n", PJMEDIA_PIA_SPF(&port->info));
|
|
235
|
+
//printf("flite data written samples=%i\n", PJMEDIA_PIA_SPF(&port->info));
|
|
178
236
|
|
|
179
237
|
return PJ_SUCCESS;
|
|
180
238
|
}
|
|
@@ -187,8 +245,16 @@ static pj_status_t flite_on_destroy(pjmedia_port *port)
|
|
|
187
245
|
printf("flite_on_destroy\n");
|
|
188
246
|
|
|
189
247
|
struct flite_t *flite = (struct flite_t*)port;
|
|
248
|
+
|
|
249
|
+
pj_assert(port->info.signature == SIGNATURE);
|
|
250
|
+
|
|
190
251
|
free_wave(flite->w);
|
|
191
252
|
|
|
253
|
+
if (flite->subscribed) {
|
|
254
|
+
pjmedia_event_unsubscribe(NULL, &speech_on_event, flite, flite);
|
|
255
|
+
flite->subscribed = PJ_FALSE;
|
|
256
|
+
}
|
|
257
|
+
|
|
192
258
|
return PJ_SUCCESS;
|
|
193
259
|
}
|
|
194
260
|
|
|
package/src/pjmedia/src/pjmedia/pocketsphinx_port.c
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
/* $Id: pocketsphinx_port.c 0000 2024-03-09 mayamatakeshi $ */
|
|
2
|
+
/*
|
|
3
|
+
* Copyright (C) 2008-2009 Teluu Inc. (http://www.teluu.com)
|
|
4
|
+
* Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
|
|
5
|
+
*
|
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
|
9
|
+
* (at your option) any later version.
|
|
10
|
+
*
|
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
* GNU General Public License for more details.
|
|
15
|
+
*
|
|
16
|
+
* You should have received a copy of the GNU General Public License
|
|
17
|
+
* along with this program; if not, write to the Free Software
|
|
18
|
+
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
#include <pocketsphinx_port.h>
|
|
22
|
+
#include <pjmedia/errno.h>
|
|
23
|
+
#include <pjmedia/port.h>
|
|
24
|
+
#include <pj/assert.h>
|
|
25
|
+
#include <pj/pool.h>
|
|
26
|
+
#include <pj/string.h>
|
|
27
|
+
|
|
28
|
+
#include <pocketsphinx.h>
|
|
29
|
+
|
|
30
|
+
#define SIGNATURE PJMEDIA_SIGNATURE('p', 'i', 'n', 'x')
|
|
31
|
+
#define THIS_FILE "pocketsphinx_port.c"
|
|
32
|
+
|
|
33
|
+
#if 0
|
|
34
|
+
# define TRACE_(expr) PJ_LOG(4,expr)
|
|
35
|
+
#else
|
|
36
|
+
# define TRACE_(expr)
|
|
37
|
+
#endif
|
|
38
|
+
|
|
39
|
+
static pj_status_t pocketsphinx_put_frame(pjmedia_port *this_port,
|
|
40
|
+
pjmedia_frame *frame);
|
|
41
|
+
static pj_status_t pocketsphinx_on_destroy(pjmedia_port *this_port);
|
|
42
|
+
|
|
43
|
+
struct pocketsphinx_t
|
|
44
|
+
{
|
|
45
|
+
struct pjmedia_port base;
|
|
46
|
+
|
|
47
|
+
ps_decoder_t *decoder;
|
|
48
|
+
ps_config_t *config;
|
|
49
|
+
ps_endpointer_t *ep;
|
|
50
|
+
|
|
51
|
+
unsigned in_spf;
|
|
52
|
+
unsigned out_spf;
|
|
53
|
+
|
|
54
|
+
short *samples;
|
|
55
|
+
unsigned sample_count;
|
|
56
|
+
|
|
57
|
+
pj_bool_t subscribed;
|
|
58
|
+
void (*cb)(pjmedia_port*, void*, char*);
|
|
59
|
+
void *cb_user_data;
|
|
60
|
+
|
|
61
|
+
char transcript[4096];
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
/*
 * Event-manager handler for the pocketsphinx port: on PJMEDIA_EVENT_CALLBACK
 * it hands the stored transcript to the registered callback, if any.
 * user_data is the pocketsphinx_t passed at subscription. Always returns
 * PJ_SUCCESS.
 */
static pj_status_t speech_on_event(pjmedia_event *event,
                                   void *user_data)
{
    struct pocketsphinx_t *self = (struct pocketsphinx_t*)user_data;

    /* Nothing to do for other event types or when no callback is set. */
    if (event->type != PJMEDIA_EVENT_CALLBACK || !self->cb)
        return PJ_SUCCESS;

    (*self->cb)(&self->base, self->cb_user_data, self->transcript);
    return PJ_SUCCESS;
}
|
|
76
|
+
|
|
77
|
+
/*
 * Create a pocketsphinx recognition port.
 *
 * The port accepts mono, 16-bit audio through put_frame(), re-frames it to
 * the frame size required by the PocketSphinx endpointer and reports each
 * recognized utterance through cb (with cb_user_data).
 *
 * Returns PJ_SUCCESS and stores the port in *p_port on success. Returns
 * PJ_EINVAL for invalid arguments, PJ_ENOMEM when an allocation fails and
 * PJ_EUNKNOWN when the decoder or endpointer cannot be initialised. On
 * failure every PocketSphinx object created so far is released; memory taken
 * from pool is returned when the pool itself is released.
 */
PJ_DEF(pj_status_t) pjmedia_pocketsphinx_port_create( pj_pool_t *pool,
                         unsigned clock_rate,
                         unsigned channel_count,
                         unsigned samples_per_frame,
                         unsigned bits_per_sample,
                         void (*cb)(pjmedia_port*, void *user_data, char *transcript),
                         void *cb_user_data,
                         pjmedia_port **p_port)
{
    struct pocketsphinx_t *port;
    const pj_str_t name = pj_str("pocketsphinx");

    PJ_ASSERT_RETURN(pool && clock_rate && channel_count == 1 &&
                     samples_per_frame && bits_per_sample == 16 &&
                     p_port != NULL, PJ_EINVAL);

    port = PJ_POOL_ZALLOC_T(pool, struct pocketsphinx_t);
    /* Check the allocation result (the pool was already validated above). */
    PJ_ASSERT_RETURN(port != NULL, PJ_ENOMEM);

    pjmedia_port_info_init(&port->base.info, &name, SIGNATURE, clock_rate,
                           channel_count, bits_per_sample, samples_per_frame);

    port->base.put_frame = &pocketsphinx_put_frame;
    port->base.on_destroy = &pocketsphinx_on_destroy;

    port->cb = cb;
    port->cb_user_data = cb_user_data;

    port->config = ps_config_init(NULL);
    if (port->config == NULL) {
        TRACE_((THIS_FILE, "pocketsphinx port: config init failed\n"));
        return PJ_ENOMEM;
    }
    ps_default_search_args(port->config);

    if ((port->decoder = ps_init(port->config)) == NULL) {
        TRACE_((THIS_FILE, "pocketsphinx port: decoder init failed\n"));
        ps_config_free(port->config);
        port->config = NULL;
        return PJ_EUNKNOWN;
    }

    if ((port->ep = ps_endpointer_init(0, 0.0, 0, clock_rate, 0)) == NULL) {
        TRACE_((THIS_FILE, "pocketsphinx port: endpointer init failed\n"));
        ps_free(port->decoder);
        ps_config_free(port->config);
        port->decoder = NULL;
        port->config = NULL;
        return PJ_EUNKNOWN;
    }

    port->in_spf = samples_per_frame;
    port->out_spf = (unsigned) ps_endpointer_frame_size(port->ep);

    /* Staging buffer used to re-frame input to the endpointer frame size. */
    port->samples = (short*) pj_pool_alloc(pool, port->out_spf * sizeof(short));
    if (port->samples == NULL) {
        TRACE_((THIS_FILE, "Failed to allocate buffer for samples\n"));
        ps_endpointer_free(port->ep);
        ps_free(port->decoder);
        ps_config_free(port->config);
        port->ep = NULL;
        port->decoder = NULL;
        port->config = NULL;
        return PJ_ENOMEM;
    }

    TRACE_((THIS_FILE, "pocketsphinx port created: %u/%u/%u/%u", clock_rate,
            channel_count, samples_per_frame, bits_per_sample));

    printf("pocketsphinx_create in_spf=%u out_spf=%u\n", port->in_spf, port->out_spf);
    *p_port = &port->base;
    return PJ_SUCCESS;
}
|
|
136
|
+
|
|
137
|
+
void feed(struct pocketsphinx_t *port, short *frame) {
|
|
138
|
+
const int16 *speech;
|
|
139
|
+
int prev_in_speech = ps_endpointer_in_speech(port->ep);
|
|
140
|
+
speech = ps_endpointer_process(port->ep, frame);
|
|
141
|
+
if (speech != NULL) {
|
|
142
|
+
const char *hyp;
|
|
143
|
+
if (!prev_in_speech) {
|
|
144
|
+
printf("pocketsphinx speech start at %.2f\n", ps_endpointer_speech_start(port->ep));
|
|
145
|
+
ps_start_utt(port->decoder);
|
|
146
|
+
}
|
|
147
|
+
if (ps_process_raw(port->decoder, speech, port->out_spf, FALSE, FALSE) < 0) {
|
|
148
|
+
printf("pocketsphinx ps_process_raw() failed\n");
|
|
149
|
+
return;
|
|
150
|
+
}
|
|
151
|
+
if ((hyp = ps_get_hyp(port->decoder, NULL)) != NULL) {
|
|
152
|
+
//printf("pocketsphinx partial result: %s\n", hyp);
|
|
153
|
+
}
|
|
154
|
+
if (!ps_endpointer_in_speech(port->ep)) {
|
|
155
|
+
printf("Speech end at %.2f\n", ps_endpointer_speech_end(port->ep));
|
|
156
|
+
ps_end_utt(port->decoder);
|
|
157
|
+
if ((hyp = ps_get_hyp(port->decoder, NULL)) != NULL) {
|
|
158
|
+
printf("pocketsphinx speech: %s\n", hyp);
|
|
159
|
+
if(strlen(hyp) == 0) return;
|
|
160
|
+
|
|
161
|
+
strncpy(port->transcript, hyp, sizeof(port->transcript) - 1);
|
|
162
|
+
|
|
163
|
+
// Ensure the destination string is null-terminated
|
|
164
|
+
port->transcript[sizeof(port->transcript) - 1] = '\0';
|
|
165
|
+
|
|
166
|
+
if(port->cb) {
|
|
167
|
+
if (!port->subscribed) {
|
|
168
|
+
pj_status_t status = pjmedia_event_subscribe(NULL, &speech_on_event,
|
|
169
|
+
port, port);
|
|
170
|
+
port->subscribed = (status == PJ_SUCCESS)? PJ_TRUE:
|
|
171
|
+
PJ_FALSE;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
if (port->subscribed) {
|
|
175
|
+
pjmedia_event event;
|
|
176
|
+
|
|
177
|
+
pjmedia_event_init(&event, PJMEDIA_EVENT_CALLBACK,
|
|
178
|
+
NULL, port);
|
|
179
|
+
pjmedia_event_publish(NULL, port, &event,
|
|
180
|
+
PJMEDIA_EVENT_PUBLISH_POST_EVENT);
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
/*
 * Complete the partially filled staging buffer with samples taken from the
 * start of frame, feed the resulting full frame and empty the buffer.
 *
 * Returns the number of samples consumed from frame
 * (out_spf - sample_count). The caller must make sure frame holds at least
 * that many samples.
 */
unsigned feed_one(struct pocketsphinx_t *port, pjmedia_frame *frame){
    const unsigned missing = port->out_spf - port->sample_count;
    short *tail = (short*)port->samples + port->sample_count;

    memcpy(tail, frame->buf, missing * sizeof(short));
    feed(port, port->samples);

    port->sample_count = 0;
    return missing;
}
|
|
195
|
+
|
|
196
|
+
/*
 * Feed every complete endpointer frame contained in frame and buffer the
 * remainder.
 *
 * Samples left over from a previous call are completed first (feed_one), then
 * as many whole out_spf-sized frames as possible are fed straight from the
 * input buffer, and whatever is left is stored in port->samples for the next
 * call.
 */
void feed_all(struct pocketsphinx_t *port, pjmedia_frame *frame) {
    /* frame->size is in bytes; the audio is 16-bit, so 2 bytes per sample. */
    unsigned remaining = frame->size / 2;
    /* Work on a typed pointer so that all offsets are counted in samples. */
    short *cursor = (short*)frame->buf;

    if (port->sample_count > 0) {
        const unsigned needed = port->out_spf - port->sample_count;

        if (remaining < needed) {
            /* Not enough input to complete the pending frame: keep buffering. */
            memcpy(port->samples + port->sample_count, cursor,
                   remaining * sizeof(short));
            port->sample_count += remaining;
            return;
        }

        const unsigned used = feed_one(port, frame);
        cursor += used;
        remaining -= used;
    }

    /* Advance by exactly one endpointer frame per iteration. */
    while (remaining >= port->out_spf) {
        feed(port, cursor);
        cursor += port->out_spf;
        remaining -= port->out_spf;
    }

    if (remaining) {
        memcpy(port->samples, cursor, remaining * sizeof(short));
        port->sample_count = remaining;
    }
}
|
|
218
|
+
|
|
219
|
+
static pj_status_t pocketsphinx_put_frame(pjmedia_port *this_port,
|
|
220
|
+
pjmedia_frame *frame)
|
|
221
|
+
{
|
|
222
|
+
if(frame->type != PJMEDIA_FRAME_TYPE_AUDIO) return PJ_SUCCESS;
|
|
223
|
+
|
|
224
|
+
struct pocketsphinx_t *port = (struct pocketsphinx_t*) this_port;
|
|
225
|
+
|
|
226
|
+
if(port->in_spf == port->out_spf) {
|
|
227
|
+
//printf("feed\n");
|
|
228
|
+
feed(port, (short*)frame->buf);
|
|
229
|
+
return;
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
if(port->in_spf > port->out_spf) {
|
|
233
|
+
//printf("feed_all\n");
|
|
234
|
+
feed_all(port, frame);
|
|
235
|
+
return;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
unsigned samples = frame->size / 2;
|
|
239
|
+
if(samples + port->sample_count >= port->out_spf) {
|
|
240
|
+
// enough to feed once
|
|
241
|
+
//printf("feed_one\n");
|
|
242
|
+
feed_one(port, frame);
|
|
243
|
+
return;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
// not enough to feed.
|
|
247
|
+
//printf("not enough to feed\n");
|
|
248
|
+
memcpy((short*)port->samples + port->sample_count, frame->buf, samples * sizeof(short));
|
|
249
|
+
port->sample_count += samples;
|
|
250
|
+
|
|
251
|
+
return PJ_SUCCESS;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
/*
|
|
255
|
+
* Destroy port.
|
|
256
|
+
*/
|
|
257
|
+
static pj_status_t pocketsphinx_on_destroy(pjmedia_port *this_port)
|
|
258
|
+
{
|
|
259
|
+
struct pocketsphinx_t *port = (struct pocketsphinx_t*) this_port;
|
|
260
|
+
|
|
261
|
+
ps_endpointer_free(port->ep);
|
|
262
|
+
ps_free(port->decoder);
|
|
263
|
+
ps_config_free(port->config);
|
|
264
|
+
|
|
265
|
+
if (port->subscribed) {
|
|
266
|
+
pjmedia_event_unsubscribe(NULL, &speech_on_event, port, port);
|
|
267
|
+
port->subscribed = PJ_FALSE;
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
return PJ_SUCCESS;
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
|