sip-lab 1.22.0 → 1.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/README.md +5 -4
  2. package/binding.gyp +4 -0
  3. package/build_deps.sh +21 -1
  4. package/index.js +19 -0
  5. package/package.json +3 -2
  6. package/pocketsphinx/model/CMakeLists.txt +3 -0
  7. package/pocketsphinx/model/en-us/cmudict-en-us.dict +134782 -0
  8. package/pocketsphinx/model/en-us/en-us/README +34 -0
  9. package/pocketsphinx/model/en-us/en-us/feat.params +12 -0
  10. package/pocketsphinx/model/en-us/en-us/mdef +0 -0
  11. package/pocketsphinx/model/en-us/en-us/means +0 -0
  12. package/pocketsphinx/model/en-us/en-us/noisedict +5 -0
  13. package/pocketsphinx/model/en-us/en-us/sendump +0 -0
  14. package/pocketsphinx/model/en-us/en-us/transition_matrices +0 -0
  15. package/pocketsphinx/model/en-us/en-us/variances +0 -0
  16. package/pocketsphinx/model/en-us/en-us-phone.lm.bin +0 -0
  17. package/pocketsphinx/model/en-us/en-us.lm.bin +0 -0
  18. package/prebuilds/linux-x64/sip-lab.node +0 -0
  19. package/samples/artifacts/hello_good_morning.wav +0 -0
  20. package/samples/play_wav_and_speech_recog.bad_transcript.pcmu8000.js +182 -0
  21. package/samples/speech_synth_and_recog.speex16000.js +186 -0
  22. package/samples/start_play_wav_with_end_of_file_event.js +269 -0
  23. package/samples/start_play_wav_with_no_loop.js +257 -0
  24. package/samples/tcp_and_extra_headers.js +47 -1
  25. package/samples/text_to_speech.js +22 -3
  26. package/src/addon.cpp +72 -0
  27. package/src/event_templates.cpp +20 -7
  28. package/src/event_templates.hpp +6 -0
  29. package/src/pjmedia/include/pjmedia/flite_port.h +10 -4
  30. package/src/pjmedia/include/pjmedia/pocketsphinx_port.h +19 -0
  31. package/src/pjmedia/src/pjmedia/flite_port.c +91 -25
  32. package/src/pjmedia/src/pjmedia/pocketsphinx_port.c +273 -0
  33. package/src/sip.cpp +707 -507
  34. package/src/sip.hpp +5 -0
package/src/addon.cpp CHANGED
@@ -603,6 +603,41 @@ Napi::Value call_start_speech_synth(const Napi::CallbackInfo &info) {
603
603
  return env.Null();
604
604
  }
605
605
 
606
+ Napi::Value call_start_speech_recog(const Napi::CallbackInfo &info) {
607
+ Napi::Env env = info.Env();
608
+
609
+ if (info.Length() != 2) {
610
+ Napi::Error::New(env,
611
+ "Wrong number of arguments. Expected: call_id, params.")
612
+ .ThrowAsJavaScriptException();
613
+ return env.Null();
614
+ }
615
+
616
+ if (!info[0].IsNumber()) {
617
+ Napi::TypeError::New(env, "call_id must be number.")
618
+ .ThrowAsJavaScriptException();
619
+ return env.Null();
620
+ }
621
+ int call_id = info[0].As<Napi::Number>().Int32Value();
622
+
623
+ if (!info[1].IsString()) {
624
+ Napi::TypeError::New(env, "params must be a JSON string.")
625
+ .ThrowAsJavaScriptException();
626
+ return env.Null();
627
+ }
628
+ const string json = info[1].As<Napi::String>().Utf8Value();
629
+
630
+ int res = pjw_call_start_speech_recog(call_id, json.c_str());
631
+
632
+ if (res != 0) {
633
+ Napi::Error::New(env, pjw_get_error()).ThrowAsJavaScriptException();
634
+ return env.Null();
635
+ }
636
+
637
+ return env.Null();
638
+ }
639
+
640
+
606
641
  Napi::Value call_stop_record_wav(const Napi::CallbackInfo &info) {
607
642
  Napi::Env env = info.Env();
608
643
 
@@ -704,6 +739,40 @@ Napi::Value call_stop_fax(const Napi::CallbackInfo &info) {
704
739
  return env.Null();
705
740
  }
706
741
 
742
+ Napi::Value call_stop_speech_synth(const Napi::CallbackInfo &info) {
743
+ Napi::Env env = info.Env();
744
+
745
+ if (info.Length() != 2) {
746
+ Napi::Error::New(env, "Wrong number of arguments. Expected: call_id")
747
+ .ThrowAsJavaScriptException();
748
+ return env.Null();
749
+ }
750
+
751
+ if (!info[0].IsNumber()) {
752
+ Napi::TypeError::New(env, "call_id must be number.")
753
+ .ThrowAsJavaScriptException();
754
+ return env.Null();
755
+ }
756
+ int call_id = info[0].As<Napi::Number>().Int32Value();
757
+
758
+ if (!info[1].IsString()) {
759
+ Napi::TypeError::New(env, "params must be a JSON string.")
760
+ .ThrowAsJavaScriptException();
761
+ return env.Null();
762
+ }
763
+ const string json = info[1].As<Napi::String>().Utf8Value();
764
+
765
+ int res = pjw_call_stop_speech_synth(call_id, json.c_str());
766
+
767
+ if (res != 0) {
768
+ Napi::Error::New(env, pjw_get_error()).ThrowAsJavaScriptException();
769
+ return env.Null();
770
+ }
771
+
772
+ return env.Null();
773
+ }
774
+
775
+
707
776
  Napi::Value call_get_stream_stat(const Napi::CallbackInfo &info) {
708
777
  Napi::Env env = info.Env();
709
778
 
@@ -1302,11 +1371,14 @@ Napi::Object init(Napi::Env env, Napi::Object exports) {
1302
1371
 
1303
1372
  exports.Set("call_start_speech_synth", Napi::Function::New(env, call_start_speech_synth));
1304
1373
 
1374
+ exports.Set("call_start_speech_recog", Napi::Function::New(env, call_start_speech_recog));
1375
+
1305
1376
  exports.Set("call_stop_record_wav",
1306
1377
  Napi::Function::New(env, call_stop_record_wav));
1307
1378
  exports.Set("call_stop_play_wav",
1308
1379
  Napi::Function::New(env, call_stop_play_wav));
1309
1380
  exports.Set("call_stop_fax", Napi::Function::New(env, call_stop_fax));
1381
+ exports.Set("call_stop_speech_synth", Napi::Function::New(env, call_stop_speech_synth));
1310
1382
  exports.Set("call_get_stream_stat",
1311
1383
  Napi::Function::New(env, call_get_stream_stat));
1312
1384
  // exports.Set("call_refer", Napi::Function::New(env, call_refer));
@@ -49,14 +49,9 @@ int make_evt_dtmf(char *dest, int size, long call_id, int digits_len,
49
49
 
50
50
  int make_evt_call_ended(char *dest, int size, long call_id, int sip_msg_len,
51
51
  const char *sip_msg) {
52
- printf("make_evt_call_ended sip_msg_len=%i sip_msg=%s\n", sip_msg_len,
52
+ printf("make_evt_call_ended sip_msg_len=%i sip_msg=%p\n", sip_msg_len,
53
53
  sip_msg);
54
- if (!sip_msg || sip_msg == (char *)0xc000000000000) {
55
- // received invalid pointer to sip_msg so do not add the message to the
56
- // event
57
- return snprintf(dest, size, "{\"event\": \"call_ended\", \"call_id\": %ld}",
58
- call_id);
59
- } else if (sip_msg_len > 500 && sip_msg_len < 2000 && sip_msg) {
54
+ if (sip_msg_len > 500 && sip_msg_len < 2000 && sip_msg) {
60
55
  /* sip_msg_len sometimes show up as a large value like sip_msg_len=11560297
61
56
  * which seems to be a bug in pjsip */
62
57
  return snprintf(dest, size,
@@ -104,6 +99,24 @@ int make_evt_fax_result(char *dest, int size, long call_id, int result) {
104
99
  result);
105
100
  }
106
101
 
102
/*
 * Format the "end_of_file" event for call_id as JSON into dest (at most size
 * bytes, terminator included). Returns snprintf()'s result, i.e. the length
 * the full text needs regardless of truncation.
 */
int make_evt_end_of_file(char *dest, int size, long call_id) {
  static const char kEndOfFileFmt[] =
      "{\"event\": \"end_of_file\", \"call_id\": %ld}";

  const int needed = snprintf(dest, size, kEndOfFileFmt, call_id);
  return needed;
}
107
+
108
/*
 * Format the "end_of_speech" event for call_id as JSON into dest (at most
 * size bytes, terminator included). Returns snprintf()'s result, i.e. the
 * length the full text needs regardless of truncation.
 */
int make_evt_end_of_speech(char *dest, int size, long call_id) {
  static const char kEndOfSpeechFmt[] =
      "{\"event\": \"end_of_speech\", \"call_id\": %ld}";

  const int needed = snprintf(dest, size, kEndOfSpeechFmt, call_id);
  return needed;
}
113
+
114
/*
 * Store c at logical offset *pos of dest when it still fits (one byte is
 * always reserved for the terminator), then advance *pos unconditionally so
 * the caller keeps counting the untruncated length.
 */
static void evt_put_char(char *dest, int size, int *pos, char c) {
  if (dest && *pos < size - 1)
    dest[*pos] = c;
  ++*pos;
}

/*
 * Format the "speech_transcript" event as JSON into dest.
 *
 * dest/size   output buffer and its capacity (terminator included).
 * call_id     id of the call the transcript belongs to.
 * transcript  recognized text; NULL is treated as an empty string.
 *
 * The transcript is JSON-escaped ('"', '\\' and control characters below
 * 0x20) so arbitrary text cannot break the event syntax. Like snprintf(),
 * the return value is the length of the complete event even when the output
 * had to be truncated, and dest is NUL-terminated whenever size > 0.
 */
int make_evt_speech_transcript(char *dest, int size, long call_id, char* transcript) {
  static const char kHex[] = "0123456789abcdef";

  int len = snprintf(
      dest, size,
      "{\"event\": \"speech_transcript\", \"call_id\": %ld, \"transcript\": \"",
      call_id);
  if (len < 0)
    return len;

  for (const char *p = transcript ? transcript : ""; *p; ++p) {
    const unsigned char c = (unsigned char)*p;

    if (c == '"' || c == '\\') {
      evt_put_char(dest, size, &len, '\\');
      evt_put_char(dest, size, &len, (char)c);
    } else if (c < 0x20) {
      // Control characters are not allowed raw inside a JSON string.
      evt_put_char(dest, size, &len, '\\');
      evt_put_char(dest, size, &len, 'u');
      evt_put_char(dest, size, &len, '0');
      evt_put_char(dest, size, &len, '0');
      evt_put_char(dest, size, &len, kHex[c >> 4]);
      evt_put_char(dest, size, &len, kHex[c & 0x0f]);
    } else {
      evt_put_char(dest, size, &len, (char)c);
    }
  }

  evt_put_char(dest, size, &len, '"');
  evt_put_char(dest, size, &len, '}');

  if (dest && size > 0)
    dest[len < size ? len : size - 1] = '\0';

  return len;
}
119
+
107
120
  int make_evt_tcp_msg(char *dest, int size, long call_id, const char *protocol, char *data, int data_len) {
108
121
  return snprintf(
109
122
  dest, size,
@@ -34,6 +34,12 @@ int make_evt_registration_status(char *dest, int size, long account_id,
34
34
 
35
35
  int make_evt_fax_result(char *dest, int size, long call_id, int result);
36
36
 
37
+ int make_evt_end_of_file(char *dest, int size, long call_id);
38
+
39
+ int make_evt_end_of_speech(char *dest, int size, long call_id);
40
+
41
+ int make_evt_speech_transcript(char *dest, int size, long call_id, char* transcript);
42
+
37
43
  int make_evt_tcp_msg(char *dest, int size, long call_id, const char *protocol, char *data, int data_len);
38
44
 
39
45
  #endif
@@ -5,18 +5,24 @@
5
5
 
6
6
  PJ_BEGIN_DECL
7
7
 
8
/**
 * Option bits stored by pjmedia_flite_port_speak() and tested with a
 * bitwise AND when the synthesized wave has been fully played.
 */
enum pjmedia_filte_option
{
    /**
     * When set, the wave is released at end of speech and the port stops
     * producing audio; when clear, playback restarts from the beginning.
     */
    PJMEDIA_SPEECH_NO_LOOP = 0x01
};
12
+
8
13
  PJ_DEF(pj_status_t) pjmedia_flite_port_create( pj_pool_t *pool,
9
14
  unsigned clock_rate,
10
15
  unsigned channel_count,
11
16
  unsigned samples_per_frame,
12
17
  unsigned bits_per_sample,
13
- void (*cb)(pjmedia_port*,
14
- void *user_data,
15
- int result),
16
- void *user_data,
17
18
  const char *voice,
18
19
  pjmedia_port **p_port);
19
20
 
21
+ PJ_DEF(pj_status_t) pjmedia_flite_port_set_eof_cb(pjmedia_port *port,
22
+ void *user_data,
23
+ void (*cb)(pjmedia_port *port,
24
+ void *usr_data));
25
+
20
26
  PJ_DEF(pj_status_t) pjmedia_flite_port_speak( pjmedia_port *port,
21
27
  const char *text,
22
28
  unsigned options);
@@ -0,0 +1,19 @@
1
+ #ifndef __POCKETSPHINX_PORT_H__
2
+ #define __POCKETSPHINX_PORT_H__
3
+
4
+ #include <pjmedia/port.h>
5
+
6
+ PJ_BEGIN_DECL
7
+
8
+ PJ_DEF(pj_status_t) pjmedia_pocketsphinx_port_create( pj_pool_t *pool,
9
+ unsigned clock_rate,
10
+ unsigned channel_count,
11
+ unsigned samples_per_frame,
12
+ unsigned bits_per_sample,
13
+ void (*cb)(pjmedia_port*, void *user_data, char *transcript),
14
+ void *cb_user_data,
15
+ pjmedia_port **p_port);
16
+
17
+ PJ_END_DECL
18
+
19
+ #endif /* __POCKETSPHINX_PORT_H__ */
@@ -53,26 +53,62 @@ static struct {
53
53
 
54
54
  struct flite_t {
55
55
  struct pjmedia_port base;
56
- void (*flite_cb)(pjmedia_port*, void*, int);
57
- void *flite_cb_user_data;
56
+ unsigned options;
57
+
58
58
  cst_voice *v;
59
- int written_samples;
59
+ unsigned written_samples;
60
60
  cst_wave *w;
61
- char *buffer;
61
+
62
+ pj_bool_t subscribed;
63
+ void (*cb)(pjmedia_port*, void*);
62
64
  };
63
65
 
64
66
  #define free_wave(w) if (w) {delete_wave(w) ; w = NULL; }
65
67
  #define FLITE_BLOCK_SIZE 1024 * 32
66
68
 
69
+ /*
70
+ * Register a callback to be called when we reach the end of speech
71
+ */
72
+ PJ_DEF(pj_status_t) pjmedia_flite_port_set_eof_cb(pjmedia_port *port,
73
+ void *user_data,
74
+ void (*cb)(pjmedia_port *port,
75
+ void *usr_data))
76
+ {
77
+ struct flite_t *flite;
78
+
79
+ /* Sanity check */
80
+ PJ_ASSERT_RETURN(port, -PJ_EINVAL);
81
+
82
+ /* Check that this is really a flite port */
83
+ PJ_ASSERT_RETURN(port->info.signature == SIGNATURE, -PJ_EINVALIDOP);
84
+
85
+ flite = (struct flite_t*) port;
86
+
87
+ flite->base.port_data.pdata = user_data;
88
+ flite->cb = cb;
89
+
90
+ return PJ_SUCCESS;
91
+ }
92
+
93
+
94
+ static pj_status_t speech_on_event(pjmedia_event *event,
95
+ void *user_data)
96
+ {
97
+ struct flite_t *flite = (struct flite_t*)user_data;
98
+
99
+ if (event->type == PJMEDIA_EVENT_CALLBACK) {
100
+ if (flite->cb)
101
+ (*flite->cb)(&flite->base, flite->base.port_data.pdata);
102
+ }
103
+
104
+ return PJ_SUCCESS;
105
+ }
106
+
67
107
  PJ_DEF(pj_status_t) pjmedia_flite_port_create( pj_pool_t *pool,
68
108
  unsigned clock_rate,
69
109
  unsigned channel_count,
70
110
  unsigned samples_per_frame,
71
111
  unsigned bits_per_sample,
72
- void (*cb)(pjmedia_port*,
73
- void *user_data,
74
- int result),
75
- void *user_data,
76
112
  const char *voice,
77
113
  pjmedia_port **p_port)
78
114
  {
@@ -107,10 +143,7 @@ PJ_DEF(pj_status_t) pjmedia_flite_port_create( pj_pool_t *pool,
107
143
  if (!strcasecmp(voice, "awb")) {
108
144
  flite->v = globals.awb;
109
145
  } else if (!strcasecmp(voice, "kal")) {
110
- /* "kal" is 8kHz and the native rate is set to 16kHz
111
- * so kal talks a little bit too fast ...
112
- * for now: "symlink" kal to kal16
113
- */ flite->v = globals.kal16;
146
+ flite->v = globals.kal; // this uses SamplingRate of 8000. All others use 16000
114
147
  } else if (!strcasecmp(voice, "rms")) {
115
148
  flite->v = globals.rms;
116
149
  } else if (!strcasecmp(voice, "slt")) {
@@ -122,9 +155,6 @@ PJ_DEF(pj_status_t) pjmedia_flite_port_create( pj_pool_t *pool,
122
155
  return 0;
123
156
  }
124
157
 
125
- flite->flite_cb = cb;
126
- flite->flite_cb_user_data = user_data;
127
-
128
158
  TRACE_((THIS_FILE, "flite_device created: %u/%u/%u/%u", clock_rate,
129
159
  channel_count, samples_per_frame, bits_per_sample));
130
160
 
@@ -140,9 +170,12 @@ PJ_DEF(pj_status_t) pjmedia_flite_port_speak( pjmedia_port *port,
140
170
  free_wave(flite->w);
141
171
  }
142
172
 
173
+ flite->options = options;
174
+
143
175
  flite->w = flite_text_to_wave(text, flite->v);
144
- if (flite->w->sample_rate != PJMEDIA_PIA_SRATE(&port->info)) {
145
- cst_wave_resample(flite->w, PJMEDIA_PIA_SRATE(&port->info));
176
+ if ((unsigned)flite->w->sample_rate != PJMEDIA_PIA_SRATE(&port->info)) {
177
+ printf("resampling from %i to %i\n", flite->w->sample_rate, PJMEDIA_PIA_SRATE(&port->info));
178
+ cst_wave_resample(flite->w, PJMEDIA_PIA_SRATE(&port->info));
146
179
  }
147
180
  flite->written_samples = 0;
148
181
 
@@ -158,23 +191,48 @@ static pj_status_t flite_get_frame(pjmedia_port *port,
158
191
  struct flite_t *flite = (struct flite_t*)port;
159
192
 
160
193
  if(!flite->w) {
161
- printf("flite no data\n");
194
+ //printf("flite no data\n");
162
195
  frame->type = PJMEDIA_FRAME_TYPE_NONE;
163
196
  return PJ_SUCCESS;
164
197
  }
165
198
 
166
- printf("written_samples=%i num_samples=%i\n", flite->written_samples, flite->w->num_samples);
167
- if (flite->written_samples + PJMEDIA_PIA_SPF(&port->info) > flite->w->num_samples) {
168
- printf("flite no more data\n");
169
- free_wave(flite->w);
170
- frame->type = PJMEDIA_FRAME_TYPE_NONE;
171
- return PJ_SUCCESS;
199
+ //printf("written_samples=%i num_samples=%i\n", flite->written_samples, flite->w->num_samples);
200
+ if (flite->written_samples + PJMEDIA_PIA_SPF(&port->info) > (unsigned)flite->w->num_samples) {
201
+ printf("flite end of speech\n");
202
+
203
+ if(flite->cb) {
204
+ if (!flite->subscribed) {
205
+ pj_status_t status = pjmedia_event_subscribe(NULL, &speech_on_event,
206
+ flite, flite);
207
+ flite->subscribed = (status == PJ_SUCCESS)? PJ_TRUE:
208
+ PJ_FALSE;
209
+ }
210
+
211
+ if (flite->subscribed) {
212
+ pjmedia_event event;
213
+
214
+ pjmedia_event_init(&event, PJMEDIA_EVENT_CALLBACK,
215
+ NULL, flite);
216
+ pjmedia_event_publish(NULL, flite, &event,
217
+ PJMEDIA_EVENT_PUBLISH_POST_EVENT);
218
+ }
219
+ }
220
+
221
+ pj_bool_t no_loop = (flite->options & PJMEDIA_SPEECH_NO_LOOP);
222
+
223
+ if(no_loop) {
224
+ free_wave(flite->w);
225
+ frame->type = PJMEDIA_FRAME_TYPE_NONE;
226
+ return PJ_SUCCESS;
227
+ } else {
228
+ flite->written_samples = 0;
229
+ }
172
230
  }
173
231
 
174
232
  memcpy(frame->buf, flite->w->samples + flite->written_samples, PJMEDIA_PIA_SPF(&port->info)*2);
175
233
  flite->written_samples += PJMEDIA_PIA_SPF(&port->info);
176
234
  frame->type = PJMEDIA_FRAME_TYPE_AUDIO;
177
- printf("flite data written samples=%i\n", PJMEDIA_PIA_SPF(&port->info));
235
+ //printf("flite data written samples=%i\n", PJMEDIA_PIA_SPF(&port->info));
178
236
 
179
237
  return PJ_SUCCESS;
180
238
  }
@@ -187,8 +245,16 @@ static pj_status_t flite_on_destroy(pjmedia_port *port)
187
245
  printf("flite_on_destroy\n");
188
246
 
189
247
  struct flite_t *flite = (struct flite_t*)port;
248
+
249
+ pj_assert(port->info.signature == SIGNATURE);
250
+
190
251
  free_wave(flite->w);
191
252
 
253
+ if (flite->subscribed) {
254
+ pjmedia_event_unsubscribe(NULL, &speech_on_event, flite, flite);
255
+ flite->subscribed = PJ_FALSE;
256
+ }
257
+
192
258
  return PJ_SUCCESS;
193
259
  }
194
260
 
@@ -0,0 +1,273 @@
1
+ /* $Id: pocketsphinx_port.c 0000 2024-03-09 mayamatakeshi $ */
2
+ /*
3
+ * Copyright (C) 2008-2009 Teluu Inc. (http://www.teluu.com)
4
+ * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
5
+ *
6
+ * This program is free software; you can redistribute it and/or modify
7
+ * it under the terms of the GNU General Public License as published by
8
+ * the Free Software Foundation; either version 2 of the License, or
9
+ * (at your option) any later version.
10
+ *
11
+ * This program is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ * GNU General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU General Public License
17
+ * along with this program; if not, write to the Free Software
18
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #include <pocketsphinx_port.h>
22
+ #include <pjmedia/errno.h>
23
+ #include <pjmedia/port.h>
24
+ #include <pj/assert.h>
25
+ #include <pj/pool.h>
26
+ #include <pj/string.h>
27
+
28
+ #include <pocketsphinx.h>
29
+
30
+ #define SIGNATURE PJMEDIA_SIGNATURE('p', 'i', 'n', 'x')
31
+ #define THIS_FILE "pocketsphinx_port.c"
32
+
33
+ #if 0
34
+ # define TRACE_(expr) PJ_LOG(4,expr)
35
+ #else
36
+ # define TRACE_(expr)
37
+ #endif
38
+
39
+ static pj_status_t pocketsphinx_put_frame(pjmedia_port *this_port,
40
+ pjmedia_frame *frame);
41
+ static pj_status_t pocketsphinx_on_destroy(pjmedia_port *this_port);
42
+
43
+ struct pocketsphinx_t
44
+ {
45
+ struct pjmedia_port base;
46
+
47
+ ps_decoder_t *decoder;
48
+ ps_config_t *config;
49
+ ps_endpointer_t *ep;
50
+
51
+ unsigned in_spf;
52
+ unsigned out_spf;
53
+
54
+ short *samples;
55
+ unsigned sample_count;
56
+
57
+ pj_bool_t subscribed;
58
+ void (*cb)(pjmedia_port*, void*, char*);
59
+ void *cb_user_data;
60
+
61
+ char transcript[4096];
62
+ };
63
+
64
+ static pj_status_t speech_on_event(pjmedia_event *event,
65
+ void *user_data)
66
+ {
67
+ struct pocketsphinx_t *port = (struct pocketsphinx_t*)user_data;
68
+
69
+ if (event->type == PJMEDIA_EVENT_CALLBACK) {
70
+ if (port->cb)
71
+ (*port->cb)(&port->base, port->cb_user_data, port->transcript);
72
+ }
73
+
74
+ return PJ_SUCCESS;
75
+ }
76
+
77
+ PJ_DEF(pj_status_t) pjmedia_pocketsphinx_port_create( pj_pool_t *pool,
78
+ unsigned clock_rate,
79
+ unsigned channel_count,
80
+ unsigned samples_per_frame,
81
+ unsigned bits_per_sample,
82
+ void (*cb)(pjmedia_port*, void *user_data, char *transcript),
83
+ void *cb_user_data,
84
+ pjmedia_port **p_port)
85
+ {
86
+ struct pocketsphinx_t *port;
87
+ const pj_str_t name = pj_str("pocketsphinx");
88
+
89
+ PJ_ASSERT_RETURN(pool && clock_rate && channel_count == 1 &&
90
+ samples_per_frame && bits_per_sample == 16 &&
91
+ p_port != NULL, PJ_EINVAL);
92
+
93
+ PJ_ASSERT_RETURN(pool && p_port, PJ_EINVAL);
94
+
95
+ port = PJ_POOL_ZALLOC_T(pool, struct pocketsphinx_t);
96
+ PJ_ASSERT_RETURN(pool != NULL, PJ_ENOMEM);
97
+
98
+ pjmedia_port_info_init(&port->base.info, &name, SIGNATURE, clock_rate,
99
+ channel_count, bits_per_sample, samples_per_frame);
100
+
101
+ port->base.put_frame = &pocketsphinx_put_frame;
102
+ port->base.on_destroy = &pocketsphinx_on_destroy;
103
+
104
+ port->cb = cb;
105
+ port->cb_user_data = cb_user_data;
106
+
107
+ port->config = ps_config_init(NULL);
108
+ ps_default_search_args(port->config);
109
+
110
+ if ((port->decoder = ps_init(port->config)) == NULL) {
111
+ TRACE_((THIS_FILE, "pocketsphinx port: decoder init failed\n"));
112
+ return !PJ_SUCCESS;
113
+ }
114
+
115
+ if ((port->ep = ps_endpointer_init(0, 0.0, 0, clock_rate, 0)) == NULL) {
116
+ TRACE_((THIS_FILE, "pocketsphinx port: endpointer init failed\n"));
117
+ return !PJ_SUCCESS;
118
+ }
119
+
120
+ port->in_spf = samples_per_frame;
121
+ port->out_spf = ps_endpointer_frame_size(port->ep);
122
+
123
+ port->samples = (short*) pj_pool_alloc(pool, port->out_spf * sizeof(short));
124
+ if (port->samples == NULL) {
125
+ TRACE_(("Failed to allocate buffer for samples\n"));
126
+ return !PJ_SUCCESS;
127
+ }
128
+
129
+ TRACE_((THIS_FILE, "pocketsphinx port created: %u/%u/%u/%u", clock_rate,
130
+ channel_count, samples_per_frame, bits_per_sample));
131
+
132
+ printf("pocketsphinx_create in_spf=%i out_spf=%i\n", port->in_spf, port->out_spf);
133
+ *p_port = &port->base;
134
+ return PJ_SUCCESS;
135
+ }
136
+
137
+ void feed(struct pocketsphinx_t *port, short *frame) {
138
+ const int16 *speech;
139
+ int prev_in_speech = ps_endpointer_in_speech(port->ep);
140
+ speech = ps_endpointer_process(port->ep, frame);
141
+ if (speech != NULL) {
142
+ const char *hyp;
143
+ if (!prev_in_speech) {
144
+ printf("pocketsphinx speech start at %.2f\n", ps_endpointer_speech_start(port->ep));
145
+ ps_start_utt(port->decoder);
146
+ }
147
+ if (ps_process_raw(port->decoder, speech, port->out_spf, FALSE, FALSE) < 0) {
148
+ printf("pocketsphinx ps_process_raw() failed\n");
149
+ return;
150
+ }
151
+ if ((hyp = ps_get_hyp(port->decoder, NULL)) != NULL) {
152
+ //printf("pocketsphinx partial result: %s\n", hyp);
153
+ }
154
+ if (!ps_endpointer_in_speech(port->ep)) {
155
+ printf("Speech end at %.2f\n", ps_endpointer_speech_end(port->ep));
156
+ ps_end_utt(port->decoder);
157
+ if ((hyp = ps_get_hyp(port->decoder, NULL)) != NULL) {
158
+ printf("pocketsphinx speech: %s\n", hyp);
159
+ if(strlen(hyp) == 0) return;
160
+
161
+ strncpy(port->transcript, hyp, sizeof(port->transcript) - 1);
162
+
163
+ // Ensure the destination string is null-terminated
164
+ port->transcript[sizeof(port->transcript) - 1] = '\0';
165
+
166
+ if(port->cb) {
167
+ if (!port->subscribed) {
168
+ pj_status_t status = pjmedia_event_subscribe(NULL, &speech_on_event,
169
+ port, port);
170
+ port->subscribed = (status == PJ_SUCCESS)? PJ_TRUE:
171
+ PJ_FALSE;
172
+ }
173
+
174
+ if (port->subscribed) {
175
+ pjmedia_event event;
176
+
177
+ pjmedia_event_init(&event, PJMEDIA_EVENT_CALLBACK,
178
+ NULL, port);
179
+ pjmedia_event_publish(NULL, port, &event,
180
+ PJMEDIA_EVENT_PUBLISH_POST_EVENT);
181
+ }
182
+ }
183
+ }
184
+ }
185
+ }
186
+ }
187
+
188
+ unsigned feed_one(struct pocketsphinx_t *port, pjmedia_frame *frame){
189
+ unsigned used_samples = port->out_spf - port->sample_count;
190
+ memcpy((short*)port->samples + port->sample_count, frame->buf, used_samples * sizeof(short));
191
+ feed(port, port->samples);
192
+ port->sample_count = 0;
193
+ return used_samples;
194
+ }
195
+
196
+ void feed_all(struct pocketsphinx_t *port, pjmedia_frame *frame) {
197
+ unsigned samples = frame->size / 2;
198
+ unsigned used_samples = 0;
199
+ if(port->sample_count > 0) {
200
+ used_samples = feed_one(port, frame);
201
+ }
202
+
203
+ short *out_frame = frame->buf + used_samples;
204
+ samples -= used_samples;
205
+ unsigned count = 0;
206
+ while(samples >= port->out_spf) {
207
+ feed(port, out_frame);
208
+ count++;
209
+ out_frame += (count * port->out_spf);
210
+ samples -= port->out_spf;
211
+ }
212
+
213
+ if(samples) {
214
+ memcpy(port->samples, out_frame, samples * sizeof(short));
215
+ port->sample_count = samples;
216
+ }
217
+ }
218
+
219
+ static pj_status_t pocketsphinx_put_frame(pjmedia_port *this_port,
220
+ pjmedia_frame *frame)
221
+ {
222
+ if(frame->type != PJMEDIA_FRAME_TYPE_AUDIO) return PJ_SUCCESS;
223
+
224
+ struct pocketsphinx_t *port = (struct pocketsphinx_t*) this_port;
225
+
226
+ if(port->in_spf == port->out_spf) {
227
+ //printf("feed\n");
228
+ feed(port, (short*)frame->buf);
229
+ return;
230
+ }
231
+
232
+ if(port->in_spf > port->out_spf) {
233
+ //printf("feed_all\n");
234
+ feed_all(port, frame);
235
+ return;
236
+ }
237
+
238
+ unsigned samples = frame->size / 2;
239
+ if(samples + port->sample_count >= port->out_spf) {
240
+ // enough to feed once
241
+ //printf("feed_one\n");
242
+ feed_one(port, frame);
243
+ return;
244
+ }
245
+
246
+ // not enough to feed.
247
+ //printf("not enough to feed\n");
248
+ memcpy((short*)port->samples + port->sample_count, frame->buf, samples * sizeof(short));
249
+ port->sample_count += samples;
250
+
251
+ return PJ_SUCCESS;
252
+ }
253
+
254
+ /*
255
+ * Destroy port.
256
+ */
257
+ static pj_status_t pocketsphinx_on_destroy(pjmedia_port *this_port)
258
+ {
259
+ struct pocketsphinx_t *port = (struct pocketsphinx_t*) this_port;
260
+
261
+ ps_endpointer_free(port->ep);
262
+ ps_free(port->decoder);
263
+ ps_config_free(port->config);
264
+
265
+ if (port->subscribed) {
266
+ pjmedia_event_unsubscribe(NULL, &speech_on_event, port, port);
267
+ port->subscribed = PJ_FALSE;
268
+ }
269
+
270
+ return PJ_SUCCESS;
271
+ }
272
+
273
+