sip-lab 1.23.0 → 1.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -143,10 +143,7 @@ PJ_DEF(pj_status_t) pjmedia_flite_port_create( pj_pool_t *pool,
143
143
  if (!strcasecmp(voice, "awb")) {
144
144
  flite->v = globals.awb;
145
145
  } else if (!strcasecmp(voice, "kal")) {
146
- /* "kal" is 8kHz and the native rate is set to 16kHz
147
- * so kal talks a little bit too fast ...
148
- * for now: "symlink" kal to kal16
149
- */ flite->v = globals.kal16;
146
+ flite->v = globals.kal; // this uses SamplingRate of 8000. All others use 16000
150
147
  } else if (!strcasecmp(voice, "rms")) {
151
148
  flite->v = globals.rms;
152
149
  } else if (!strcasecmp(voice, "slt")) {
@@ -194,12 +191,12 @@ static pj_status_t flite_get_frame(pjmedia_port *port,
194
191
  struct flite_t *flite = (struct flite_t*)port;
195
192
 
196
193
  if(!flite->w) {
197
- printf("flite no data\n");
194
+ //printf("flite no data\n");
198
195
  frame->type = PJMEDIA_FRAME_TYPE_NONE;
199
196
  return PJ_SUCCESS;
200
197
  }
201
198
 
202
- printf("written_samples=%i num_samples=%i\n", flite->written_samples, flite->w->num_samples);
199
+ //printf("written_samples=%i num_samples=%i\n", flite->written_samples, flite->w->num_samples);
203
200
  if (flite->written_samples + PJMEDIA_PIA_SPF(&port->info) > (unsigned)flite->w->num_samples) {
204
201
  printf("flite end of speech\n");
205
202
 
@@ -235,7 +232,7 @@ static pj_status_t flite_get_frame(pjmedia_port *port,
235
232
  memcpy(frame->buf, flite->w->samples + flite->written_samples, PJMEDIA_PIA_SPF(&port->info)*2);
236
233
  flite->written_samples += PJMEDIA_PIA_SPF(&port->info);
237
234
  frame->type = PJMEDIA_FRAME_TYPE_AUDIO;
238
- printf("flite data written samples=%i\n", PJMEDIA_PIA_SPF(&port->info));
235
+ //printf("flite data written samples=%i\n", PJMEDIA_PIA_SPF(&port->info));
239
236
 
240
237
  return PJ_SUCCESS;
241
238
  }
@@ -18,7 +18,7 @@
18
18
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
19
  */
20
20
 
21
- #include <pjmedia/pocketsphinx.h>
21
+ #include <pocketsphinx_port.h>
22
22
  #include <pjmedia/errno.h>
23
23
  #include <pjmedia/port.h>
24
24
  #include <pj/assert.h>
@@ -44,43 +44,31 @@ struct pocketsphinx_t
44
44
  {
45
45
  struct pjmedia_port base;
46
46
 
47
- pj_bool_t subscribed;
48
- void (*cb)(pjmedia_port*, void*, char*);
49
- };
50
-
51
- /*
52
- * Register a callback to be called when we get translation
53
- */
54
- PJ_DEF(pj_status_t) pjmedia_pocketsphinx_port_set_speech_cb(pjmedia_port *port,
55
- void *user_data,
56
- void (*cb)(pjmedia_port *port,
57
- void *usr_data, char *transcript))
58
- {
59
- struct pocketsphinx_t *flite;
47
+ ps_decoder_t *decoder;
48
+ ps_config_t *config;
49
+ ps_endpointer_t *ep;
60
50
 
61
- /* Sanity check */
62
- PJ_ASSERT_RETURN(port, -PJ_EINVAL);
51
+ unsigned in_spf;
52
+ unsigned out_spf;
63
53
 
64
- /* Check that this is really a pocketsphinx port */
65
- PJ_ASSERT_RETURN(port->info.signature == SIGNATURE, -PJ_EINVALIDOP);
54
+ short *samples;
55
+ unsigned sample_count;
66
56
 
67
- pocketsphinx = (struct pocketsphinx_t*) port;
68
-
69
- pocketsphinx->base.port_data.pdata = user_data;
70
- pocketsphinx->cb = cb;
71
-
72
- return PJ_SUCCESS;
73
- }
57
+ pj_bool_t subscribed;
58
+ void (*cb)(pjmedia_port*, void*, char*);
59
+ void *cb_user_data;
74
60
 
61
+ char transcript[4096];
62
+ };
75
63
 
76
64
  static pj_status_t speech_on_event(pjmedia_event *event,
77
65
  void *user_data)
78
66
  {
79
- struct pocketsphinx_t *pocketsphinx = (struct pocketsphinx_t*)user_data;
67
+ struct pocketsphinx_t *port = (struct pocketsphinx_t*)user_data;
80
68
 
81
69
  if (event->type == PJMEDIA_EVENT_CALLBACK) {
82
- if (pocketsphinx->cb)
83
- (*pocketsphinx->cb)(&flite->base, flite->base.port_data.pdata, "transcript");
70
+ if (port->cb)
71
+ (*port->cb)(&port->base, port->cb_user_data, port->transcript);
84
72
  }
85
73
 
86
74
  return PJ_SUCCESS;
@@ -91,52 +79,176 @@ PJ_DEF(pj_status_t) pjmedia_pocketsphinx_port_create( pj_pool_t *pool,
91
79
  unsigned channel_count,
92
80
  unsigned samples_per_frame,
93
81
  unsigned bits_per_sample,
82
+ void (*cb)(pjmedia_port*, void *user_data, char *transcript),
83
+ void *cb_user_data,
94
84
  pjmedia_port **p_port)
95
85
  {
96
- struct pocketsphinx *pocketsphinx;
86
+ struct pocketsphinx_t *port;
97
87
  const pj_str_t name = pj_str("pocketsphinx");
98
88
 
99
- PJ_ASSERT_RETURN(pool && clock_rate && channel_count &&
89
+ PJ_ASSERT_RETURN(pool && clock_rate && channel_count == 1 &&
100
90
  samples_per_frame && bits_per_sample == 16 &&
101
91
  p_port != NULL, PJ_EINVAL);
102
92
 
103
93
  PJ_ASSERT_RETURN(pool && p_port, PJ_EINVAL);
104
94
 
105
- pocketsphinx = PJ_POOL_ZALLOC_T(pool, struct pocketsphinx);
95
+ port = PJ_POOL_ZALLOC_T(pool, struct pocketsphinx_t);
106
96
  PJ_ASSERT_RETURN(pool != NULL, PJ_ENOMEM);
107
97
 
108
- pjmedia_port_info_init(&pocketsphinx->base.info, &name, SIGNATURE, clock_rate,
98
+ pjmedia_port_info_init(&port->base.info, &name, SIGNATURE, clock_rate,
109
99
  channel_count, bits_per_sample, samples_per_frame);
110
100
 
111
- pocketsphinx->base.put_frame = &pocketsphinx_put_frame;
112
- pocketsphinx->base.on_destroy = &pocketsphinx_on_destroy;
101
+ port->base.put_frame = &pocketsphinx_put_frame;
102
+ port->base.on_destroy = &pocketsphinx_on_destroy;
113
103
 
114
- pocketsphinx->dtmf_cb = cb;
115
- pocketsphinx->dtmf_cb_user_data = user_data;
104
+ port->cb = cb;
105
+ port->cb_user_data = cb_user_data;
116
106
 
117
- dtmf_rx_init(&pocketsphinx->state, NULL, NULL);
118
- dtmf_rx_set_realtime_callback(&pocketsphinx->state,
119
- &pocketsphinx_digit_callback,
120
- (void*)pocketsphinx);
107
+ port->config = ps_config_init(NULL);
108
+ ps_default_search_args(port->config);
109
+
110
+ if ((port->decoder = ps_init(port->config)) == NULL) {
111
+ TRACE_((THIS_FILE, "pocketsphinx port: decoder init failed\n"));
112
+ return !PJ_SUCCESS;
113
+ }
114
+
115
+ if ((port->ep = ps_endpointer_init(0, 0.0, 0, clock_rate, 0)) == NULL) {
116
+ TRACE_((THIS_FILE, "pocketsphinx port: endpointer init failed\n"));
117
+ return !PJ_SUCCESS;
118
+ }
119
+
120
+ port->in_spf = samples_per_frame;
121
+ port->out_spf = ps_endpointer_frame_size(port->ep);
121
122
 
122
- TRACE_((THIS_FILE, "pocketsphinx created: %u/%u/%u/%u", clock_rate,
123
+ port->samples = (short*) pj_pool_alloc(pool, port->out_spf * sizeof(short));
124
+ if (port->samples == NULL) {
125
+ TRACE_(("Failed to allocate buffer for samples\n"));
126
+ return !PJ_SUCCESS;
127
+ }
128
+
129
+ TRACE_((THIS_FILE, "pocketsphinx port created: %u/%u/%u/%u", clock_rate,
123
130
  channel_count, samples_per_frame, bits_per_sample));
124
131
 
125
- *p_port = &pocketsphinx->base;
132
+ printf("pocketsphinx_create in_spf=%i out_spf=%i\n", port->in_spf, port->out_spf);
133
+ *p_port = &port->base;
126
134
  return PJ_SUCCESS;
127
135
  }
128
136
 
137
+ void feed(struct pocketsphinx_t *port, short *frame) {
138
+ const int16 *speech;
139
+ int prev_in_speech = ps_endpointer_in_speech(port->ep);
140
+ speech = ps_endpointer_process(port->ep, frame);
141
+ if (speech != NULL) {
142
+ const char *hyp;
143
+ if (!prev_in_speech) {
144
+ printf("pocketsphinx speech start at %.2f\n", ps_endpointer_speech_start(port->ep));
145
+ ps_start_utt(port->decoder);
146
+ }
147
+ if (ps_process_raw(port->decoder, speech, port->out_spf, FALSE, FALSE) < 0) {
148
+ printf("pocketsphinx ps_process_raw() failed\n");
149
+ return;
150
+ }
151
+ if ((hyp = ps_get_hyp(port->decoder, NULL)) != NULL) {
152
+ //printf("pocketsphinx partial result: %s\n", hyp);
153
+ }
154
+ if (!ps_endpointer_in_speech(port->ep)) {
155
+ printf("Speech end at %.2f\n", ps_endpointer_speech_end(port->ep));
156
+ ps_end_utt(port->decoder);
157
+ if ((hyp = ps_get_hyp(port->decoder, NULL)) != NULL) {
158
+ printf("pocketsphinx speech: %s\n", hyp);
159
+ if(strlen(hyp) == 0) return;
160
+
161
+ strncpy(port->transcript, hyp, sizeof(port->transcript) - 1);
162
+
163
+ // Ensure the destination string is null-terminated
164
+ port->transcript[sizeof(port->transcript) - 1] = '\0';
165
+
166
+ if(port->cb) {
167
+ if (!port->subscribed) {
168
+ pj_status_t status = pjmedia_event_subscribe(NULL, &speech_on_event,
169
+ port, port);
170
+ port->subscribed = (status == PJ_SUCCESS)? PJ_TRUE:
171
+ PJ_FALSE;
172
+ }
173
+
174
+ if (port->subscribed) {
175
+ pjmedia_event event;
176
+
177
+ pjmedia_event_init(&event, PJMEDIA_EVENT_CALLBACK,
178
+ NULL, port);
179
+ pjmedia_event_publish(NULL, port, &event,
180
+ PJMEDIA_EVENT_PUBLISH_POST_EVENT);
181
+ }
182
+ }
183
+ }
184
+ }
185
+ }
186
+ }
187
+
188
+ unsigned feed_one(struct pocketsphinx_t *port, pjmedia_frame *frame){
189
+ unsigned used_samples = port->out_spf - port->sample_count;
190
+ memcpy((short*)port->samples + port->sample_count, frame->buf, used_samples * sizeof(short));
191
+ feed(port, port->samples);
192
+ port->sample_count = 0;
193
+ return used_samples;
194
+ }
195
+
196
+ void feed_all(struct pocketsphinx_t *port, pjmedia_frame *frame) {
197
+ unsigned samples = frame->size / 2;
198
+ unsigned used_samples = 0;
199
+ if(port->sample_count > 0) {
200
+ used_samples = feed_one(port, frame);
201
+ }
202
+
203
+ short *out_frame = frame->buf + used_samples;
204
+ samples -= used_samples;
205
+ unsigned count = 0;
206
+ while(samples >= port->out_spf) {
207
+ feed(port, out_frame);
208
+ count++;
209
+ out_frame += (count * port->out_spf);
210
+ samples -= port->out_spf;
211
+ }
212
+
213
+ if(samples) {
214
+ memcpy(port->samples, out_frame, samples * sizeof(short));
215
+ port->sample_count = samples;
216
+ }
217
+ }
218
+
129
219
  static pj_status_t pocketsphinx_put_frame(pjmedia_port *this_port,
130
220
  pjmedia_frame *frame)
131
221
  {
132
222
  if(frame->type != PJMEDIA_FRAME_TYPE_AUDIO) return PJ_SUCCESS;
133
223
 
134
- struct pocketsphinx *dport = (struct pocketsphinx*) this_port;
135
- dtmf_rx(&dport->state, (const pj_int16_t*)frame->buf,
136
- PJMEDIA_PIA_SPF(&dport->base.info));
224
+ struct pocketsphinx_t *port = (struct pocketsphinx_t*) this_port;
137
225
 
138
- return PJ_SUCCESS;
226
+ if(port->in_spf == port->out_spf) {
227
+ //printf("feed\n");
228
+ feed(port, (short*)frame->buf);
229
+ return;
230
+ }
139
231
 
232
+ if(port->in_spf > port->out_spf) {
233
+ //printf("feed_all\n");
234
+ feed_all(port, frame);
235
+ return;
236
+ }
237
+
238
+ unsigned samples = frame->size / 2;
239
+ if(samples + port->sample_count >= port->out_spf) {
240
+ // enough to feed once
241
+ //printf("feed_one\n");
242
+ feed_one(port, frame);
243
+ return;
244
+ }
245
+
246
+ // not enough to feed.
247
+ //printf("not enough to feed\n");
248
+ memcpy((short*)port->samples + port->sample_count, frame->buf, samples * sizeof(short));
249
+ port->sample_count += samples;
250
+
251
+ return PJ_SUCCESS;
140
252
  }
141
253
 
142
254
  /*
@@ -144,6 +256,17 @@ static pj_status_t pocketsphinx_put_frame(pjmedia_port *this_port,
144
256
  */
145
257
  static pj_status_t pocketsphinx_on_destroy(pjmedia_port *this_port)
146
258
  {
259
+ struct pocketsphinx_t *port = (struct pocketsphinx_t*) this_port;
260
+
261
+ ps_endpointer_free(port->ep);
262
+ ps_free(port->decoder);
263
+ ps_config_free(port->config);
264
+
265
+ if (port->subscribed) {
266
+ pjmedia_event_unsubscribe(NULL, &speech_on_event, port, port);
267
+ port->subscribed = PJ_FALSE;
268
+ }
269
+
147
270
  return PJ_SUCCESS;
148
271
  }
149
272