sip-lab 1.23.0 → 1.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,7 @@
18
18
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
19
  */
20
20
 
21
- #include <pjmedia/pocketsphinx.h>
21
+ #include <pocketsphinx_port.h>
22
22
  #include <pjmedia/errno.h>
23
23
  #include <pjmedia/port.h>
24
24
  #include <pj/assert.h>
@@ -44,43 +44,31 @@ struct pocketsphinx_t
44
44
  {
45
45
  struct pjmedia_port base;
46
46
 
47
- pj_bool_t subscribed;
48
- void (*cb)(pjmedia_port*, void*, char*);
49
- };
50
-
51
- /*
52
- * Register a callback to be called when we get translation
53
- */
54
- PJ_DEF(pj_status_t) pjmedia_pocketsphinx_port_set_speech_cb(pjmedia_port *port,
55
- void *user_data,
56
- void (*cb)(pjmedia_port *port,
57
- void *usr_data, char *transcript))
58
- {
59
- struct pocketsphinx_t *flite;
47
+ ps_decoder_t *decoder;
48
+ ps_config_t *config;
49
+ ps_endpointer_t *ep;
60
50
 
61
- /* Sanity check */
62
- PJ_ASSERT_RETURN(port, -PJ_EINVAL);
51
+ unsigned in_spf;
52
+ unsigned out_spf;
63
53
 
64
- /* Check that this is really a pocketsphinx port */
65
- PJ_ASSERT_RETURN(port->info.signature == SIGNATURE, -PJ_EINVALIDOP);
54
+ short *samples;
55
+ unsigned sample_count;
66
56
 
67
- pocketsphinx = (struct pocketsphinx_t*) port;
68
-
69
- pocketsphinx->base.port_data.pdata = user_data;
70
- pocketsphinx->cb = cb;
71
-
72
- return PJ_SUCCESS;
73
- }
57
+ pj_bool_t subscribed;
58
+ void (*cb)(pjmedia_port*, void*, char*);
59
+ void *cb_user_data;
74
60
 
61
+ char transcript[4096];
62
+ };
75
63
 
76
64
  static pj_status_t speech_on_event(pjmedia_event *event,
77
65
  void *user_data)
78
66
  {
79
- struct pocketsphinx_t *pocketsphinx = (struct pocketsphinx_t*)user_data;
67
+ struct pocketsphinx_t *port = (struct pocketsphinx_t*)user_data;
80
68
 
81
69
  if (event->type == PJMEDIA_EVENT_CALLBACK) {
82
- if (pocketsphinx->cb)
83
- (*pocketsphinx->cb)(&flite->base, flite->base.port_data.pdata, "transcript");
70
+ if (port->cb)
71
+ (*port->cb)(&port->base, port->cb_user_data, port->transcript);
84
72
  }
85
73
 
86
74
  return PJ_SUCCESS;
@@ -91,52 +79,176 @@ PJ_DEF(pj_status_t) pjmedia_pocketsphinx_port_create( pj_pool_t *pool,
91
79
  unsigned channel_count,
92
80
  unsigned samples_per_frame,
93
81
  unsigned bits_per_sample,
82
+ void (*cb)(pjmedia_port*, void *user_data, char *transcript),
83
+ void *cb_user_data,
94
84
  pjmedia_port **p_port)
95
85
  {
96
- struct pocketsphinx *pocketsphinx;
86
+ struct pocketsphinx_t *port;
97
87
  const pj_str_t name = pj_str("pocketsphinx");
98
88
 
99
- PJ_ASSERT_RETURN(pool && clock_rate && channel_count &&
89
+ PJ_ASSERT_RETURN(pool && clock_rate && channel_count == 1 &&
100
90
  samples_per_frame && bits_per_sample == 16 &&
101
91
  p_port != NULL, PJ_EINVAL);
102
92
 
103
93
  PJ_ASSERT_RETURN(pool && p_port, PJ_EINVAL);
104
94
 
105
- pocketsphinx = PJ_POOL_ZALLOC_T(pool, struct pocketsphinx);
95
+ port = PJ_POOL_ZALLOC_T(pool, struct pocketsphinx_t);
106
96
  PJ_ASSERT_RETURN(pool != NULL, PJ_ENOMEM);
107
97
 
108
- pjmedia_port_info_init(&pocketsphinx->base.info, &name, SIGNATURE, clock_rate,
98
+ pjmedia_port_info_init(&port->base.info, &name, SIGNATURE, clock_rate,
109
99
  channel_count, bits_per_sample, samples_per_frame);
110
100
 
111
- pocketsphinx->base.put_frame = &pocketsphinx_put_frame;
112
- pocketsphinx->base.on_destroy = &pocketsphinx_on_destroy;
101
+ port->base.put_frame = &pocketsphinx_put_frame;
102
+ port->base.on_destroy = &pocketsphinx_on_destroy;
113
103
 
114
- pocketsphinx->dtmf_cb = cb;
115
- pocketsphinx->dtmf_cb_user_data = user_data;
104
+ port->cb = cb;
105
+ port->cb_user_data = cb_user_data;
116
106
 
117
- dtmf_rx_init(&pocketsphinx->state, NULL, NULL);
118
- dtmf_rx_set_realtime_callback(&pocketsphinx->state,
119
- &pocketsphinx_digit_callback,
120
- (void*)pocketsphinx);
107
+ port->config = ps_config_init(NULL);
108
+ ps_default_search_args(port->config);
109
+
110
+ if ((port->decoder = ps_init(port->config)) == NULL) {
111
+ TRACE_((THIS_FILE, "pocketsphinx port: decoder init failed\n"));
112
+ return !PJ_SUCCESS;
113
+ }
114
+
115
+ if ((port->ep = ps_endpointer_init(0, 0.0, 0, clock_rate, 0)) == NULL) {
116
+ TRACE_((THIS_FILE, "pocketsphinx port: endpointer init failed\n"));
117
+ return !PJ_SUCCESS;
118
+ }
119
+
120
+ port->in_spf = samples_per_frame;
121
+ port->out_spf = ps_endpointer_frame_size(port->ep);
121
122
 
122
- TRACE_((THIS_FILE, "pocketsphinx created: %u/%u/%u/%u", clock_rate,
123
+ port->samples = (short*) pj_pool_alloc(pool, port->out_spf * sizeof(short));
124
+ if (port->samples == NULL) {
125
+ TRACE_(("Failed to allocate buffer for samples\n"));
126
+ return !PJ_SUCCESS;
127
+ }
128
+
129
+ TRACE_((THIS_FILE, "pocketsphinx port created: %u/%u/%u/%u", clock_rate,
123
130
  channel_count, samples_per_frame, bits_per_sample));
124
131
 
125
- *p_port = &pocketsphinx->base;
132
+ printf("pocketsphinx_create in_spf=%i out_spf=%i\n", port->in_spf, port->out_spf);
133
+ *p_port = &port->base;
126
134
  return PJ_SUCCESS;
127
135
  }
128
136
 
137
+ void feed(struct pocketsphinx_t *port, short *frame) {
138
+ const int16 *speech;
139
+ int prev_in_speech = ps_endpointer_in_speech(port->ep);
140
+ speech = ps_endpointer_process(port->ep, frame);
141
+ if (speech != NULL) {
142
+ const char *hyp;
143
+ if (!prev_in_speech) {
144
+ printf("pocketsphinx speech start at %.2f\n", ps_endpointer_speech_start(port->ep));
145
+ ps_start_utt(port->decoder);
146
+ }
147
+ if (ps_process_raw(port->decoder, speech, port->out_spf, FALSE, FALSE) < 0) {
148
+ printf("pocketsphinx ps_process_raw() failed\n");
149
+ return;
150
+ }
151
+ if ((hyp = ps_get_hyp(port->decoder, NULL)) != NULL) {
152
+ //printf("pocketsphinx partial result: %s\n", hyp);
153
+ }
154
+ if (!ps_endpointer_in_speech(port->ep)) {
155
+ printf("Speech end at %.2f\n", ps_endpointer_speech_end(port->ep));
156
+ ps_end_utt(port->decoder);
157
+ if ((hyp = ps_get_hyp(port->decoder, NULL)) != NULL) {
158
+ printf("pocketsphinx speech: %s\n", hyp);
159
+ if(strlen(hyp) == 0) return;
160
+
161
+ strncpy(port->transcript, hyp, sizeof(port->transcript) - 1);
162
+
163
+ // Ensure the destination string is null-terminated
164
+ port->transcript[sizeof(port->transcript) - 1] = '\0';
165
+
166
+ if(port->cb) {
167
+ if (!port->subscribed) {
168
+ pj_status_t status = pjmedia_event_subscribe(NULL, &speech_on_event,
169
+ port, port);
170
+ port->subscribed = (status == PJ_SUCCESS)? PJ_TRUE:
171
+ PJ_FALSE;
172
+ }
173
+
174
+ if (port->subscribed) {
175
+ pjmedia_event event;
176
+
177
+ pjmedia_event_init(&event, PJMEDIA_EVENT_CALLBACK,
178
+ NULL, port);
179
+ pjmedia_event_publish(NULL, port, &event,
180
+ PJMEDIA_EVENT_PUBLISH_POST_EVENT);
181
+ }
182
+ }
183
+ }
184
+ }
185
+ }
186
+ }
187
+
188
+ unsigned feed_one(struct pocketsphinx_t *port, pjmedia_frame *frame){
189
+ unsigned used_samples = port->out_spf - port->sample_count;
190
+ memcpy((short*)port->samples + port->sample_count, frame->buf, used_samples * sizeof(short));
191
+ feed(port, port->samples);
192
+ port->sample_count = 0;
193
+ return used_samples;
194
+ }
195
+
196
+ void feed_all(struct pocketsphinx_t *port, pjmedia_frame *frame) {
197
+ unsigned samples = frame->size / 2;
198
+ unsigned used_samples = 0;
199
+ if(port->sample_count > 0) {
200
+ used_samples = feed_one(port, frame);
201
+ }
202
+
203
+ short *out_frame = frame->buf + used_samples;
204
+ samples -= used_samples;
205
+ unsigned count = 0;
206
+ while(samples >= port->out_spf) {
207
+ feed(port, out_frame);
208
+ count++;
209
+ out_frame += (count * port->out_spf);
210
+ samples -= port->out_spf;
211
+ }
212
+
213
+ if(samples) {
214
+ memcpy(port->samples, out_frame, samples * sizeof(short));
215
+ port->sample_count = samples;
216
+ }
217
+ }
218
+
129
219
  static pj_status_t pocketsphinx_put_frame(pjmedia_port *this_port,
130
220
  pjmedia_frame *frame)
131
221
  {
132
222
  if(frame->type != PJMEDIA_FRAME_TYPE_AUDIO) return PJ_SUCCESS;
133
223
 
134
- struct pocketsphinx *dport = (struct pocketsphinx*) this_port;
135
- dtmf_rx(&dport->state, (const pj_int16_t*)frame->buf,
136
- PJMEDIA_PIA_SPF(&dport->base.info));
224
+ struct pocketsphinx_t *port = (struct pocketsphinx_t*) this_port;
137
225
 
138
- return PJ_SUCCESS;
226
+ if(port->in_spf == port->out_spf) {
227
+ //printf("feed\n");
228
+ feed(port, (short*)frame->buf);
229
+ return;
230
+ }
139
231
 
232
+ if(port->in_spf > port->out_spf) {
233
+ //printf("feed_all\n");
234
+ feed_all(port, frame);
235
+ return;
236
+ }
237
+
238
+ unsigned samples = frame->size / 2;
239
+ if(samples + port->sample_count >= port->out_spf) {
240
+ // enough to feed once
241
+ //printf("feed_one\n");
242
+ feed_one(port, frame);
243
+ return;
244
+ }
245
+
246
+ // not enough to feed.
247
+ //printf("not enough to feed\n");
248
+ memcpy((short*)port->samples + port->sample_count, frame->buf, samples * sizeof(short));
249
+ port->sample_count += samples;
250
+
251
+ return PJ_SUCCESS;
140
252
  }
141
253
 
142
254
  /*
@@ -144,6 +256,17 @@ static pj_status_t pocketsphinx_put_frame(pjmedia_port *this_port,
144
256
  */
145
257
  static pj_status_t pocketsphinx_on_destroy(pjmedia_port *this_port)
146
258
  {
259
+ struct pocketsphinx_t *port = (struct pocketsphinx_t*) this_port;
260
+
261
+ ps_endpointer_free(port->ep);
262
+ ps_free(port->decoder);
263
+ ps_config_free(port->config);
264
+
265
+ if (port->subscribed) {
266
+ pjmedia_event_unsubscribe(NULL, &speech_on_event, port, port);
267
+ port->subscribed = PJ_FALSE;
268
+ }
269
+
147
270
  return PJ_SUCCESS;
148
271
  }
149
272