sip-lab 1.27.0 → 1.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -14,10 +14,10 @@ It permits to:
14
14
  - send/receive audio using SRTP
15
15
  - do speech synth using flite
16
16
  - do speech recog using pocketsphinx (but only works well with sampling rate of 16000)
17
+ - do speech synth/recog using [ws_speech_server](https://github.com/MayamaTakeshi/ws_speech_server) (this allows using Google/Amazon/Azure/etc. speech services)
17
18
 
18
19
  TODO:
19
20
  - add support for video playing/recording from/to file
20
- - add support for speech synth/recog via websocket server to permit to access Google Speech, Whisper, Amazon Poly etc.
21
21
  - add support for T.38 fax
22
22
  - add support for SIP over WebSocket
23
23
  - add support for WebRTC
@@ -47,6 +47,34 @@ The above script has detailed comments.
47
47
 
48
48
  Please read it to understand how to write your own test scripts.
49
49
 
50
+
51
+ ### Samples
52
+
53
+ See general sample scripts in folder samples.
54
+
55
+ There are additional sample scripts in the samples_extra folder, but they require [ws_speech_server](https://github.com/MayamaTakeshi/ws_speech_server) to be running locally (and it should be started with GOOGLE_APPLICATION_CREDENTIALS set).
56
+
57
+ To run ws_speech_server, do this:
58
+ ```
59
+ git clone https://github.com/MayamaTakeshi/ws_speech_server
60
+ cd ws_speech_server
61
+ npm i
62
+ npm run build
63
+ cp config/default.js.sample config/default.js
64
+ export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/credentials/file
65
+ node src/App.bs.js
66
+ ```
67
+
68
+ Then you should be able to test with dtmf language:
69
+ ```
70
+ node samples_extra/ws_speech_server.dtmf.js
71
+ ```
72
+ or with google speech service:
73
+ ```
74
+ node samples_extra/ws_speech_server.google.js
75
+ ```
76
+
77
+
50
78
  ### About the code
51
79
 
52
80
  Although the code is written in *.cpp/*.hpp named files, this is not actually a C++ project.
package/binding.gyp CHANGED
@@ -19,10 +19,12 @@
19
19
  "3rdParty/spandsp/src",
20
20
  "3rdParty/pocketsphinx/include",
21
21
  "3rdParty/pocketsphinx/build/include",
22
+ "3rdParty/pjwebsock/websock",
22
23
  "<!@(node -p \"require('node-addon-api').include\")",
23
24
  ],
24
25
  'conditions': [
25
26
  [ 'OS!="win"', {
27
+ 'cflags': ['-g'],
26
28
  'cflags_cc': [
27
29
  '-g',
28
30
  '-fexceptions',
@@ -120,6 +122,12 @@
120
122
  'src/pjmedia/src/pjmedia/fax_port.c',
121
123
  'src/pjmedia/src/pjmedia/flite_port.c',
122
124
  'src/pjmedia/src/pjmedia/pocketsphinx_port.c',
125
+ 'src/pjmedia/src/pjmedia/ws_speech_port.cpp',
126
+ '3rdParty/pjwebsock/websock/http.c',
127
+ '3rdParty/pjwebsock/websock/websock_transport_tcp.c',
128
+ '3rdParty/pjwebsock/websock/websock_transport_tls.c',
129
+ '3rdParty/pjwebsock/websock/websock.c',
130
+ '3rdParty/pjwebsock/websock/websock_transport.c',
123
131
  ],
124
132
  },
125
133
  ],
package/build_deps.sh CHANGED
@@ -102,6 +102,15 @@ then
102
102
  fi
103
103
 
104
104
 
105
+ cd $START_DIR/3rdParty
106
+ if [[ ! -d pjwebsock ]]
107
+ then
108
+ git clone https://github.com/jimying/pjwebsock
109
+ cd pjwebsock
110
+ git checkout a0616ea27f01d5e3bdfd5b801fb1499473a0b0cb
111
+ fi
112
+
113
+
105
114
  #cd $START_DIR/3rdParty
106
115
  #if [[ ! -d openssl ]]
107
116
  #then
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sip-lab",
3
- "version": "1.27.0",
3
+ "version": "1.28.0",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "engines": {
Binary file
@@ -0,0 +1,217 @@
1
+ var sip = require ('../index.js')
2
+ var Zeq = require('@mayama/zeq')
3
+ var z = new Zeq()
4
+ var m = require('data-matching')
5
+ var sip_msg = require('sip-matching')
6
+ var sdp = require('sdp-matching')
7
+
8
+ var assert = require('assert')
9
+
10
+ async function test() {
11
+ //sip.set_log_level(6)
12
+ sip.dtmf_aggregation_on(500)
13
+
14
+ z.trap_events(sip.event_source, 'event', (evt) => {
15
+ var e = evt.args[0]
16
+ return e
17
+ })
18
+
19
+ console.log(sip.start((data) => { console.log(data)} ))
20
+
21
+ t1 = sip.transport.create({address: "127.0.0.1", type: 'udp'})
22
+ t2 = sip.transport.create({address: "127.0.0.1", type: 'udp'})
23
+
24
+ console.log("t1", t1)
25
+ console.log("t2", t2)
26
+
27
+ sip.set_codecs("PCMA/8000/1:128")
28
+
29
+ flags = 0
30
+
31
+ oc = sip.call.create(t1.id, {from_uri: 'sip:alice@test.com', to_uri: `sip:bob@${t2.address}:${t2.port}`})
32
+
33
+ await z.wait([
34
+ {
35
+ event: "incoming_call",
36
+ call_id: m.collect("call_id"),
37
+ },
38
+ {
39
+ event: 'response',
40
+ call_id: oc.id,
41
+ method: 'INVITE',
42
+ msg: sip_msg({
43
+ $rs: '100',
44
+ $rr: 'Trying',
45
+ '$(hdrcnt(via))': 1,
46
+ 'hdr_call_id': m.collect('sip_call_id'),
47
+ $fU: 'alice',
48
+ $fd: 'test.com',
49
+ $tU: 'bob',
50
+ 'hdr_l': '0',
51
+ }),
52
+ },
53
+ ], 1000)
54
+
55
+ ic = {
56
+ id: z.store.call_id,
57
+ sip_call_id: z.store.sip_call_id,
58
+ }
59
+
60
+ sip.call.respond(ic.id, {code: 200, reason: 'OK'})
61
+
62
+ await z.wait([
63
+ {
64
+ event: 'media_update',
65
+ call_id: oc.id,
66
+ status: 'ok',
67
+ },
68
+ {
69
+ event: 'media_update',
70
+ call_id: ic.id,
71
+ status: 'ok',
72
+ },
73
+ {
74
+ event: 'response',
75
+ call_id: oc.id,
76
+ method: 'INVITE',
77
+ msg: sip_msg({
78
+ $rs: '200',
79
+ $rr: 'OK',
80
+ '$(hdrcnt(VIA))': 1,
81
+ $fU: 'alice',
82
+ $fd: 'test.com',
83
+ $tU: 'bob',
84
+ 'hdr_content_type': 'application/sdp',
85
+ $rb: '!{_}a=sendrecv',
86
+ }),
87
+ },
88
+ ], 1000)
89
+
90
+ sip.call.reinvite(oc.id)
91
+
92
+ await z.wait([
93
+ {
94
+ event: 'reinvite',
95
+ call_id: ic.id
96
+ },
97
+ ], 1000)
98
+
99
+ sip.call.respond(ic.id, {code: 200, reason: 'OK'})
100
+
101
+ await z.wait([
102
+ {
103
+ event: 'response',
104
+ call_id: oc.id,
105
+ method: 'INVITE',
106
+ msg: sip_msg({
107
+ $rs: '100',
108
+ }),
109
+ },
110
+ {
111
+ event: 'response',
112
+ call_id: oc.id,
113
+ method: 'INVITE',
114
+ msg: sip_msg({
115
+ $rs: '200',
116
+ $rr: 'OK',
117
+ $rb: '!{_}a=sendrecv',
118
+ }),
119
+ },
120
+ {
121
+ event: 'media_update',
122
+ call_id: oc.id,
123
+ status: 'ok',
124
+ },
125
+ {
126
+ event: 'media_update',
127
+ call_id: ic.id,
128
+ status: 'ok',
129
+ },
130
+ ], 500)
131
+
132
+ sip.call.reinvite(oc.id, false, 0)
133
+
134
+ await z.wait([
135
+ {
136
+ event: 'reinvite',
137
+ call_id: ic.id
138
+ },
139
+ ], 1000)
140
+
141
+ sip.call.respond(ic.id, {code: 200, reason: 'OK'})
142
+
143
+ await z.wait([
144
+ {
145
+ event: 'response',
146
+ call_id: oc.id,
147
+ method: 'INVITE',
148
+ msg: sip_msg({
149
+ $rs: '100',
150
+ }),
151
+ },
152
+ {
153
+ event: 'response',
154
+ call_id: oc.id,
155
+ method: 'INVITE',
156
+ msg: sip_msg({
157
+ $rs: '200',
158
+ $rr: 'OK',
159
+ $rb: '!{_}a=sendrecv',
160
+ }),
161
+ },
162
+ {
163
+ event: 'media_update',
164
+ call_id: oc.id,
165
+ status: 'ok',
166
+ },
167
+ {
168
+ event: 'media_update',
169
+ call_id: ic.id,
170
+ status: 'ok',
171
+ },
172
+ ], 500)
173
+
174
+ oc_stat = sip.call.get_stream_stat(oc.id, {media_id: 0})
175
+ ic_stat = sip.call.get_stream_stat(ic.id, {media_id: 0})
176
+
177
+ console.log(oc_stat)
178
+ console.log(ic_stat)
179
+
180
+ oc_stat = JSON.parse(oc_stat)
181
+ ic_stat = JSON.parse(ic_stat)
182
+
183
+ assert(oc_stat.CodecInfo == 'PCMA/8000/1')
184
+ assert(ic_stat.CodecInfo == 'PCMA/8000/1')
185
+
186
+ sip.call.terminate(oc.id)
187
+
188
+ await z.wait([
189
+ {
190
+ event: 'call_ended',
191
+ call_id: oc.id,
192
+ },
193
+ {
194
+ event: 'call_ended',
195
+ call_id: ic.id,
196
+ },
197
+ {
198
+ event: 'response',
199
+ call_id: oc.id,
200
+ method: 'BYE',
201
+ msg: sip_msg({
202
+ $rs: '200',
203
+ $rr: 'OK',
204
+ }),
205
+ },
206
+ ], 1000)
207
+
208
+ console.log("Success")
209
+
210
+ sip.stop()
211
+ }
212
+
213
+ test()
214
+ .catch(e => {
215
+ console.error(e)
216
+ process.exit(1)
217
+ })
@@ -115,7 +115,7 @@ async function test() {
115
115
  sip.call.start_speech_recog(oc.id)
116
116
  sip.call.start_speech_recog(ic.id)
117
117
 
118
- await z.sleep(100)
118
+ await z.sleep(200)
119
119
 
120
120
  sip.call.start_play_wav(oc.id, {file: 'samples/artifacts/hello_good_morning.wav', end_of_file_event: true, no_loop: true})
121
121
  sip.call.start_play_wav(ic.id, {file: 'samples/artifacts/hello_good_morning.wav', end_of_file_event: true, no_loop: true})
@@ -130,16 +130,16 @@ async function test() {
130
130
  call_id: oc.id,
131
131
  },
132
132
  {
133
- event: 'speech_transcript',
133
+ event: 'speech',
134
134
  call_id: oc.id,
135
135
  //transcript: 'hello good morning', // bad transcript (will not match)
136
136
  },
137
137
  {
138
- event: 'speech_transcript',
138
+ event: 'speech',
139
139
  call_id: ic.id,
140
140
  //transcript: 'hello good morning', // bad transcript (will not match)
141
141
  },
142
- ], 4000)
142
+ ], 5000)
143
143
 
144
144
  sip.call.stop_record_wav(oc.id)
145
145
  sip.call.stop_record_wav(ic.id)
@@ -117,25 +117,25 @@ async function test() {
117
117
 
118
118
  await z.sleep(100)
119
119
 
120
- sip.call.start_speech_synth(oc.id, {voice: 'kal16', text: 'Good morning.', end_of_speech_event: true, no_loop: true})
121
- sip.call.start_speech_synth(ic.id, {voice: 'kal16', text: 'How are you?', end_of_speech_event: true, no_loop: true})
120
+ sip.call.start_speech_synth(oc.id, {voice: 'kal16', text: 'Good morning.'})
121
+ sip.call.start_speech_synth(ic.id, {voice: 'kal16', text: 'How are you?'})
122
122
 
123
123
  await z.wait([
124
124
  {
125
- event: 'end_of_speech',
125
+ event: 'speech_synth_complete',
126
126
  call_id: ic.id,
127
127
  },
128
128
  {
129
- event: 'end_of_speech',
129
+ event: 'speech_synth_complete',
130
130
  call_id: oc.id,
131
131
  },
132
132
  {
133
- event: 'speech_transcript',
133
+ event: 'speech',
134
134
  call_id: oc.id,
135
135
  transcript: 'how are you',
136
136
  },
137
137
  {
138
- event: 'speech_transcript',
138
+ event: 'speech',
139
139
  call_id: ic.id,
140
140
  transcript: 'good morning',
141
141
  },
@@ -130,26 +130,19 @@ async function test() {
130
130
  },
131
131
  ], 3000)
132
132
 
133
- sip.call.start_speech_synth(oc.id, {voice: 'slt', text: 'Hello World.', end_of_speech_event: true})
134
- sip.call.start_speech_synth(ic.id, {voice: 'kal', text: 'How are you?', end_of_speech_event: true, no_loop: true})
133
+ sip.call.start_speech_synth(oc.id, {voice: 'slt', text: 'Hello World.'})
134
+ sip.call.start_speech_synth(ic.id, {voice: 'kal', text: 'How are you?'})
135
135
 
136
136
  await z.wait([
137
137
  {
138
- event: 'end_of_speech',
138
+ event: 'speech_synth_complete',
139
139
  call_id: ic.id,
140
140
  },
141
141
  {
142
- event: 'end_of_speech',
142
+ event: 'speech_synth_complete',
143
143
  call_id: oc.id,
144
144
  },
145
- ], 2000)
146
-
147
- await z.wait([
148
- {
149
- event: 'end_of_speech',
150
- call_id: oc.id,
151
- },
152
- ], 2000)
145
+ ], 3000)
153
146
 
154
147
  sip.call.stop_speech_synth(oc.id) // this is not actually necessary. It is used just to confirm the command works
155
148
  sip.call.stop_speech_synth(ic.id) // this is not actually necessary. It is used just to confirm the command works
@@ -105,16 +105,16 @@ int make_evt_end_of_file(char *dest, int size, long call_id) {
105
105
  "{\"event\": \"end_of_file\", \"call_id\": %ld}", call_id);
106
106
  }
107
107
 
108
- int make_evt_end_of_speech(char *dest, int size, long call_id) {
108
+ int make_evt_speech_synth_complete(char *dest, int size, long call_id) {
109
109
  return snprintf(
110
110
  dest, size,
111
- "{\"event\": \"end_of_speech\", \"call_id\": %ld}", call_id);
111
+ "{\"event\": \"speech_synth_complete\", \"call_id\": %ld}", call_id);
112
112
  }
113
113
 
114
- int make_evt_speech_transcript(char *dest, int size, long call_id, char* transcript) {
114
+ int make_evt_speech(char *dest, int size, long call_id, char* transcript) {
115
115
  return snprintf(
116
116
  dest, size,
117
- "{\"event\": \"speech_transcript\", \"call_id\": %ld, \"transcript\": \"%s\"}", call_id, transcript);
117
+ "{\"event\": \"speech\", \"call_id\": %ld, \"transcript\": \"%s\"}", call_id, transcript);
118
118
  }
119
119
 
120
120
  int make_evt_tcp_msg(char *dest, int size, long call_id, const char *protocol, char *data, int data_len) {
@@ -122,3 +122,10 @@ int make_evt_tcp_msg(char *dest, int size, long call_id, const char *protocol, c
122
122
  dest, size,
123
123
  "{\"event\": \"%s_msg\", \"call_id\": %ld}\n%.*s", protocol, call_id, data_len, data);
124
124
  }
125
+
126
+ int make_evt_ws_speech_event(char *dest, int size, long call_id, char *data, int data_len) {
127
+ return snprintf(
128
+ dest, size,
129
+ "{\"event\": \"ws_speech_event\", \"call_id\": %ld, \"data\": %.*s}", call_id, data_len, data);
130
+ }
131
+
@@ -36,10 +36,12 @@ int make_evt_fax_result(char *dest, int size, long call_id, int result);
36
36
 
37
37
  int make_evt_end_of_file(char *dest, int size, long call_id);
38
38
 
39
- int make_evt_end_of_speech(char *dest, int size, long call_id);
39
+ int make_evt_speech_synth_complete(char *dest, int size, long call_id);
40
40
 
41
- int make_evt_speech_transcript(char *dest, int size, long call_id, char* transcript);
41
+ int make_evt_speech(char *dest, int size, long call_id, char* transcript);
42
42
 
43
43
  int make_evt_tcp_msg(char *dest, int size, long call_id, const char *protocol, char *data, int data_len);
44
44
 
45
+ int make_evt_ws_speech_event(char *dest, int size, long call_id, char *data, int data_len);
46
+
45
47
  #endif
@@ -5,7 +5,7 @@
5
5
 
6
6
  PJ_BEGIN_DECL
7
7
 
8
- enum pjmedia_filte_option
8
+ enum pjmedia_flite_option
9
9
  {
10
10
  PJMEDIA_SPEECH_NO_LOOP = 1
11
11
  };
@@ -25,7 +25,7 @@ PJ_DEF(pj_status_t) pjmedia_flite_port_set_eof_cb(pjmedia_port *port,
25
25
 
26
26
  PJ_DEF(pj_status_t) pjmedia_flite_port_speak( pjmedia_port *port,
27
27
  const char *text,
28
- unsigned options);
28
+ int times);
29
29
 
30
30
  PJ_END_DECL
31
31
 
@@ -0,0 +1,37 @@
1
+ #ifndef __WS_SPEECH_PORT_H__
2
+ #define __WS_SPEECH_PORT_H__
3
+
4
+ #include <pjmedia/port.h>
5
+ #include "websock.h"
6
+
7
+ PJ_BEGIN_DECL
8
+
9
+ enum ws_speech_event
10
+ {
11
+ WS_SPEECH_EVENT_CONNECTED,
12
+ WS_SPEECH_EVENT_CONNECTION_ERROR,
13
+ WS_SPEECH_EVENT_DISCONNECTED,
14
+ WS_SPEECH_EVENT_TEXT_MSG
15
+ };
16
+
17
+ PJ_DEF(pj_status_t) pjmedia_ws_speech_port_create( pj_pool_t *pool,
18
+ unsigned clock_rate,
19
+ unsigned channel_count,
20
+ unsigned samples_per_frame,
21
+ unsigned bits_per_sample,
22
+ pj_websock_endpoint *ws_endpt,
23
+ const char *server_url,
24
+ const char *ss_engine,
25
+ const char *ss_voice,
26
+ const char *ss_language,
27
+ const char *ss_text,
28
+ int ss_times,
29
+ const char *sr_engine,
30
+ const char *sr_language,
31
+ void (*cb)(pjmedia_port*, void *user_data, enum ws_speech_event, char *data, int len),
32
+ void *cb_user_data,
33
+ pjmedia_port **p_port);
34
+
35
+ PJ_END_DECL
36
+
37
+ #endif /* __WS_SPEECH_PORT_H__ */
@@ -53,7 +53,6 @@ static struct {
53
53
 
54
54
  struct flite_t {
55
55
  struct pjmedia_port base;
56
- unsigned options;
57
56
 
58
57
  cst_voice *v;
59
58
  unsigned written_samples;
@@ -61,6 +60,8 @@ struct flite_t {
61
60
 
62
61
  pj_bool_t subscribed;
63
62
  void (*cb)(pjmedia_port*, void*);
63
+
64
+ int times;
64
65
  };
65
66
 
66
67
  #define free_wave(w) if (w) {delete_wave(w) ; w = NULL; }
@@ -112,6 +113,7 @@ PJ_DEF(pj_status_t) pjmedia_flite_port_create( pj_pool_t *pool,
112
113
  const char *voice,
113
114
  pjmedia_port **p_port)
114
115
  {
116
+ printf("pjmedia_flite_port_create\n");
115
117
  struct flite_t *flite;
116
118
  const pj_str_t name = pj_str("flite_data");
117
119
 
@@ -164,13 +166,14 @@ PJ_DEF(pj_status_t) pjmedia_flite_port_create( pj_pool_t *pool,
164
166
 
165
167
  PJ_DEF(pj_status_t) pjmedia_flite_port_speak( pjmedia_port *port,
166
168
  const char *text,
167
- unsigned options) {
169
+ int times) {
170
+ printf("pjmedia_flite_port_speak. text=%s times=%i\n", text, times);
168
171
  struct flite_t *flite = (struct flite_t*)port;
169
172
  if(flite->w) {
170
173
  free_wave(flite->w);
171
174
  }
172
175
 
173
- flite->options = options;
176
+ flite->times = times;
174
177
 
175
178
  flite->w = flite_text_to_wave(text, flite->v);
176
179
  if ((unsigned)flite->w->sample_rate != PJMEDIA_PIA_SRATE(&port->info)) {
@@ -185,22 +188,25 @@ PJ_DEF(pj_status_t) pjmedia_flite_port_speak( pjmedia_port *port,
185
188
  // called when pjmedia needs data to be sent out
186
189
  static pj_status_t flite_get_frame(pjmedia_port *port,
187
190
  pjmedia_frame *frame) {
191
+ printf("flite_get_frame\n");
188
192
 
189
193
  PJ_ASSERT_RETURN(port && frame, PJ_EINVAL);
190
194
 
191
195
  struct flite_t *flite = (struct flite_t*)port;
192
196
 
193
- if(!flite->w) {
194
- //printf("flite no data\n");
197
+ if(flite->times <= 0 || !flite->w) {
198
+ printf("flite no data\n");
195
199
  frame->type = PJMEDIA_FRAME_TYPE_NONE;
196
200
  return PJ_SUCCESS;
197
201
  }
198
202
 
199
- //printf("written_samples=%i num_samples=%i\n", flite->written_samples, flite->w->num_samples);
203
+ printf("written_samples=%i num_samples=%i\n", flite->written_samples, flite->w->num_samples);
200
204
  if (flite->written_samples + PJMEDIA_PIA_SPF(&port->info) > (unsigned)flite->w->num_samples) {
201
205
  printf("flite end of speech\n");
202
206
 
203
- if(flite->cb) {
207
+ flite->times--;
208
+
209
+ if(flite->times <= 0 && flite->cb) {
204
210
  if (!flite->subscribed) {
205
211
  pj_status_t status = pjmedia_event_subscribe(NULL, &speech_on_event,
206
212
  flite, flite);
@@ -218,10 +224,9 @@ static pj_status_t flite_get_frame(pjmedia_port *port,
218
224
  }
219
225
  }
220
226
 
221
- pj_bool_t no_loop = (flite->options & PJMEDIA_SPEECH_NO_LOOP);
222
-
223
- if(no_loop) {
227
+ if(flite->times <= 0) {
224
228
  free_wave(flite->w);
229
+ flite->w = NULL;
225
230
  frame->type = PJMEDIA_FRAME_TYPE_NONE;
226
231
  return PJ_SUCCESS;
227
232
  } else {
@@ -232,7 +237,7 @@ static pj_status_t flite_get_frame(pjmedia_port *port,
232
237
  memcpy(frame->buf, flite->w->samples + flite->written_samples, PJMEDIA_PIA_SPF(&port->info)*2);
233
238
  flite->written_samples += PJMEDIA_PIA_SPF(&port->info);
234
239
  frame->type = PJMEDIA_FRAME_TYPE_AUDIO;
235
- //printf("flite data written samples=%i\n", PJMEDIA_PIA_SPF(&port->info));
240
+ printf("flite data written samples=%i\n", PJMEDIA_PIA_SPF(&port->info));
236
241
 
237
242
  return PJ_SUCCESS;
238
243
  }