sip-lab 1.27.0 → 1.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -1
- package/binding.gyp +8 -0
- package/build_deps.sh +9 -0
- package/package.json +1 -1
- package/prebuilds/linux-x64/sip-lab.node +0 -0
- package/samples/pcma.js +217 -0
- package/samples/play_wav_and_speech_recog.bad_transcript.pcmu8000.js +4 -4
- package/samples/speech_synth_and_recog.speex16000.js +6 -6
- package/samples/text_to_speech.js +5 -12
- package/src/event_templates.cpp +11 -4
- package/src/event_templates.hpp +4 -2
- package/src/pjmedia/include/pjmedia/flite_port.h +2 -2
- package/src/pjmedia/include/pjmedia/ws_speech_port.h +37 -0
- package/src/pjmedia/src/pjmedia/flite_port.c +16 -11
- package/src/pjmedia/src/pjmedia/ws_speech_port.cpp +377 -0
- package/src/sip.cpp +344 -105
package/src/sip.cpp
CHANGED
|
@@ -21,10 +21,13 @@
|
|
|
21
21
|
#include "idmanager.hpp"
|
|
22
22
|
#include "event_templates.hpp"
|
|
23
23
|
|
|
24
|
+
#include "websock.h"
|
|
25
|
+
|
|
24
26
|
#include "dtmfdet.h"
|
|
25
27
|
#include "fax_port.h"
|
|
26
28
|
#include "flite_port.h"
|
|
27
29
|
#include "pocketsphinx_port.h"
|
|
30
|
+
#include "ws_speech_port.h"
|
|
28
31
|
|
|
29
32
|
#include <ctime>
|
|
30
33
|
|
|
@@ -61,9 +64,14 @@ IdManager g_dialog_ids(IDS_MAX);
|
|
|
61
64
|
#define DEFAULT_CODEC_QUALITY (5)
|
|
62
65
|
|
|
63
66
|
static pjsip_endpoint *g_sip_endpt;
|
|
64
|
-
static pj_caching_pool
|
|
67
|
+
static pj_caching_pool g_cp;
|
|
65
68
|
static pj_pool_t *g_pool;
|
|
66
69
|
static pjmedia_endpt *g_med_endpt;
|
|
70
|
+
static pj_timer_heap_t *g_timer_heap = NULL;
|
|
71
|
+
static pj_websock_endpoint *g_ws_endpt = NULL;
|
|
72
|
+
|
|
73
|
+
#define CERT_FILE "./cert/test.pem"
|
|
74
|
+
#define CERT_KEY "./cert/test.key"
|
|
67
75
|
|
|
68
76
|
// static pj_thread_t *g_thread = NULL;
|
|
69
77
|
// static pj_bool_t g_thread_quit_flag;
|
|
@@ -303,10 +311,15 @@ struct Subscription {
|
|
|
303
311
|
bool initialized;
|
|
304
312
|
};
|
|
305
313
|
|
|
314
|
+
#define IMPLEMENTATION_FLITE 1
|
|
315
|
+
#define IMPLEMENTATION_POCKETSPHINX 2
|
|
316
|
+
#define IMPLEMENTATION_WS_SPEECH 3
|
|
317
|
+
|
|
306
318
|
struct ConfBridgePort {
|
|
307
319
|
unsigned slot;
|
|
308
320
|
pjmedia_port *port;
|
|
309
321
|
short connection_mode;
|
|
322
|
+
short implementation;
|
|
310
323
|
};
|
|
311
324
|
|
|
312
325
|
#define FP_DTMFDET 0
|
|
@@ -624,8 +637,8 @@ bool prepare_dtmfdet(Call *call, AudioEndpoint *ae);
|
|
|
624
637
|
bool prepare_wav_player(Call *call, AudioEndpoint *ae, const char *file, unsigned flags, bool end_of_file_event);
|
|
625
638
|
bool prepare_wav_writer(Call *call, AudioEndpoint *ae, const char *file);
|
|
626
639
|
bool prepare_fax(Call *call, AudioEndpoint *ae, bool is_sender, const char *file, unsigned flags);
|
|
627
|
-
bool
|
|
628
|
-
bool
|
|
640
|
+
bool prepare_speech_synth(Call *call, AudioEndpoint *ae, const char *server_url, const char *engine, const char *voice, const char *language, const char *text, int times);
|
|
641
|
+
bool prepare_speech_recog(Call *call, AudioEndpoint *ae, const char *server_url, const char *engine, const char *language);
|
|
629
642
|
|
|
630
643
|
void prepare_error_event(ostringstream *oss, char *scope, char *details);
|
|
631
644
|
// void prepare_pjsipcall_error_event(ostringstream *oss, char *scope, char
|
|
@@ -859,6 +872,7 @@ static void on_fax_result(pjmedia_port *port, void *user_data, int result) {
|
|
|
859
872
|
}
|
|
860
873
|
|
|
861
874
|
static void on_end_of_file(pjmedia_port *port, void *user_data) {
|
|
875
|
+
printf("on_end_of_file\n");
|
|
862
876
|
if (g_shutting_down)
|
|
863
877
|
return;
|
|
864
878
|
|
|
@@ -874,19 +888,19 @@ static void on_end_of_file(pjmedia_port *port, void *user_data) {
|
|
|
874
888
|
dispatch_event(evt);
|
|
875
889
|
}
|
|
876
890
|
|
|
877
|
-
static void
|
|
891
|
+
static void on_end_of_speech_synth(pjmedia_port *port, void *user_data) {
|
|
878
892
|
if (g_shutting_down)
|
|
879
893
|
return;
|
|
880
894
|
|
|
881
895
|
long call_id;
|
|
882
896
|
if (!g_call_ids.get_id((long)user_data, call_id)) {
|
|
883
897
|
printf(
|
|
884
|
-
"
|
|
898
|
+
"on_end_of_speech_synth: Failed to get call_id. Event will not be notified.\n");
|
|
885
899
|
return;
|
|
886
900
|
}
|
|
887
901
|
|
|
888
902
|
char evt[1024];
|
|
889
|
-
|
|
903
|
+
make_evt_speech_synth_complete(evt, sizeof(evt), call_id);
|
|
890
904
|
dispatch_event(evt);
|
|
891
905
|
}
|
|
892
906
|
|
|
@@ -903,10 +917,70 @@ static void on_speech_transcript(pjmedia_port*, void *user_data, char* transcrip
|
|
|
903
917
|
}
|
|
904
918
|
|
|
905
919
|
char evt[1024];
|
|
906
|
-
|
|
920
|
+
make_evt_speech(evt, sizeof(evt), call_id, transcript);
|
|
907
921
|
dispatch_event(evt);
|
|
908
922
|
}
|
|
909
923
|
|
|
924
|
+
static void on_ws_speech_event(pjmedia_port*, void *user_data, enum ws_speech_event e, char *data, int len) {
|
|
925
|
+
char evt[2048];
|
|
926
|
+
|
|
927
|
+
if (g_shutting_down)
|
|
928
|
+
return;
|
|
929
|
+
|
|
930
|
+
long call_id;
|
|
931
|
+
if (!g_call_ids.get_id((long)user_data, call_id)) {
|
|
932
|
+
addon_log(
|
|
933
|
+
L_DBG,
|
|
934
|
+
"on_ws_speech_event: Failed to get call_id. Event will not be notified.\n");
|
|
935
|
+
return;
|
|
936
|
+
}
|
|
937
|
+
|
|
938
|
+
if(e == WS_SPEECH_EVENT_TEXT_MSG) {
|
|
939
|
+
rapidjson::Document document;
|
|
940
|
+
|
|
941
|
+
// Parse the JSON string from the buffer with specified length
|
|
942
|
+
if (document.Parse(data, len).HasParseError()) {
|
|
943
|
+
addon_log(
|
|
944
|
+
L_DBG,
|
|
945
|
+
"on_ws_speech_event: Failed to parse JSON string.\n");
|
|
946
|
+
return;
|
|
947
|
+
}
|
|
948
|
+
if (!document.HasMember("evt") || !document["evt"].IsString()) {
|
|
949
|
+
make_evt_ws_speech_event(evt, sizeof(evt), call_id, data, len);
|
|
950
|
+
dispatch_event(evt);
|
|
951
|
+
return;
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
if (strcmp(document["evt"].GetString(), "synth_complete") == 0) {
|
|
955
|
+
make_evt_speech_synth_complete(evt, sizeof(evt), call_id);
|
|
956
|
+
dispatch_event(evt);
|
|
957
|
+
return;
|
|
958
|
+
} else if (strcmp(document["evt"].GetString(), "speech") == 0) {
|
|
959
|
+
if (!document.HasMember("data") || !document["data"].IsObject()) {
|
|
960
|
+
make_evt_ws_speech_event(evt, sizeof(evt), call_id, data, len);
|
|
961
|
+
dispatch_event(evt);
|
|
962
|
+
return;
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
const rapidjson::Value& evt_data = document["data"];
|
|
966
|
+
|
|
967
|
+
if (!evt_data.HasMember("transcript") || !evt_data["transcript"].IsString()) {
|
|
968
|
+
make_evt_ws_speech_event(evt, sizeof(evt), call_id, data, len);
|
|
969
|
+
dispatch_event(evt);
|
|
970
|
+
return;
|
|
971
|
+
}
|
|
972
|
+
|
|
973
|
+
make_evt_speech(evt, sizeof(evt), call_id, (char*)evt_data["transcript"].GetString());
|
|
974
|
+
dispatch_event(evt);
|
|
975
|
+
return;
|
|
976
|
+
}
|
|
977
|
+
}
|
|
978
|
+
|
|
979
|
+
make_evt_ws_speech_event(evt, sizeof(evt), call_id, data, len);
|
|
980
|
+
dispatch_event(evt);
|
|
981
|
+
}
|
|
982
|
+
|
|
983
|
+
|
|
910
984
|
void dispatch_event(const char *evt) {
|
|
911
985
|
addon_log(L_DBG, "dispach_event called with evt=%s\n", evt);
|
|
912
986
|
// g_event_sink(evt);
|
|
@@ -1233,27 +1307,37 @@ int __pjw_init() {
|
|
|
1233
1307
|
return 1;
|
|
1234
1308
|
}
|
|
1235
1309
|
|
|
1310
|
+
unsigned log_decor = pj_log_get_decor();
|
|
1311
|
+
log_decor |= PJ_LOG_HAS_LEVEL_TEXT;
|
|
1312
|
+
log_decor |= PJ_LOG_HAS_SENDER;
|
|
1313
|
+
pj_log_set_decor(log_decor);
|
|
1314
|
+
|
|
1315
|
+
|
|
1236
1316
|
status = pjlib_util_init();
|
|
1237
1317
|
if (status != PJ_SUCCESS) {
|
|
1238
1318
|
addon_log(L_DBG, "pj_lib_util_init failed\n");
|
|
1239
1319
|
return 1;
|
|
1240
1320
|
}
|
|
1241
1321
|
|
|
1322
|
+
pj_time_val now;
|
|
1323
|
+
pj_gettimeofday(&now);
|
|
1324
|
+
pj_srand((unsigned)now.sec);
|
|
1325
|
+
|
|
1242
1326
|
pthread_mutex_init(&g_mutex, NULL);
|
|
1243
1327
|
|
|
1244
1328
|
pj_log_set_level(0);
|
|
1245
1329
|
|
|
1246
|
-
pj_caching_pool_init(&
|
|
1330
|
+
pj_caching_pool_init(&g_cp, &pj_pool_factory_default_policy, 0);
|
|
1247
1331
|
|
|
1248
1332
|
char *sip_endpt_name = (char *)"mysip";
|
|
1249
1333
|
|
|
1250
|
-
status = pjsip_endpt_create(&
|
|
1334
|
+
status = pjsip_endpt_create(&g_cp.factory, sip_endpt_name, &g_sip_endpt);
|
|
1251
1335
|
if (status != PJ_SUCCESS) {
|
|
1252
1336
|
addon_log(L_DBG, "pjsip_endpt_create failed\n");
|
|
1253
1337
|
return 1;
|
|
1254
1338
|
}
|
|
1255
1339
|
|
|
1256
|
-
g_pool = pj_pool_create(&
|
|
1340
|
+
g_pool = pj_pool_create(&g_cp.factory, "tester", 1000, 1000, NULL);
|
|
1257
1341
|
|
|
1258
1342
|
/* Create event manager */
|
|
1259
1343
|
status = pjmedia_event_mgr_create(g_pool, 0, NULL);
|
|
@@ -1352,10 +1436,10 @@ int __pjw_init() {
|
|
|
1352
1436
|
return 1;
|
|
1353
1437
|
}
|
|
1354
1438
|
#if PJ_HAS_THREADS
|
|
1355
|
-
status = pjmedia_endpt_create2(&
|
|
1439
|
+
status = pjmedia_endpt_create2(&g_cp.factory, NULL, 1, &g_med_endpt);
|
|
1356
1440
|
#else
|
|
1357
1441
|
status = pjmedia_endpt_create2(
|
|
1358
|
-
&
|
|
1442
|
+
&g_cp.factory, pjsip_endpt_get_ioqueue(g_sip_endpt), 0, &g_med_endpt);
|
|
1359
1443
|
#endif
|
|
1360
1444
|
if (status != PJ_SUCCESS) {
|
|
1361
1445
|
addon_log(L_DBG, "pjmedia_endpt_create failed\n");
|
|
@@ -1448,6 +1532,33 @@ int __pjw_init() {
|
|
|
1448
1532
|
return 1;
|
|
1449
1533
|
}
|
|
1450
1534
|
|
|
1535
|
+
status = pj_timer_heap_create(g_pool, 128, &g_timer_heap);
|
|
1536
|
+
if (status != PJ_SUCCESS) {
|
|
1537
|
+
addon_log(L_DBG, "create timer heap error");
|
|
1538
|
+
return 1;
|
|
1539
|
+
}
|
|
1540
|
+
|
|
1541
|
+
|
|
1542
|
+
pj_websock_ssl_cert cert;
|
|
1543
|
+
pj_bzero(&cert, sizeof(cert));
|
|
1544
|
+
cert.ca_file = pj_str(CERT_FILE);
|
|
1545
|
+
cert.cert_file = pj_str(CERT_FILE);
|
|
1546
|
+
cert.private_file = pj_str(CERT_KEY);
|
|
1547
|
+
|
|
1548
|
+
pj_websock_endpt_cfg opt;
|
|
1549
|
+
pj_websock_endpt_cfg_default(&opt);
|
|
1550
|
+
opt.pf = &g_cp.factory;
|
|
1551
|
+
opt.ioq = pjsip_endpt_get_ioqueue(g_sip_endpt);
|
|
1552
|
+
opt.timer_heap = g_timer_heap;
|
|
1553
|
+
opt.cert = &cert;
|
|
1554
|
+
opt.async_cnt = 3;
|
|
1555
|
+
|
|
1556
|
+
status = pj_websock_endpt_create(&opt, &g_ws_endpt);
|
|
1557
|
+
if (status != PJ_SUCCESS) {
|
|
1558
|
+
addon_log(L_DBG, "create websock endpoint error");
|
|
1559
|
+
return 1;
|
|
1560
|
+
}
|
|
1561
|
+
|
|
1451
1562
|
return 0;
|
|
1452
1563
|
}
|
|
1453
1564
|
|
|
@@ -1510,8 +1621,8 @@ pjsip_transport *create_udp_transport(pjsip_endpoint *sip_endpt,
|
|
|
1510
1621
|
// pj_status_t status;
|
|
1511
1622
|
pjsip_transport *transport;
|
|
1512
1623
|
|
|
1513
|
-
int port = 5060;
|
|
1514
1624
|
for (int i = 0; i < 1000; ++i) {
|
|
1625
|
+
int port = 5060;
|
|
1515
1626
|
port += i;
|
|
1516
1627
|
transport = allocate_udp_transport(sip_endpt, ipaddr, port);
|
|
1517
1628
|
if (transport) {
|
|
@@ -1558,8 +1669,8 @@ pjsip_tpfactory *create_tcp_tpfactory(pjsip_endpoint *sip_endpt,
|
|
|
1558
1669
|
// pj_status_t status;
|
|
1559
1670
|
pjsip_tpfactory *tpfactory;
|
|
1560
1671
|
|
|
1561
|
-
int port = 6060;
|
|
1562
1672
|
for (int i = 0; i < 1000; ++i) {
|
|
1673
|
+
int port = 6060;
|
|
1563
1674
|
port += i;
|
|
1564
1675
|
tpfactory = allocate_tcp_tpfactory(sip_endpt, ipaddr, port);
|
|
1565
1676
|
if (tpfactory) {
|
|
@@ -1609,8 +1720,8 @@ pjsip_tpfactory *create_tls_tpfactory(pjsip_endpoint *sip_endpt,
|
|
|
1609
1720
|
// pj_status_t status;
|
|
1610
1721
|
pjsip_tpfactory *tpfactory;
|
|
1611
1722
|
|
|
1612
|
-
int port = 6060;
|
|
1613
1723
|
for (int i = 0; i < 1000; ++i) {
|
|
1724
|
+
int port = 6060;
|
|
1614
1725
|
port += i;
|
|
1615
1726
|
tpfactory = allocate_tls_tpfactory(sip_endpt, ipaddr, port);
|
|
1616
1727
|
if (tpfactory) {
|
|
@@ -3815,7 +3926,7 @@ out:
|
|
|
3815
3926
|
return 0;
|
|
3816
3927
|
}
|
|
3817
3928
|
|
|
3818
|
-
pj_status_t audio_endpoint_start_speech_synth(Call *call, AudioEndpoint *ae, const char * voice, const char *
|
|
3929
|
+
pj_status_t audio_endpoint_start_speech_synth(Call *call, AudioEndpoint *ae, const char *server_url, const char *engine, const char *voice, const char *language, const char *text, int times) {
|
|
3819
3930
|
pj_status_t status;
|
|
3820
3931
|
|
|
3821
3932
|
if(!ae->stream_cbp.port) {
|
|
@@ -3829,12 +3940,10 @@ pj_status_t audio_endpoint_start_speech_synth(Call *call, AudioEndpoint *ae, con
|
|
|
3829
3940
|
return -1;
|
|
3830
3941
|
}
|
|
3831
3942
|
|
|
3832
|
-
if (!
|
|
3943
|
+
if (!prepare_speech_synth(call, ae, server_url, engine, voice, language, text, times)) {
|
|
3833
3944
|
return -1;
|
|
3834
3945
|
}
|
|
3835
3946
|
|
|
3836
|
-
pjmedia_flite_port_speak(ae->feature_cbps[FP_SPEECH_SYNTH].port, text, flags);
|
|
3837
|
-
|
|
3838
3947
|
return PJ_SUCCESS;
|
|
3839
3948
|
}
|
|
3840
3949
|
|
|
@@ -3854,21 +3963,23 @@ int pjw_call_start_speech_synth(long call_id, const char *json) {
|
|
|
3854
3963
|
|
|
3855
3964
|
int media_id = -1;
|
|
3856
3965
|
|
|
3857
|
-
char *
|
|
3966
|
+
char *server_url = NULL;
|
|
3858
3967
|
|
|
3859
|
-
char *
|
|
3968
|
+
char *engine = NULL;
|
|
3860
3969
|
|
|
3861
|
-
|
|
3970
|
+
char *voice = NULL;
|
|
3862
3971
|
|
|
3863
|
-
|
|
3972
|
+
char *language = NULL;
|
|
3864
3973
|
|
|
3865
|
-
|
|
3974
|
+
char *text;
|
|
3975
|
+
|
|
3976
|
+
int times = 1;
|
|
3866
3977
|
|
|
3867
3978
|
char buffer[MAX_JSON_INPUT];
|
|
3868
3979
|
|
|
3869
3980
|
Document document;
|
|
3870
3981
|
|
|
3871
|
-
const char *valid_params[] = {"voice", "
|
|
3982
|
+
const char *valid_params[] = {"server_url", "engine", "voice", "language", "text", "times", "media_id", ""};
|
|
3872
3983
|
|
|
3873
3984
|
if (!g_call_ids.get(call_id, val)) {
|
|
3874
3985
|
set_error("Invalid call_id");
|
|
@@ -3891,6 +4002,14 @@ int pjw_call_start_speech_synth(long call_id, const char *json) {
|
|
|
3891
4002
|
goto out;
|
|
3892
4003
|
}
|
|
3893
4004
|
|
|
4005
|
+
if (json_get_string_param(document, "server_url", true, &server_url) <= 0) {
|
|
4006
|
+
goto out;
|
|
4007
|
+
}
|
|
4008
|
+
|
|
4009
|
+
if (json_get_string_param(document, "engine", true, &engine) <= 0) {
|
|
4010
|
+
goto out;
|
|
4011
|
+
}
|
|
4012
|
+
|
|
3894
4013
|
if (json_get_string_param(document, "voice", false, &voice) <= 0) {
|
|
3895
4014
|
goto out;
|
|
3896
4015
|
}
|
|
@@ -3900,6 +4019,10 @@ int pjw_call_start_speech_synth(long call_id, const char *json) {
|
|
|
3900
4019
|
goto out;
|
|
3901
4020
|
}
|
|
3902
4021
|
|
|
4022
|
+
if (json_get_string_param(document, "language", true, &language) <= 0) {
|
|
4023
|
+
goto out;
|
|
4024
|
+
}
|
|
4025
|
+
|
|
3903
4026
|
if (json_get_string_param(document, "text", false, &text) <= 0) {
|
|
3904
4027
|
goto out;
|
|
3905
4028
|
}
|
|
@@ -3909,21 +4032,31 @@ int pjw_call_start_speech_synth(long call_id, const char *json) {
|
|
|
3909
4032
|
goto out;
|
|
3910
4033
|
}
|
|
3911
4034
|
|
|
3912
|
-
if (
|
|
4035
|
+
if (json_get_int_param(document, "times", true, &times) <= 0) {
|
|
3913
4036
|
goto out;
|
|
3914
4037
|
}
|
|
3915
4038
|
|
|
3916
|
-
|
|
4039
|
+
res = json_get_int_param(document, "media_id", true, &media_id);
|
|
4040
|
+
if (res <= 0) {
|
|
3917
4041
|
goto out;
|
|
3918
4042
|
}
|
|
3919
4043
|
|
|
3920
|
-
if(
|
|
3921
|
-
|
|
3922
|
-
|
|
4044
|
+
if(server_url || engine || language) {
|
|
4045
|
+
// If any is set then all must be set and must be non-empty string (required by ws_speech_port)
|
|
4046
|
+
if(!server_url || !server_url[0]) {
|
|
4047
|
+
set_error("server_url must be present and cannot be empty string");
|
|
4048
|
+
goto out;
|
|
4049
|
+
}
|
|
3923
4050
|
|
|
3924
|
-
|
|
3925
|
-
|
|
3926
|
-
|
|
4051
|
+
if(!engine || !engine[0]) {
|
|
4052
|
+
set_error("engine must be present and cannot be empty string");
|
|
4053
|
+
goto out;
|
|
4054
|
+
}
|
|
4055
|
+
|
|
4056
|
+
if(!language || !language[0]) {
|
|
4057
|
+
set_error("language must be present and cannot be empty string");
|
|
4058
|
+
goto out;
|
|
4059
|
+
}
|
|
3927
4060
|
}
|
|
3928
4061
|
|
|
3929
4062
|
if (NOT_FOUND_OPTIONAL == res) {
|
|
@@ -3932,7 +4065,7 @@ int pjw_call_start_speech_synth(long call_id, const char *json) {
|
|
|
3932
4065
|
MediaEndpoint *me = (MediaEndpoint *)call->media[i];
|
|
3933
4066
|
if (me->type == ENDPOINT_TYPE_AUDIO) {
|
|
3934
4067
|
AudioEndpoint *ae = (AudioEndpoint *)me->endpoint.audio;
|
|
3935
|
-
status = audio_endpoint_start_speech_synth(call, ae, voice,
|
|
4068
|
+
status = audio_endpoint_start_speech_synth(call, ae, server_url, engine, voice, language, text, times);
|
|
3936
4069
|
if (status != PJ_SUCCESS) goto out;
|
|
3937
4070
|
}
|
|
3938
4071
|
}
|
|
@@ -3950,7 +4083,7 @@ int pjw_call_start_speech_synth(long call_id, const char *json) {
|
|
|
3950
4083
|
|
|
3951
4084
|
ae = (AudioEndpoint *)me->endpoint.audio;
|
|
3952
4085
|
|
|
3953
|
-
audio_endpoint_start_speech_synth(call, ae, voice,
|
|
4086
|
+
audio_endpoint_start_speech_synth(call, ae, server_url, engine, voice, language, text, times);
|
|
3954
4087
|
}
|
|
3955
4088
|
|
|
3956
4089
|
out:
|
|
@@ -3962,7 +4095,7 @@ out:
|
|
|
3962
4095
|
return 0;
|
|
3963
4096
|
}
|
|
3964
4097
|
|
|
3965
|
-
pj_status_t audio_endpoint_start_speech_recog(Call *call, AudioEndpoint *ae) {
|
|
4098
|
+
pj_status_t audio_endpoint_start_speech_recog(Call *call, AudioEndpoint *ae, const char *server_url, const char *engine, const char *language) {
|
|
3966
4099
|
pj_status_t status;
|
|
3967
4100
|
|
|
3968
4101
|
if(!ae->stream_cbp.port) {
|
|
@@ -3976,7 +4109,7 @@ pj_status_t audio_endpoint_start_speech_recog(Call *call, AudioEndpoint *ae) {
|
|
|
3976
4109
|
return -1;
|
|
3977
4110
|
}
|
|
3978
4111
|
|
|
3979
|
-
if (!
|
|
4112
|
+
if (!prepare_speech_recog(call, ae, server_url, engine, language)) {
|
|
3980
4113
|
return -1;
|
|
3981
4114
|
}
|
|
3982
4115
|
|
|
@@ -3999,21 +4132,17 @@ int pjw_call_start_speech_recog(long call_id, const char *json) {
|
|
|
3999
4132
|
|
|
4000
4133
|
int media_id = -1;
|
|
4001
4134
|
|
|
4002
|
-
char *
|
|
4003
|
-
|
|
4004
|
-
char *text;
|
|
4005
|
-
|
|
4006
|
-
bool end_of_speech_event = false;
|
|
4135
|
+
char *server_url = NULL;
|
|
4007
4136
|
|
|
4008
|
-
|
|
4137
|
+
char *engine = NULL;
|
|
4009
4138
|
|
|
4010
|
-
|
|
4139
|
+
char *language = NULL;
|
|
4011
4140
|
|
|
4012
4141
|
char buffer[MAX_JSON_INPUT];
|
|
4013
4142
|
|
|
4014
4143
|
Document document;
|
|
4015
4144
|
|
|
4016
|
-
const char *valid_params[] = {"media_id", ""};
|
|
4145
|
+
const char *valid_params[] = {"server_url", "engine", "language", "media_id", ""};
|
|
4017
4146
|
|
|
4018
4147
|
if (!g_call_ids.get(call_id, val)) {
|
|
4019
4148
|
set_error("Invalid call_id");
|
|
@@ -4036,18 +4165,48 @@ int pjw_call_start_speech_recog(long call_id, const char *json) {
|
|
|
4036
4165
|
goto out;
|
|
4037
4166
|
}
|
|
4038
4167
|
|
|
4168
|
+
if (json_get_string_param(document, "server_url", true, &server_url) <= 0) {
|
|
4169
|
+
goto out;
|
|
4170
|
+
}
|
|
4171
|
+
|
|
4172
|
+
if (json_get_string_param(document, "engine", true, &engine) <= 0) {
|
|
4173
|
+
goto out;
|
|
4174
|
+
}
|
|
4175
|
+
|
|
4176
|
+
if (json_get_string_param(document, "language", true, &language) <= 0) {
|
|
4177
|
+
goto out;
|
|
4178
|
+
}
|
|
4179
|
+
|
|
4039
4180
|
res = json_get_int_param(document, "media_id", true, &media_id);
|
|
4040
4181
|
if (res <= 0) {
|
|
4041
4182
|
goto out;
|
|
4042
4183
|
}
|
|
4043
4184
|
|
|
4185
|
+
if(server_url || engine || language) {
|
|
4186
|
+
// If any is set then all must be set and must be non-empty string (required by ws_speech_port)
|
|
4187
|
+
if(!server_url || !server_url[0]) {
|
|
4188
|
+
set_error("server_url must be present and cannot be empty string");
|
|
4189
|
+
goto out;
|
|
4190
|
+
}
|
|
4191
|
+
|
|
4192
|
+
if(!engine || !engine[0]) {
|
|
4193
|
+
set_error("engine must be present and cannot be empty string");
|
|
4194
|
+
goto out;
|
|
4195
|
+
}
|
|
4196
|
+
|
|
4197
|
+
if(!language || !language[0]) {
|
|
4198
|
+
set_error("language must be present and cannot be empty string");
|
|
4199
|
+
goto out;
|
|
4200
|
+
}
|
|
4201
|
+
}
|
|
4202
|
+
|
|
4044
4203
|
if (NOT_FOUND_OPTIONAL == res) {
|
|
4045
4204
|
// start on all audio media endpoints
|
|
4046
4205
|
for (int i = 0; i < call->media_count; i++) {
|
|
4047
4206
|
MediaEndpoint *me = (MediaEndpoint *)call->media[i];
|
|
4048
4207
|
if (me->type == ENDPOINT_TYPE_AUDIO) {
|
|
4049
4208
|
AudioEndpoint *ae = (AudioEndpoint *)me->endpoint.audio;
|
|
4050
|
-
status = audio_endpoint_start_speech_recog(call, ae);
|
|
4209
|
+
status = audio_endpoint_start_speech_recog(call, ae, server_url, engine, language);
|
|
4051
4210
|
if (status != PJ_SUCCESS) goto out;
|
|
4052
4211
|
}
|
|
4053
4212
|
}
|
|
@@ -4065,7 +4224,7 @@ int pjw_call_start_speech_recog(long call_id, const char *json) {
|
|
|
4065
4224
|
|
|
4066
4225
|
ae = (AudioEndpoint *)me->endpoint.audio;
|
|
4067
4226
|
|
|
4068
|
-
audio_endpoint_start_speech_recog(call, ae);
|
|
4227
|
+
audio_endpoint_start_speech_recog(call, ae, server_url, engine, language);
|
|
4069
4228
|
}
|
|
4070
4229
|
|
|
4071
4230
|
out:
|
|
@@ -4682,12 +4841,6 @@ bool start_tcp_media(Call *call, MediaEndpoint *me,
|
|
|
4682
4841
|
void close_audio_endpoint_ports_and_conf(Call *call, AudioEndpoint *ae) {
|
|
4683
4842
|
pj_status_t status;
|
|
4684
4843
|
|
|
4685
|
-
audio_endpoint_remove_port(call, ae, &ae->stream_cbp);
|
|
4686
|
-
|
|
4687
|
-
for(int i=0 ; i<MAX_FP ; i++) {
|
|
4688
|
-
audio_endpoint_remove_port(call, ae, &ae->feature_cbps[i]);
|
|
4689
|
-
}
|
|
4690
|
-
|
|
4691
4844
|
if (ae->master_port) {
|
|
4692
4845
|
status = pjmedia_master_port_stop(ae->master_port);
|
|
4693
4846
|
if(status != PJ_SUCCESS) {
|
|
@@ -4700,6 +4853,12 @@ void close_audio_endpoint_ports_and_conf(Call *call, AudioEndpoint *ae) {
|
|
|
4700
4853
|
ae->master_port = NULL;
|
|
4701
4854
|
}
|
|
4702
4855
|
|
|
4856
|
+
audio_endpoint_remove_port(call, ae, &ae->stream_cbp);
|
|
4857
|
+
|
|
4858
|
+
for(int i=0 ; i<MAX_FP ; i++) {
|
|
4859
|
+
audio_endpoint_remove_port(call, ae, &ae->feature_cbps[i]);
|
|
4860
|
+
}
|
|
4861
|
+
|
|
4703
4862
|
if (ae->conf) {
|
|
4704
4863
|
status = pjmedia_conf_destroy(ae->conf);
|
|
4705
4864
|
if(status != PJ_SUCCESS) {
|
|
@@ -4751,6 +4910,19 @@ bool restart_media_stream(Call *call, MediaEndpoint *me,
|
|
|
4751
4910
|
pjmedia_port *old_port = ae->stream_cbp.port;
|
|
4752
4911
|
pjmedia_port *new_port;
|
|
4753
4912
|
|
|
4913
|
+
bool master_port_was_stopped = false;
|
|
4914
|
+
|
|
4915
|
+
if(ae->master_port) {
|
|
4916
|
+
status = pjmedia_master_port_stop(ae->master_port);
|
|
4917
|
+
if(status != PJ_SUCCESS) {
|
|
4918
|
+
make_evt_media_update(evt, sizeof(evt), call->id,
|
|
4919
|
+
"setup_failed (pjmedia_master_port_stop failed)", "");
|
|
4920
|
+
dispatch_event(evt);
|
|
4921
|
+
return false;
|
|
4922
|
+
}
|
|
4923
|
+
master_port_was_stopped = true;
|
|
4924
|
+
}
|
|
4925
|
+
|
|
4754
4926
|
status =
|
|
4755
4927
|
pjmedia_stream_info_from_sdp(&stream_info, call->inv->dlg->pool,
|
|
4756
4928
|
g_med_endpt, local_sdp, remote_sdp, idx);
|
|
@@ -4906,7 +5078,17 @@ bool restart_media_stream(Call *call, MediaEndpoint *me,
|
|
|
4906
5078
|
}
|
|
4907
5079
|
}
|
|
4908
5080
|
}
|
|
4909
|
-
|
|
5081
|
+
|
|
5082
|
+
if(master_port_was_stopped) {
|
|
5083
|
+
status = pjmedia_master_port_start(ae->master_port);
|
|
5084
|
+
if(status != PJ_SUCCESS) {
|
|
5085
|
+
make_evt_media_update(evt, sizeof(evt), call->id,
|
|
5086
|
+
"setup_failed (pjmedia_master_port_start failed)", "");
|
|
5087
|
+
dispatch_event(evt);
|
|
5088
|
+
return false;
|
|
5089
|
+
}
|
|
5090
|
+
}
|
|
5091
|
+
|
|
4910
5092
|
return true;
|
|
4911
5093
|
}
|
|
4912
5094
|
|
|
@@ -5037,19 +5219,6 @@ static void on_media_update(pjsip_inv_session *inv, pj_status_t status) {
|
|
|
5037
5219
|
static void on_state_changed(pjsip_inv_session *inv, pjsip_event *e) {
|
|
5038
5220
|
addon_log(L_DBG, "on_state_changed\n");
|
|
5039
5221
|
|
|
5040
|
-
// The below is just to document know-how for future improvements
|
|
5041
|
-
/*
|
|
5042
|
-
addon_log(L_DBG, "on_state_changed e->type=%i\n", e->type);
|
|
5043
|
-
if(e->type == PJSIP_EVENT_TSX_STATE && e->body.tsx_state.type ==
|
|
5044
|
-
PJSIP_EVENT_RX_MSG) {
|
|
5045
|
-
// Read http://trac.pjsip.org/repos/wiki/SIP_Message_Buffer_Event
|
|
5046
|
-
addon_log(L_DBG, "Msg=%s\n",
|
|
5047
|
-
e->body.tsx_state.src.rdata->msg_info.msg_buf);
|
|
5048
|
-
}
|
|
5049
|
-
*/
|
|
5050
|
-
|
|
5051
|
-
printf("e->type=%d\n", e->type);
|
|
5052
|
-
|
|
5053
5222
|
/*
|
|
5054
5223
|
pj_str_t *method_name = &rdata->msg_info.msg->line.req.method.name;
|
|
5055
5224
|
addon_log(L_DBG, "on_rx_request %.*s\n", method_name->slen,
|
|
@@ -5107,7 +5276,7 @@ static void on_state_changed(pjsip_inv_session *inv, pjsip_event *e) {
|
|
|
5107
5276
|
char evt[2048];
|
|
5108
5277
|
int sip_msg_len = 0;
|
|
5109
5278
|
char *sip_msg = (char *)"";
|
|
5110
|
-
if
|
|
5279
|
+
if(e->type == PJSIP_EVENT_TSX_STATE && e->body.tsx_state.type == PJSIP_EVENT_RX_MSG) {
|
|
5111
5280
|
sip_msg_len = e->body.rx_msg.rdata->msg_info.len;
|
|
5112
5281
|
sip_msg = e->body.rx_msg.rdata->msg_info.msg_buf;
|
|
5113
5282
|
}
|
|
@@ -6682,8 +6851,6 @@ bool is_media_active(Call *call, MediaEndpoint *me) {
|
|
|
6682
6851
|
void close_media_endpoint(Call *call, MediaEndpoint *me) {
|
|
6683
6852
|
printf("close_media_endpoint %p\n", (void*)me);
|
|
6684
6853
|
|
|
6685
|
-
pj_status_t status;
|
|
6686
|
-
|
|
6687
6854
|
if(!me) return;
|
|
6688
6855
|
|
|
6689
6856
|
if (ENDPOINT_TYPE_AUDIO == me->type) {
|
|
@@ -6878,7 +7045,7 @@ bool prepare_fax(Call *call, AudioEndpoint *ae, bool is_sender, const char *file
|
|
|
6878
7045
|
return connect_feature_port_to_stream_port(call, ae, fp);
|
|
6879
7046
|
}
|
|
6880
7047
|
|
|
6881
|
-
bool
|
|
7048
|
+
bool prepare_speech_synth(Call *call, AudioEndpoint *ae, const char *server_url, const char *engine, const char *voice, const char *language, const char *text, int times) {
|
|
6882
7049
|
pj_status_t status;
|
|
6883
7050
|
|
|
6884
7051
|
ConfBridgePort *fp = &ae->feature_cbps[FP_SPEECH_SYNTH];
|
|
@@ -6888,38 +7055,85 @@ bool prepare_flite(Call *call, AudioEndpoint *ae, const char *voice, bool end_of
|
|
|
6888
7055
|
return true;
|
|
6889
7056
|
}
|
|
6890
7057
|
|
|
6891
|
-
|
|
6892
|
-
|
|
6893
|
-
|
|
6894
|
-
|
|
6895
|
-
|
|
6896
|
-
|
|
6897
|
-
|
|
6898
|
-
|
|
6899
|
-
|
|
6900
|
-
|
|
6901
|
-
|
|
7058
|
+
if(!server_url) {
|
|
7059
|
+
status = pjmedia_flite_port_create(
|
|
7060
|
+
call->inv->pool,
|
|
7061
|
+
PJMEDIA_PIA_SRATE(&ae->stream_cbp.port->info),
|
|
7062
|
+
PJMEDIA_PIA_CCNT(&ae->stream_cbp.port->info),
|
|
7063
|
+
PJMEDIA_PIA_SPF(&ae->stream_cbp.port->info),
|
|
7064
|
+
PJMEDIA_PIA_BITS(&ae->stream_cbp.port->info),
|
|
7065
|
+
voice,
|
|
7066
|
+
&fp->port);
|
|
7067
|
+
if (status != PJ_SUCCESS) {
|
|
7068
|
+
set_error("pjmedia_flite_port_create failed");
|
|
7069
|
+
return false;
|
|
7070
|
+
}
|
|
6902
7071
|
|
|
6903
|
-
|
|
6904
|
-
status = pjmedia_flite_port_set_eof_cb(fp->port, (void*)call, on_end_of_speech);
|
|
7072
|
+
status = pjmedia_flite_port_set_eof_cb(fp->port, (void*)call, on_end_of_speech_synth);
|
|
6905
7073
|
if (status != PJ_SUCCESS) {
|
|
6906
7074
|
set_error("pjmedia_flite_port_set_eof_cb failed");
|
|
6907
7075
|
return false;
|
|
6908
7076
|
}
|
|
6909
|
-
}
|
|
6910
7077
|
|
|
6911
|
-
|
|
6912
|
-
|
|
6913
|
-
|
|
6914
|
-
|
|
6915
|
-
|
|
7078
|
+
status = pjmedia_conf_add_port(ae->conf, call->inv->pool, fp->port, NULL, &fp->slot);
|
|
7079
|
+
if (status != PJ_SUCCESS) {
|
|
7080
|
+
set_error("pjmedia_conf_add_port failed");
|
|
7081
|
+
return false;
|
|
7082
|
+
}
|
|
6916
7083
|
|
|
6917
|
-
|
|
7084
|
+
fp->connection_mode = CONNECTION_MODE_SOURCE;
|
|
6918
7085
|
|
|
6919
|
-
|
|
7086
|
+
if(!connect_feature_port_to_stream_port(call, ae, fp)) {
|
|
7087
|
+
return false;
|
|
7088
|
+
}
|
|
7089
|
+
|
|
7090
|
+
printf("calling pjmedia_flite_port_speak\n");
|
|
7091
|
+
pjmedia_flite_port_speak(ae->feature_cbps[FP_SPEECH_SYNTH].port, text, times);
|
|
7092
|
+
|
|
7093
|
+
ae->feature_cbps[FP_SPEECH_SYNTH].implementation = IMPLEMENTATION_FLITE;
|
|
7094
|
+
} else {
|
|
7095
|
+
status = pjmedia_ws_speech_port_create(
|
|
7096
|
+
call->inv->pool,
|
|
7097
|
+
PJMEDIA_PIA_SRATE(&ae->stream_cbp.port->info),
|
|
7098
|
+
PJMEDIA_PIA_CCNT(&ae->stream_cbp.port->info),
|
|
7099
|
+
PJMEDIA_PIA_SPF(&ae->stream_cbp.port->info),
|
|
7100
|
+
PJMEDIA_PIA_BITS(&ae->stream_cbp.port->info),
|
|
7101
|
+
g_ws_endpt,
|
|
7102
|
+
server_url,
|
|
7103
|
+
engine,
|
|
7104
|
+
voice,
|
|
7105
|
+
language,
|
|
7106
|
+
text,
|
|
7107
|
+
times,
|
|
7108
|
+
NULL,
|
|
7109
|
+
NULL,
|
|
7110
|
+
on_ws_speech_event,
|
|
7111
|
+
call,
|
|
7112
|
+
&fp->port);
|
|
7113
|
+
if (status != PJ_SUCCESS) {
|
|
7114
|
+
set_error("pjmedia_ws_speech_port_create for synth failed");
|
|
7115
|
+
return false;
|
|
7116
|
+
}
|
|
7117
|
+
|
|
7118
|
+
status = pjmedia_conf_add_port(ae->conf, call->inv->pool, fp->port, NULL, &fp->slot);
|
|
7119
|
+
if (status != PJ_SUCCESS) {
|
|
7120
|
+
set_error("pjmedia_conf_add_port failed");
|
|
7121
|
+
return false;
|
|
7122
|
+
}
|
|
7123
|
+
|
|
7124
|
+
fp->connection_mode = CONNECTION_MODE_SOURCE;
|
|
7125
|
+
|
|
7126
|
+
if(!connect_feature_port_to_stream_port(call, ae, fp)) {
|
|
7127
|
+
return false;
|
|
7128
|
+
}
|
|
7129
|
+
|
|
7130
|
+
ae->feature_cbps[FP_SPEECH_SYNTH].implementation = IMPLEMENTATION_WS_SPEECH;
|
|
7131
|
+
}
|
|
7132
|
+
|
|
7133
|
+
return PJ_SUCCESS;
|
|
6920
7134
|
}
|
|
6921
7135
|
|
|
6922
|
-
bool
|
|
7136
|
+
bool prepare_speech_recog(Call *call, AudioEndpoint *ae, const char *server_url, const char *engine, const char *language) {
|
|
6923
7137
|
pj_status_t status;
|
|
6924
7138
|
|
|
6925
7139
|
ConfBridgePort *fp = &ae->feature_cbps[FP_SPEECH_RECOG];
|
|
@@ -6929,17 +7143,42 @@ bool prepare_pocketsphinx(Call *call, AudioEndpoint *ae) {
|
|
|
6929
7143
|
return true;
|
|
6930
7144
|
}
|
|
6931
7145
|
|
|
6932
|
-
|
|
6933
|
-
|
|
6934
|
-
|
|
6935
|
-
|
|
6936
|
-
|
|
6937
|
-
|
|
6938
|
-
|
|
6939
|
-
|
|
6940
|
-
|
|
6941
|
-
|
|
6942
|
-
|
|
7146
|
+
if(!server_url) {
|
|
7147
|
+
status = pjmedia_pocketsphinx_port_create(
|
|
7148
|
+
call->inv->pool, PJMEDIA_PIA_SRATE(&ae->stream_cbp.port->info),
|
|
7149
|
+
PJMEDIA_PIA_CCNT(&ae->stream_cbp.port->info),
|
|
7150
|
+
PJMEDIA_PIA_SPF(&ae->stream_cbp.port->info),
|
|
7151
|
+
PJMEDIA_PIA_BITS(&ae->stream_cbp.port->info),
|
|
7152
|
+
on_speech_transcript,
|
|
7153
|
+
call,
|
|
7154
|
+
&fp->port);
|
|
7155
|
+
if (status != PJ_SUCCESS) {
|
|
7156
|
+
set_error("pjmedia_pocketsphinx_port_create failed");
|
|
7157
|
+
return false;
|
|
7158
|
+
}
|
|
7159
|
+
} else {
|
|
7160
|
+
status = pjmedia_ws_speech_port_create(
|
|
7161
|
+
call->inv->pool,
|
|
7162
|
+
PJMEDIA_PIA_SRATE(&ae->stream_cbp.port->info),
|
|
7163
|
+
PJMEDIA_PIA_CCNT(&ae->stream_cbp.port->info),
|
|
7164
|
+
PJMEDIA_PIA_SPF(&ae->stream_cbp.port->info),
|
|
7165
|
+
PJMEDIA_PIA_BITS(&ae->stream_cbp.port->info),
|
|
7166
|
+
g_ws_endpt,
|
|
7167
|
+
server_url,
|
|
7168
|
+
NULL,
|
|
7169
|
+
NULL,
|
|
7170
|
+
NULL,
|
|
7171
|
+
NULL,
|
|
7172
|
+
NULL,
|
|
7173
|
+
engine,
|
|
7174
|
+
language,
|
|
7175
|
+
on_ws_speech_event,
|
|
7176
|
+
call,
|
|
7177
|
+
&fp->port);
|
|
7178
|
+
if (status != PJ_SUCCESS) {
|
|
7179
|
+
set_error("pjmedia_ws_speech_port_create for recog failed");
|
|
7180
|
+
return false;
|
|
7181
|
+
}
|
|
6943
7182
|
}
|
|
6944
7183
|
|
|
6945
7184
|
status = pjmedia_conf_add_port(ae->conf, call->inv->pool, fp->port, NULL, &fp->slot);
|
|
@@ -8394,7 +8633,7 @@ static int digit_buffer_thread(void *arg) {
|
|
|
8394
8633
|
bool start_digit_buffer_thread() {
|
|
8395
8634
|
pj_status_t status;
|
|
8396
8635
|
pj_pool_t *pool =
|
|
8397
|
-
pj_pool_create(&
|
|
8636
|
+
pj_pool_create(&g_cp.factory, "digit_buffer_checker", 1000, 1000, NULL);
|
|
8398
8637
|
pj_thread_t *t;
|
|
8399
8638
|
status = pj_thread_create(pool, "digit_buffer_checker", &digit_buffer_thread,
|
|
8400
8639
|
NULL, 0, 0, &t);
|