sip-lab 1.27.1 → 1.28.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -2
- package/binding.gyp +8 -0
- package/build_deps.sh +9 -0
- package/package.json +2 -1
- package/prebuilds/linux-x64/sip-lab.node +0 -0
- package/samples/play_wav_and_speech_recog.bad_transcript.pcmu8000.js +4 -4
- package/samples/speech_synth_and_recog.speex16000.js +6 -6
- package/samples/text_to_speech.js +5 -12
- package/samples_extra/ws_speech_server.dtmf.js +194 -0
- package/samples_extra/ws_speech_server.google.js +190 -0
- package/src/event_templates.cpp +11 -4
- package/src/event_templates.hpp +4 -2
- package/src/pjmedia/include/pjmedia/flite_port.h +2 -2
- package/src/pjmedia/include/pjmedia/ws_speech_port.h +37 -0
- package/src/pjmedia/src/pjmedia/flite_port.c +16 -11
- package/src/pjmedia/src/pjmedia/ws_speech_port.cpp +377 -0
- package/src/sip.cpp +311 -95
package/src/sip.cpp
CHANGED
|
@@ -21,10 +21,13 @@
|
|
|
21
21
|
#include "idmanager.hpp"
|
|
22
22
|
#include "event_templates.hpp"
|
|
23
23
|
|
|
24
|
+
#include "websock.h"
|
|
25
|
+
|
|
24
26
|
#include "dtmfdet.h"
|
|
25
27
|
#include "fax_port.h"
|
|
26
28
|
#include "flite_port.h"
|
|
27
29
|
#include "pocketsphinx_port.h"
|
|
30
|
+
#include "ws_speech_port.h"
|
|
28
31
|
|
|
29
32
|
#include <ctime>
|
|
30
33
|
|
|
@@ -61,9 +64,14 @@ IdManager g_dialog_ids(IDS_MAX);
|
|
|
61
64
|
#define DEFAULT_CODEC_QUALITY (5)
|
|
62
65
|
|
|
63
66
|
static pjsip_endpoint *g_sip_endpt;
|
|
64
|
-
static pj_caching_pool
|
|
67
|
+
static pj_caching_pool g_cp;
|
|
65
68
|
static pj_pool_t *g_pool;
|
|
66
69
|
static pjmedia_endpt *g_med_endpt;
|
|
70
|
+
static pj_timer_heap_t *g_timer_heap = NULL;
|
|
71
|
+
static pj_websock_endpoint *g_ws_endpt = NULL;
|
|
72
|
+
|
|
73
|
+
#define CERT_FILE "./cert/test.pem"
|
|
74
|
+
#define CERT_KEY "./cert/test.key"
|
|
67
75
|
|
|
68
76
|
// static pj_thread_t *g_thread = NULL;
|
|
69
77
|
// static pj_bool_t g_thread_quit_flag;
|
|
@@ -303,10 +311,15 @@ struct Subscription {
|
|
|
303
311
|
bool initialized;
|
|
304
312
|
};
|
|
305
313
|
|
|
314
|
+
#define IMPLEMENTATION_FLITE 1
|
|
315
|
+
#define IMPLEMENTATION_POCKETSPHINX 2
|
|
316
|
+
#define IMPLEMENTATION_WS_SPEECH 3
|
|
317
|
+
|
|
306
318
|
struct ConfBridgePort {
|
|
307
319
|
unsigned slot;
|
|
308
320
|
pjmedia_port *port;
|
|
309
321
|
short connection_mode;
|
|
322
|
+
short implementation;
|
|
310
323
|
};
|
|
311
324
|
|
|
312
325
|
#define FP_DTMFDET 0
|
|
@@ -624,8 +637,8 @@ bool prepare_dtmfdet(Call *call, AudioEndpoint *ae);
|
|
|
624
637
|
bool prepare_wav_player(Call *call, AudioEndpoint *ae, const char *file, unsigned flags, bool end_of_file_event);
|
|
625
638
|
bool prepare_wav_writer(Call *call, AudioEndpoint *ae, const char *file);
|
|
626
639
|
bool prepare_fax(Call *call, AudioEndpoint *ae, bool is_sender, const char *file, unsigned flags);
|
|
627
|
-
bool
|
|
628
|
-
bool
|
|
640
|
+
bool prepare_speech_synth(Call *call, AudioEndpoint *ae, const char *server_url, const char *engine, const char *voice, const char *language, const char *text, int times);
|
|
641
|
+
bool prepare_speech_recog(Call *call, AudioEndpoint *ae, const char *server_url, const char *engine, const char *language);
|
|
629
642
|
|
|
630
643
|
void prepare_error_event(ostringstream *oss, char *scope, char *details);
|
|
631
644
|
// void prepare_pjsipcall_error_event(ostringstream *oss, char *scope, char
|
|
@@ -859,6 +872,7 @@ static void on_fax_result(pjmedia_port *port, void *user_data, int result) {
|
|
|
859
872
|
}
|
|
860
873
|
|
|
861
874
|
static void on_end_of_file(pjmedia_port *port, void *user_data) {
|
|
875
|
+
printf("on_end_of_file\n");
|
|
862
876
|
if (g_shutting_down)
|
|
863
877
|
return;
|
|
864
878
|
|
|
@@ -874,19 +888,19 @@ static void on_end_of_file(pjmedia_port *port, void *user_data) {
|
|
|
874
888
|
dispatch_event(evt);
|
|
875
889
|
}
|
|
876
890
|
|
|
877
|
-
static void
|
|
891
|
+
static void on_end_of_speech_synth(pjmedia_port *port, void *user_data) {
|
|
878
892
|
if (g_shutting_down)
|
|
879
893
|
return;
|
|
880
894
|
|
|
881
895
|
long call_id;
|
|
882
896
|
if (!g_call_ids.get_id((long)user_data, call_id)) {
|
|
883
897
|
printf(
|
|
884
|
-
"
|
|
898
|
+
"on_end_of_speech_synth: Failed to get call_id. Event will not be notified.\n");
|
|
885
899
|
return;
|
|
886
900
|
}
|
|
887
901
|
|
|
888
902
|
char evt[1024];
|
|
889
|
-
|
|
903
|
+
make_evt_speech_synth_complete(evt, sizeof(evt), call_id);
|
|
890
904
|
dispatch_event(evt);
|
|
891
905
|
}
|
|
892
906
|
|
|
@@ -903,10 +917,70 @@ static void on_speech_transcript(pjmedia_port*, void *user_data, char* transcrip
|
|
|
903
917
|
}
|
|
904
918
|
|
|
905
919
|
char evt[1024];
|
|
906
|
-
|
|
920
|
+
make_evt_speech(evt, sizeof(evt), call_id, transcript);
|
|
921
|
+
dispatch_event(evt);
|
|
922
|
+
}
|
|
923
|
+
|
|
924
|
+
static void on_ws_speech_event(pjmedia_port*, void *user_data, enum ws_speech_event e, char *data, int len) {
|
|
925
|
+
char evt[2048];
|
|
926
|
+
|
|
927
|
+
if (g_shutting_down)
|
|
928
|
+
return;
|
|
929
|
+
|
|
930
|
+
long call_id;
|
|
931
|
+
if (!g_call_ids.get_id((long)user_data, call_id)) {
|
|
932
|
+
addon_log(
|
|
933
|
+
L_DBG,
|
|
934
|
+
"on_ws_speech_event: Failed to get call_id. Event will not be notified.\n");
|
|
935
|
+
return;
|
|
936
|
+
}
|
|
937
|
+
|
|
938
|
+
if(e == WS_SPEECH_EVENT_TEXT_MSG) {
|
|
939
|
+
rapidjson::Document document;
|
|
940
|
+
|
|
941
|
+
// Parse the JSON string from the buffer with specified length
|
|
942
|
+
if (document.Parse(data, len).HasParseError()) {
|
|
943
|
+
addon_log(
|
|
944
|
+
L_DBG,
|
|
945
|
+
"on_ws_speech_event: Failed to parse JSON string.\n");
|
|
946
|
+
return;
|
|
947
|
+
}
|
|
948
|
+
if (!document.HasMember("evt") || !document["evt"].IsString()) {
|
|
949
|
+
make_evt_ws_speech_event(evt, sizeof(evt), call_id, data, len);
|
|
950
|
+
dispatch_event(evt);
|
|
951
|
+
return;
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
if (strcmp(document["evt"].GetString(), "synth_complete") == 0) {
|
|
955
|
+
make_evt_speech_synth_complete(evt, sizeof(evt), call_id);
|
|
956
|
+
dispatch_event(evt);
|
|
957
|
+
return;
|
|
958
|
+
} else if (strcmp(document["evt"].GetString(), "speech") == 0) {
|
|
959
|
+
if (!document.HasMember("data") || !document["data"].IsObject()) {
|
|
960
|
+
make_evt_ws_speech_event(evt, sizeof(evt), call_id, data, len);
|
|
961
|
+
dispatch_event(evt);
|
|
962
|
+
return;
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
const rapidjson::Value& evt_data = document["data"];
|
|
966
|
+
|
|
967
|
+
if (!evt_data.HasMember("transcript") || !evt_data["transcript"].IsString()) {
|
|
968
|
+
make_evt_ws_speech_event(evt, sizeof(evt), call_id, data, len);
|
|
969
|
+
dispatch_event(evt);
|
|
970
|
+
return;
|
|
971
|
+
}
|
|
972
|
+
|
|
973
|
+
make_evt_speech(evt, sizeof(evt), call_id, (char*)evt_data["transcript"].GetString());
|
|
974
|
+
dispatch_event(evt);
|
|
975
|
+
return;
|
|
976
|
+
}
|
|
977
|
+
}
|
|
978
|
+
|
|
979
|
+
make_evt_ws_speech_event(evt, sizeof(evt), call_id, data, len);
|
|
907
980
|
dispatch_event(evt);
|
|
908
981
|
}
|
|
909
982
|
|
|
983
|
+
|
|
910
984
|
void dispatch_event(const char *evt) {
|
|
911
985
|
addon_log(L_DBG, "dispach_event called with evt=%s\n", evt);
|
|
912
986
|
// g_event_sink(evt);
|
|
@@ -1233,27 +1307,37 @@ int __pjw_init() {
|
|
|
1233
1307
|
return 1;
|
|
1234
1308
|
}
|
|
1235
1309
|
|
|
1310
|
+
unsigned log_decor = pj_log_get_decor();
|
|
1311
|
+
log_decor |= PJ_LOG_HAS_LEVEL_TEXT;
|
|
1312
|
+
log_decor |= PJ_LOG_HAS_SENDER;
|
|
1313
|
+
pj_log_set_decor(log_decor);
|
|
1314
|
+
|
|
1315
|
+
|
|
1236
1316
|
status = pjlib_util_init();
|
|
1237
1317
|
if (status != PJ_SUCCESS) {
|
|
1238
1318
|
addon_log(L_DBG, "pj_lib_util_init failed\n");
|
|
1239
1319
|
return 1;
|
|
1240
1320
|
}
|
|
1241
1321
|
|
|
1322
|
+
pj_time_val now;
|
|
1323
|
+
pj_gettimeofday(&now);
|
|
1324
|
+
pj_srand((unsigned)now.sec);
|
|
1325
|
+
|
|
1242
1326
|
pthread_mutex_init(&g_mutex, NULL);
|
|
1243
1327
|
|
|
1244
1328
|
pj_log_set_level(0);
|
|
1245
1329
|
|
|
1246
|
-
pj_caching_pool_init(&
|
|
1330
|
+
pj_caching_pool_init(&g_cp, &pj_pool_factory_default_policy, 0);
|
|
1247
1331
|
|
|
1248
1332
|
char *sip_endpt_name = (char *)"mysip";
|
|
1249
1333
|
|
|
1250
|
-
status = pjsip_endpt_create(&
|
|
1334
|
+
status = pjsip_endpt_create(&g_cp.factory, sip_endpt_name, &g_sip_endpt);
|
|
1251
1335
|
if (status != PJ_SUCCESS) {
|
|
1252
1336
|
addon_log(L_DBG, "pjsip_endpt_create failed\n");
|
|
1253
1337
|
return 1;
|
|
1254
1338
|
}
|
|
1255
1339
|
|
|
1256
|
-
g_pool = pj_pool_create(&
|
|
1340
|
+
g_pool = pj_pool_create(&g_cp.factory, "tester", 1000, 1000, NULL);
|
|
1257
1341
|
|
|
1258
1342
|
/* Create event manager */
|
|
1259
1343
|
status = pjmedia_event_mgr_create(g_pool, 0, NULL);
|
|
@@ -1352,10 +1436,10 @@ int __pjw_init() {
|
|
|
1352
1436
|
return 1;
|
|
1353
1437
|
}
|
|
1354
1438
|
#if PJ_HAS_THREADS
|
|
1355
|
-
status = pjmedia_endpt_create2(&
|
|
1439
|
+
status = pjmedia_endpt_create2(&g_cp.factory, NULL, 1, &g_med_endpt);
|
|
1356
1440
|
#else
|
|
1357
1441
|
status = pjmedia_endpt_create2(
|
|
1358
|
-
&
|
|
1442
|
+
&g_cp.factory, pjsip_endpt_get_ioqueue(g_sip_endpt), 0, &g_med_endpt);
|
|
1359
1443
|
#endif
|
|
1360
1444
|
if (status != PJ_SUCCESS) {
|
|
1361
1445
|
addon_log(L_DBG, "pjmedia_endpt_create failed\n");
|
|
@@ -1448,6 +1532,33 @@ int __pjw_init() {
|
|
|
1448
1532
|
return 1;
|
|
1449
1533
|
}
|
|
1450
1534
|
|
|
1535
|
+
status = pj_timer_heap_create(g_pool, 128, &g_timer_heap);
|
|
1536
|
+
if (status != PJ_SUCCESS) {
|
|
1537
|
+
addon_log(L_DBG, "create timer heap error");
|
|
1538
|
+
return 1;
|
|
1539
|
+
}
|
|
1540
|
+
|
|
1541
|
+
|
|
1542
|
+
pj_websock_ssl_cert cert;
|
|
1543
|
+
pj_bzero(&cert, sizeof(cert));
|
|
1544
|
+
cert.ca_file = pj_str(CERT_FILE);
|
|
1545
|
+
cert.cert_file = pj_str(CERT_FILE);
|
|
1546
|
+
cert.private_file = pj_str(CERT_KEY);
|
|
1547
|
+
|
|
1548
|
+
pj_websock_endpt_cfg opt;
|
|
1549
|
+
pj_websock_endpt_cfg_default(&opt);
|
|
1550
|
+
opt.pf = &g_cp.factory;
|
|
1551
|
+
opt.ioq = pjsip_endpt_get_ioqueue(g_sip_endpt);
|
|
1552
|
+
opt.timer_heap = g_timer_heap;
|
|
1553
|
+
opt.cert = &cert;
|
|
1554
|
+
opt.async_cnt = 3;
|
|
1555
|
+
|
|
1556
|
+
status = pj_websock_endpt_create(&opt, &g_ws_endpt);
|
|
1557
|
+
if (status != PJ_SUCCESS) {
|
|
1558
|
+
addon_log(L_DBG, "create websock endpoint error");
|
|
1559
|
+
return 1;
|
|
1560
|
+
}
|
|
1561
|
+
|
|
1451
1562
|
return 0;
|
|
1452
1563
|
}
|
|
1453
1564
|
|
|
@@ -3815,7 +3926,7 @@ out:
|
|
|
3815
3926
|
return 0;
|
|
3816
3927
|
}
|
|
3817
3928
|
|
|
3818
|
-
pj_status_t audio_endpoint_start_speech_synth(Call *call, AudioEndpoint *ae, const char * voice, const char *
|
|
3929
|
+
pj_status_t audio_endpoint_start_speech_synth(Call *call, AudioEndpoint *ae, const char *server_url, const char *engine, const char *voice, const char *language, const char *text, int times) {
|
|
3819
3930
|
pj_status_t status;
|
|
3820
3931
|
|
|
3821
3932
|
if(!ae->stream_cbp.port) {
|
|
@@ -3829,12 +3940,10 @@ pj_status_t audio_endpoint_start_speech_synth(Call *call, AudioEndpoint *ae, con
|
|
|
3829
3940
|
return -1;
|
|
3830
3941
|
}
|
|
3831
3942
|
|
|
3832
|
-
if (!
|
|
3943
|
+
if (!prepare_speech_synth(call, ae, server_url, engine, voice, language, text, times)) {
|
|
3833
3944
|
return -1;
|
|
3834
3945
|
}
|
|
3835
3946
|
|
|
3836
|
-
pjmedia_flite_port_speak(ae->feature_cbps[FP_SPEECH_SYNTH].port, text, flags);
|
|
3837
|
-
|
|
3838
3947
|
return PJ_SUCCESS;
|
|
3839
3948
|
}
|
|
3840
3949
|
|
|
@@ -3854,21 +3963,23 @@ int pjw_call_start_speech_synth(long call_id, const char *json) {
|
|
|
3854
3963
|
|
|
3855
3964
|
int media_id = -1;
|
|
3856
3965
|
|
|
3857
|
-
char *
|
|
3966
|
+
char *server_url = NULL;
|
|
3858
3967
|
|
|
3859
|
-
char *
|
|
3968
|
+
char *engine = NULL;
|
|
3860
3969
|
|
|
3861
|
-
|
|
3970
|
+
char *voice = NULL;
|
|
3862
3971
|
|
|
3863
|
-
|
|
3972
|
+
char *language = NULL;
|
|
3864
3973
|
|
|
3865
|
-
|
|
3974
|
+
char *text;
|
|
3975
|
+
|
|
3976
|
+
int times = 1;
|
|
3866
3977
|
|
|
3867
3978
|
char buffer[MAX_JSON_INPUT];
|
|
3868
3979
|
|
|
3869
3980
|
Document document;
|
|
3870
3981
|
|
|
3871
|
-
const char *valid_params[] = {"voice", "
|
|
3982
|
+
const char *valid_params[] = {"server_url", "engine", "voice", "language", "text", "times", "media_id", ""};
|
|
3872
3983
|
|
|
3873
3984
|
if (!g_call_ids.get(call_id, val)) {
|
|
3874
3985
|
set_error("Invalid call_id");
|
|
@@ -3891,6 +4002,14 @@ int pjw_call_start_speech_synth(long call_id, const char *json) {
|
|
|
3891
4002
|
goto out;
|
|
3892
4003
|
}
|
|
3893
4004
|
|
|
4005
|
+
if (json_get_string_param(document, "server_url", true, &server_url) <= 0) {
|
|
4006
|
+
goto out;
|
|
4007
|
+
}
|
|
4008
|
+
|
|
4009
|
+
if (json_get_string_param(document, "engine", true, &engine) <= 0) {
|
|
4010
|
+
goto out;
|
|
4011
|
+
}
|
|
4012
|
+
|
|
3894
4013
|
if (json_get_string_param(document, "voice", false, &voice) <= 0) {
|
|
3895
4014
|
goto out;
|
|
3896
4015
|
}
|
|
@@ -3900,6 +4019,10 @@ int pjw_call_start_speech_synth(long call_id, const char *json) {
|
|
|
3900
4019
|
goto out;
|
|
3901
4020
|
}
|
|
3902
4021
|
|
|
4022
|
+
if (json_get_string_param(document, "language", true, &language) <= 0) {
|
|
4023
|
+
goto out;
|
|
4024
|
+
}
|
|
4025
|
+
|
|
3903
4026
|
if (json_get_string_param(document, "text", false, &text) <= 0) {
|
|
3904
4027
|
goto out;
|
|
3905
4028
|
}
|
|
@@ -3909,21 +4032,31 @@ int pjw_call_start_speech_synth(long call_id, const char *json) {
|
|
|
3909
4032
|
goto out;
|
|
3910
4033
|
}
|
|
3911
4034
|
|
|
3912
|
-
if (
|
|
4035
|
+
if (json_get_int_param(document, "times", true, &times) <= 0) {
|
|
3913
4036
|
goto out;
|
|
3914
4037
|
}
|
|
3915
4038
|
|
|
3916
|
-
|
|
4039
|
+
res = json_get_int_param(document, "media_id", true, &media_id);
|
|
4040
|
+
if (res <= 0) {
|
|
3917
4041
|
goto out;
|
|
3918
4042
|
}
|
|
3919
4043
|
|
|
3920
|
-
if(
|
|
3921
|
-
|
|
3922
|
-
|
|
4044
|
+
if(server_url || engine || language) {
|
|
4045
|
+
// If any is set then all must be set and must be non-empty string (required by ws_speech_port)
|
|
4046
|
+
if(!server_url || !server_url[0]) {
|
|
4047
|
+
set_error("server_url must be present and cannot be empty string");
|
|
4048
|
+
goto out;
|
|
4049
|
+
}
|
|
3923
4050
|
|
|
3924
|
-
|
|
3925
|
-
|
|
3926
|
-
|
|
4051
|
+
if(!engine || !engine[0]) {
|
|
4052
|
+
set_error("engine must be present and cannot be empty string");
|
|
4053
|
+
goto out;
|
|
4054
|
+
}
|
|
4055
|
+
|
|
4056
|
+
if(!language || !language[0]) {
|
|
4057
|
+
set_error("language must be present and cannot be empty string");
|
|
4058
|
+
goto out;
|
|
4059
|
+
}
|
|
3927
4060
|
}
|
|
3928
4061
|
|
|
3929
4062
|
if (NOT_FOUND_OPTIONAL == res) {
|
|
@@ -3932,7 +4065,7 @@ int pjw_call_start_speech_synth(long call_id, const char *json) {
|
|
|
3932
4065
|
MediaEndpoint *me = (MediaEndpoint *)call->media[i];
|
|
3933
4066
|
if (me->type == ENDPOINT_TYPE_AUDIO) {
|
|
3934
4067
|
AudioEndpoint *ae = (AudioEndpoint *)me->endpoint.audio;
|
|
3935
|
-
status = audio_endpoint_start_speech_synth(call, ae, voice,
|
|
4068
|
+
status = audio_endpoint_start_speech_synth(call, ae, server_url, engine, voice, language, text, times);
|
|
3936
4069
|
if (status != PJ_SUCCESS) goto out;
|
|
3937
4070
|
}
|
|
3938
4071
|
}
|
|
@@ -3950,7 +4083,7 @@ int pjw_call_start_speech_synth(long call_id, const char *json) {
|
|
|
3950
4083
|
|
|
3951
4084
|
ae = (AudioEndpoint *)me->endpoint.audio;
|
|
3952
4085
|
|
|
3953
|
-
audio_endpoint_start_speech_synth(call, ae, voice,
|
|
4086
|
+
audio_endpoint_start_speech_synth(call, ae, server_url, engine, voice, language, text, times);
|
|
3954
4087
|
}
|
|
3955
4088
|
|
|
3956
4089
|
out:
|
|
@@ -3962,7 +4095,7 @@ out:
|
|
|
3962
4095
|
return 0;
|
|
3963
4096
|
}
|
|
3964
4097
|
|
|
3965
|
-
pj_status_t audio_endpoint_start_speech_recog(Call *call, AudioEndpoint *ae) {
|
|
4098
|
+
pj_status_t audio_endpoint_start_speech_recog(Call *call, AudioEndpoint *ae, const char *server_url, const char *engine, const char *language) {
|
|
3966
4099
|
pj_status_t status;
|
|
3967
4100
|
|
|
3968
4101
|
if(!ae->stream_cbp.port) {
|
|
@@ -3976,7 +4109,7 @@ pj_status_t audio_endpoint_start_speech_recog(Call *call, AudioEndpoint *ae) {
|
|
|
3976
4109
|
return -1;
|
|
3977
4110
|
}
|
|
3978
4111
|
|
|
3979
|
-
if (!
|
|
4112
|
+
if (!prepare_speech_recog(call, ae, server_url, engine, language)) {
|
|
3980
4113
|
return -1;
|
|
3981
4114
|
}
|
|
3982
4115
|
|
|
@@ -3999,21 +4132,17 @@ int pjw_call_start_speech_recog(long call_id, const char *json) {
|
|
|
3999
4132
|
|
|
4000
4133
|
int media_id = -1;
|
|
4001
4134
|
|
|
4002
|
-
char *
|
|
4135
|
+
char *server_url = NULL;
|
|
4003
4136
|
|
|
4004
|
-
char *
|
|
4005
|
-
|
|
4006
|
-
bool end_of_speech_event = false;
|
|
4007
|
-
|
|
4008
|
-
unsigned flags = 0;
|
|
4137
|
+
char *engine = NULL;
|
|
4009
4138
|
|
|
4010
|
-
|
|
4139
|
+
char *language = NULL;
|
|
4011
4140
|
|
|
4012
4141
|
char buffer[MAX_JSON_INPUT];
|
|
4013
4142
|
|
|
4014
4143
|
Document document;
|
|
4015
4144
|
|
|
4016
|
-
const char *valid_params[] = {"media_id", ""};
|
|
4145
|
+
const char *valid_params[] = {"server_url", "engine", "language", "media_id", ""};
|
|
4017
4146
|
|
|
4018
4147
|
if (!g_call_ids.get(call_id, val)) {
|
|
4019
4148
|
set_error("Invalid call_id");
|
|
@@ -4036,18 +4165,48 @@ int pjw_call_start_speech_recog(long call_id, const char *json) {
|
|
|
4036
4165
|
goto out;
|
|
4037
4166
|
}
|
|
4038
4167
|
|
|
4168
|
+
if (json_get_string_param(document, "server_url", true, &server_url) <= 0) {
|
|
4169
|
+
goto out;
|
|
4170
|
+
}
|
|
4171
|
+
|
|
4172
|
+
if (json_get_string_param(document, "engine", true, &engine) <= 0) {
|
|
4173
|
+
goto out;
|
|
4174
|
+
}
|
|
4175
|
+
|
|
4176
|
+
if (json_get_string_param(document, "language", true, &language) <= 0) {
|
|
4177
|
+
goto out;
|
|
4178
|
+
}
|
|
4179
|
+
|
|
4039
4180
|
res = json_get_int_param(document, "media_id", true, &media_id);
|
|
4040
4181
|
if (res <= 0) {
|
|
4041
4182
|
goto out;
|
|
4042
4183
|
}
|
|
4043
4184
|
|
|
4185
|
+
if(server_url || engine || language) {
|
|
4186
|
+
// If any is set then all must be set and must be non-empty string (required by ws_speech_port)
|
|
4187
|
+
if(!server_url || !server_url[0]) {
|
|
4188
|
+
set_error("server_url must be present and cannot be empty string");
|
|
4189
|
+
goto out;
|
|
4190
|
+
}
|
|
4191
|
+
|
|
4192
|
+
if(!engine || !engine[0]) {
|
|
4193
|
+
set_error("engine must be present and cannot be empty string");
|
|
4194
|
+
goto out;
|
|
4195
|
+
}
|
|
4196
|
+
|
|
4197
|
+
if(!language || !language[0]) {
|
|
4198
|
+
set_error("language must be present and cannot be empty string");
|
|
4199
|
+
goto out;
|
|
4200
|
+
}
|
|
4201
|
+
}
|
|
4202
|
+
|
|
4044
4203
|
if (NOT_FOUND_OPTIONAL == res) {
|
|
4045
4204
|
// start on all audio media endpoints
|
|
4046
4205
|
for (int i = 0; i < call->media_count; i++) {
|
|
4047
4206
|
MediaEndpoint *me = (MediaEndpoint *)call->media[i];
|
|
4048
4207
|
if (me->type == ENDPOINT_TYPE_AUDIO) {
|
|
4049
4208
|
AudioEndpoint *ae = (AudioEndpoint *)me->endpoint.audio;
|
|
4050
|
-
status = audio_endpoint_start_speech_recog(call, ae);
|
|
4209
|
+
status = audio_endpoint_start_speech_recog(call, ae, server_url, engine, language);
|
|
4051
4210
|
if (status != PJ_SUCCESS) goto out;
|
|
4052
4211
|
}
|
|
4053
4212
|
}
|
|
@@ -4065,7 +4224,7 @@ int pjw_call_start_speech_recog(long call_id, const char *json) {
|
|
|
4065
4224
|
|
|
4066
4225
|
ae = (AudioEndpoint *)me->endpoint.audio;
|
|
4067
4226
|
|
|
4068
|
-
audio_endpoint_start_speech_recog(call, ae);
|
|
4227
|
+
audio_endpoint_start_speech_recog(call, ae, server_url, engine, language);
|
|
4069
4228
|
}
|
|
4070
4229
|
|
|
4071
4230
|
out:
|
|
@@ -5060,19 +5219,6 @@ static void on_media_update(pjsip_inv_session *inv, pj_status_t status) {
|
|
|
5060
5219
|
static void on_state_changed(pjsip_inv_session *inv, pjsip_event *e) {
|
|
5061
5220
|
addon_log(L_DBG, "on_state_changed\n");
|
|
5062
5221
|
|
|
5063
|
-
// The below is just to document know-how for future improvements
|
|
5064
|
-
/*
|
|
5065
|
-
addon_log(L_DBG, "on_state_changed e->type=%i\n", e->type);
|
|
5066
|
-
if(e->type == PJSIP_EVENT_TSX_STATE && e->body.tsx_state.type ==
|
|
5067
|
-
PJSIP_EVENT_RX_MSG) {
|
|
5068
|
-
// Read http://trac.pjsip.org/repos/wiki/SIP_Message_Buffer_Event
|
|
5069
|
-
addon_log(L_DBG, "Msg=%s\n",
|
|
5070
|
-
e->body.tsx_state.src.rdata->msg_info.msg_buf);
|
|
5071
|
-
}
|
|
5072
|
-
*/
|
|
5073
|
-
|
|
5074
|
-
printf("e->type=%d\n", e->type);
|
|
5075
|
-
|
|
5076
5222
|
/*
|
|
5077
5223
|
pj_str_t *method_name = &rdata->msg_info.msg->line.req.method.name;
|
|
5078
5224
|
addon_log(L_DBG, "on_rx_request %.*s\n", method_name->slen,
|
|
@@ -5130,7 +5276,7 @@ static void on_state_changed(pjsip_inv_session *inv, pjsip_event *e) {
|
|
|
5130
5276
|
char evt[2048];
|
|
5131
5277
|
int sip_msg_len = 0;
|
|
5132
5278
|
char *sip_msg = (char *)"";
|
|
5133
|
-
if
|
|
5279
|
+
if(e->type == PJSIP_EVENT_TSX_STATE && e->body.tsx_state.type == PJSIP_EVENT_RX_MSG) {
|
|
5134
5280
|
sip_msg_len = e->body.rx_msg.rdata->msg_info.len;
|
|
5135
5281
|
sip_msg = e->body.rx_msg.rdata->msg_info.msg_buf;
|
|
5136
5282
|
}
|
|
@@ -6705,8 +6851,6 @@ bool is_media_active(Call *call, MediaEndpoint *me) {
|
|
|
6705
6851
|
void close_media_endpoint(Call *call, MediaEndpoint *me) {
|
|
6706
6852
|
printf("close_media_endpoint %p\n", (void*)me);
|
|
6707
6853
|
|
|
6708
|
-
pj_status_t status;
|
|
6709
|
-
|
|
6710
6854
|
if(!me) return;
|
|
6711
6855
|
|
|
6712
6856
|
if (ENDPOINT_TYPE_AUDIO == me->type) {
|
|
@@ -6901,7 +7045,7 @@ bool prepare_fax(Call *call, AudioEndpoint *ae, bool is_sender, const char *file
|
|
|
6901
7045
|
return connect_feature_port_to_stream_port(call, ae, fp);
|
|
6902
7046
|
}
|
|
6903
7047
|
|
|
6904
|
-
bool
|
|
7048
|
+
bool prepare_speech_synth(Call *call, AudioEndpoint *ae, const char *server_url, const char *engine, const char *voice, const char *language, const char *text, int times) {
|
|
6905
7049
|
pj_status_t status;
|
|
6906
7050
|
|
|
6907
7051
|
ConfBridgePort *fp = &ae->feature_cbps[FP_SPEECH_SYNTH];
|
|
@@ -6911,38 +7055,85 @@ bool prepare_flite(Call *call, AudioEndpoint *ae, const char *voice, bool end_of
|
|
|
6911
7055
|
return true;
|
|
6912
7056
|
}
|
|
6913
7057
|
|
|
6914
|
-
|
|
6915
|
-
|
|
6916
|
-
|
|
6917
|
-
|
|
6918
|
-
|
|
6919
|
-
|
|
6920
|
-
|
|
6921
|
-
|
|
6922
|
-
|
|
6923
|
-
|
|
6924
|
-
|
|
7058
|
+
if(!server_url) {
|
|
7059
|
+
status = pjmedia_flite_port_create(
|
|
7060
|
+
call->inv->pool,
|
|
7061
|
+
PJMEDIA_PIA_SRATE(&ae->stream_cbp.port->info),
|
|
7062
|
+
PJMEDIA_PIA_CCNT(&ae->stream_cbp.port->info),
|
|
7063
|
+
PJMEDIA_PIA_SPF(&ae->stream_cbp.port->info),
|
|
7064
|
+
PJMEDIA_PIA_BITS(&ae->stream_cbp.port->info),
|
|
7065
|
+
voice,
|
|
7066
|
+
&fp->port);
|
|
7067
|
+
if (status != PJ_SUCCESS) {
|
|
7068
|
+
set_error("pjmedia_flite_port_create failed");
|
|
7069
|
+
return false;
|
|
7070
|
+
}
|
|
6925
7071
|
|
|
6926
|
-
|
|
6927
|
-
status = pjmedia_flite_port_set_eof_cb(fp->port, (void*)call, on_end_of_speech);
|
|
7072
|
+
status = pjmedia_flite_port_set_eof_cb(fp->port, (void*)call, on_end_of_speech_synth);
|
|
6928
7073
|
if (status != PJ_SUCCESS) {
|
|
6929
7074
|
set_error("pjmedia_flite_port_set_eof_cb failed");
|
|
6930
7075
|
return false;
|
|
6931
7076
|
}
|
|
6932
|
-
}
|
|
6933
7077
|
|
|
6934
|
-
|
|
6935
|
-
|
|
6936
|
-
|
|
6937
|
-
|
|
6938
|
-
|
|
7078
|
+
status = pjmedia_conf_add_port(ae->conf, call->inv->pool, fp->port, NULL, &fp->slot);
|
|
7079
|
+
if (status != PJ_SUCCESS) {
|
|
7080
|
+
set_error("pjmedia_conf_add_port failed");
|
|
7081
|
+
return false;
|
|
7082
|
+
}
|
|
6939
7083
|
|
|
6940
|
-
|
|
7084
|
+
fp->connection_mode = CONNECTION_MODE_SOURCE;
|
|
6941
7085
|
|
|
6942
|
-
|
|
7086
|
+
if(!connect_feature_port_to_stream_port(call, ae, fp)) {
|
|
7087
|
+
return false;
|
|
7088
|
+
}
|
|
7089
|
+
|
|
7090
|
+
printf("calling pjmedia_flite_port_speak\n");
|
|
7091
|
+
pjmedia_flite_port_speak(ae->feature_cbps[FP_SPEECH_SYNTH].port, text, times);
|
|
7092
|
+
|
|
7093
|
+
ae->feature_cbps[FP_SPEECH_SYNTH].implementation = IMPLEMENTATION_FLITE;
|
|
7094
|
+
} else {
|
|
7095
|
+
status = pjmedia_ws_speech_port_create(
|
|
7096
|
+
call->inv->pool,
|
|
7097
|
+
PJMEDIA_PIA_SRATE(&ae->stream_cbp.port->info),
|
|
7098
|
+
PJMEDIA_PIA_CCNT(&ae->stream_cbp.port->info),
|
|
7099
|
+
PJMEDIA_PIA_SPF(&ae->stream_cbp.port->info),
|
|
7100
|
+
PJMEDIA_PIA_BITS(&ae->stream_cbp.port->info),
|
|
7101
|
+
g_ws_endpt,
|
|
7102
|
+
server_url,
|
|
7103
|
+
engine,
|
|
7104
|
+
voice,
|
|
7105
|
+
language,
|
|
7106
|
+
text,
|
|
7107
|
+
times,
|
|
7108
|
+
NULL,
|
|
7109
|
+
NULL,
|
|
7110
|
+
on_ws_speech_event,
|
|
7111
|
+
call,
|
|
7112
|
+
&fp->port);
|
|
7113
|
+
if (status != PJ_SUCCESS) {
|
|
7114
|
+
set_error("pjmedia_ws_speech_port_create for synth failed");
|
|
7115
|
+
return false;
|
|
7116
|
+
}
|
|
7117
|
+
|
|
7118
|
+
status = pjmedia_conf_add_port(ae->conf, call->inv->pool, fp->port, NULL, &fp->slot);
|
|
7119
|
+
if (status != PJ_SUCCESS) {
|
|
7120
|
+
set_error("pjmedia_conf_add_port failed");
|
|
7121
|
+
return false;
|
|
7122
|
+
}
|
|
7123
|
+
|
|
7124
|
+
fp->connection_mode = CONNECTION_MODE_SOURCE;
|
|
7125
|
+
|
|
7126
|
+
if(!connect_feature_port_to_stream_port(call, ae, fp)) {
|
|
7127
|
+
return false;
|
|
7128
|
+
}
|
|
7129
|
+
|
|
7130
|
+
ae->feature_cbps[FP_SPEECH_SYNTH].implementation = IMPLEMENTATION_WS_SPEECH;
|
|
7131
|
+
}
|
|
7132
|
+
|
|
7133
|
+
return PJ_SUCCESS;
|
|
6943
7134
|
}
|
|
6944
7135
|
|
|
6945
|
-
bool
|
|
7136
|
+
bool prepare_speech_recog(Call *call, AudioEndpoint *ae, const char *server_url, const char *engine, const char *language) {
|
|
6946
7137
|
pj_status_t status;
|
|
6947
7138
|
|
|
6948
7139
|
ConfBridgePort *fp = &ae->feature_cbps[FP_SPEECH_RECOG];
|
|
@@ -6952,17 +7143,42 @@ bool prepare_pocketsphinx(Call *call, AudioEndpoint *ae) {
|
|
|
6952
7143
|
return true;
|
|
6953
7144
|
}
|
|
6954
7145
|
|
|
6955
|
-
|
|
6956
|
-
|
|
6957
|
-
|
|
6958
|
-
|
|
6959
|
-
|
|
6960
|
-
|
|
6961
|
-
|
|
6962
|
-
|
|
6963
|
-
|
|
6964
|
-
|
|
6965
|
-
|
|
7146
|
+
if(!server_url) {
|
|
7147
|
+
status = pjmedia_pocketsphinx_port_create(
|
|
7148
|
+
call->inv->pool, PJMEDIA_PIA_SRATE(&ae->stream_cbp.port->info),
|
|
7149
|
+
PJMEDIA_PIA_CCNT(&ae->stream_cbp.port->info),
|
|
7150
|
+
PJMEDIA_PIA_SPF(&ae->stream_cbp.port->info),
|
|
7151
|
+
PJMEDIA_PIA_BITS(&ae->stream_cbp.port->info),
|
|
7152
|
+
on_speech_transcript,
|
|
7153
|
+
call,
|
|
7154
|
+
&fp->port);
|
|
7155
|
+
if (status != PJ_SUCCESS) {
|
|
7156
|
+
set_error("pjmedia_pocketsphinx_port_create failed");
|
|
7157
|
+
return false;
|
|
7158
|
+
}
|
|
7159
|
+
} else {
|
|
7160
|
+
status = pjmedia_ws_speech_port_create(
|
|
7161
|
+
call->inv->pool,
|
|
7162
|
+
PJMEDIA_PIA_SRATE(&ae->stream_cbp.port->info),
|
|
7163
|
+
PJMEDIA_PIA_CCNT(&ae->stream_cbp.port->info),
|
|
7164
|
+
PJMEDIA_PIA_SPF(&ae->stream_cbp.port->info),
|
|
7165
|
+
PJMEDIA_PIA_BITS(&ae->stream_cbp.port->info),
|
|
7166
|
+
g_ws_endpt,
|
|
7167
|
+
server_url,
|
|
7168
|
+
NULL,
|
|
7169
|
+
NULL,
|
|
7170
|
+
NULL,
|
|
7171
|
+
NULL,
|
|
7172
|
+
NULL,
|
|
7173
|
+
engine,
|
|
7174
|
+
language,
|
|
7175
|
+
on_ws_speech_event,
|
|
7176
|
+
call,
|
|
7177
|
+
&fp->port);
|
|
7178
|
+
if (status != PJ_SUCCESS) {
|
|
7179
|
+
set_error("pjmedia_ws_speech_port_create for recog failed");
|
|
7180
|
+
return false;
|
|
7181
|
+
}
|
|
6966
7182
|
}
|
|
6967
7183
|
|
|
6968
7184
|
status = pjmedia_conf_add_port(ae->conf, call->inv->pool, fp->port, NULL, &fp->slot);
|
|
@@ -8417,7 +8633,7 @@ static int digit_buffer_thread(void *arg) {
|
|
|
8417
8633
|
bool start_digit_buffer_thread() {
|
|
8418
8634
|
pj_status_t status;
|
|
8419
8635
|
pj_pool_t *pool =
|
|
8420
|
-
pj_pool_create(&
|
|
8636
|
+
pj_pool_create(&g_cp.factory, "digit_buffer_checker", 1000, 1000, NULL);
|
|
8421
8637
|
pj_thread_t *t;
|
|
8422
8638
|
status = pj_thread_create(pool, "digit_buffer_checker", &digit_buffer_thread,
|
|
8423
8639
|
NULL, 0, 0, &t);
|