pelican-nlp 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3787 @@
1
+ # prosomain.praat -- include file for prosogram
2
+ # This file isn't a stand-alone script; it is included by the main script "prosogram.praat".
3
+ # For documentation, user's guide and tutorial, see http://sites.google.com/site/prosogram/
4
+ # Author: Piet Mertens
5
+ # Requires Praat 6.0.43 or higher.
6
+ # Last modified on 2021-12-17
7
+
8
+
9
+ # For Postscript graphics, modify the next line to match the path to Ghostscript on your computer.
10
+ path_ghostscript$ = "c:\Program Files\gs\gs9.21\bin\gswin64c.exe" ; typical path on Windows-10 64 bit
11
+ # For GIF graphics, modify the next line to match the path to nconvert on your computer.
12
+ path_nconvert$ = "c:\Program Files\XnView\nconvert" ; only used for creating GIF files
13
+
14
+ # Don't change the following lines:
+ # they rewrite the first "\Program Files\" found in each path above to "\Progra~1\"
+ # (presumably so that the path handed to the external tool contains no space -- TODO confirm).
15
+ path_ghostscript$ = replace$ (path_ghostscript$, "\Program Files\", "\Progra~1\", 1)
16
+ path_nconvert$ = replace$ (path_nconvert$, "\Program Files\", "\Progra~1\", 1)
17
+
18
+ # Accepted filename extensions for sound files:
+ # (colon-separated list, with a leading and a trailing colon)
19
+ filename_extensions_sound$ = ":wav:aiff:aifc:nist:flac:sound:s16:mp3:"
20
+
21
+
22
+ # Procedure hierarchy
23
+ # main main procedure, in both modes (script form or batch mode)
24
+ # initialization_main
25
+ # process_form process fields in script form (in script mode, not in batch mode)
26
+ # process_multiple_input_files all processing in all tasks, for 1 or more input speech files
27
+ # task_flags set flags depending on task
28
+ # map_segmentation_type mapping between segmentation name and tier requirements
29
+ # initialization_multiple_files
30
+ # process_one_input_file
31
+ # initialization_per_file
32
+ # construct_filenames rule-based filenames
33
+ # read_annotation_file read annotation TextGrid from disk
34
+ # read_parameter_files read parameters from file
35
+ # calculate_parameters calculate parameters
36
+ # calculate_pitch_int calculate pitch (internal command, not batch command)
37
+ # calculate_intensity_bp_filtered
38
+ # corpus_conversion apply corpus conventions: tiernames, tier content, etc.
39
+ # prepare_plotted_textgrid create textgrid used in prosogram drawing
40
+ # load_intermediate_data_files
41
+ # get_segmentation read or calculate segmentation (phonetic alignment, etc.)
42
+ # read_nuclei_file_interactive_mode read internal TextGrid from earlier run
43
+ # copy_tiers_from_annotation_tg
44
+ # make_segmentation calculate segmentation
45
+ # safe_nuclei check boundaries of nuclei: unvoiced fragments, octave jumps, etc.
46
+ # create_table_of_nuclei create table of nuclei with prosodic features
47
+ # initialize_nucldat initialize some columns in table of nuclei
48
+ # speaker_info_get list all speakers in annotation and add speaker id to nucldat table
49
+ # stylize_nuclei pitch contour stylization
50
+ # speakers_prosodic_parms calculate prosodic parameters of each speaker
51
+ # pitchrange_normalized_pitch
52
+ # prosodic_profile
53
+ # prosodic_profile_new
54
+ # detect_hesitations detect hesitations from phonetic labels, acoustic prosodic features or words
55
+ # store_stylization
56
+ # speaker_autorange
57
+ # gr_start_picturewin
58
+ # gr_write_all_prosograms
59
+ # cleanup_current_file
60
+ # cleanup_global
61
+ # store_features write spreadsheet with prosodic features for syllables
62
+ #
63
+ # Commands in batch mode:
64
+ # prosogram (batch mode) calculate prosogram
65
+ # prosogram_variants variants for polytonia, boundary detection, stress detection, etc.
66
+ # polytonia (batch mode) calculate tonal annotation
67
+ # segmentation (batch mode) calculate automatic segmentation
68
+ # calculate_pitch (batch mode) calculate pitch (two-pass algorithm)
69
+
70
+ version$ = "Prosogram v3.01"
71
+ ; @logging: "reset debug timed", "_log.txt"
72
+
73
+
74
+ corpus$ = "" ; "" = use default corpus conventions;
75
+ ; Known corpora: rhapsodie, cprom, project_frfc, RUHRCAT, Prsir, gvlex, Kotsifas_2015
76
+
77
+
78
+ include prosoplot.praat
79
+ include segment.praat
80
+ include stylize.praat
81
+ include polytonia.praat
82
+ include util.praat
83
+
84
+ ;if (boundary_annotation)
85
+ ; include rules.praat
86
+ ; include prominence.praat
87
+ ;include duration.praat
88
+ ;include elasticity.praat
89
+ ;endif
90
+
91
+
92
+
93
+ ; Task identifiers, listed by numeric value
94
+ task_calc_pitch = 1
95
+ task_pitch_plot = 2
96
+ task_segmentation = 3 ; automatic segmentation into syllabic nuclei without phonetic alignment annotation
97
+ task_prosogram = 4 ; stylization, prosodic features, prosodic profile, Polytonia
98
+ task_interactive = 5
99
+ task_annotation = 6 ; Draw annotation only
100
+ task_autoanno = 9 ; only when using Praat form, not in batch mode
101
+ task_calc_loudness = 10
102
+ task_validate_tiers = 12
103
+ task_calc_bp_intensity = 13
104
+
105
+ ; Segmentation method identifiers, listed by numeric value
106
+ segm_vnucl = 1
107
+ segm_extern = 2
108
+ segm_aloudness = 3
109
+ segm_anucl = 4
110
+ segm_asyll = 5
111
+ segm_msyllvow = 6
112
+ segm_msyllpeak = 7
113
+ segm_mrhyme = 8 ; syllable rhyme using phoneme and syllable annotation
114
+ segm_voiced = 9 ; voiced portions
115
+ segm_specsim = 10 ; spectral similarity
116
+ segm_pitchchange = 11
117
+ segm_pitchterrace = 12
118
+
119
+
120
+ if (variableExists ("font"))
121
+ font_family$ = extractWord$(font$, "")
122
+ fontsize = extractNumber(font$, " ")
123
+ else
124
+ font_family$ = "Times" ; Font used in plot; else use "Helvetica"
125
+ fontsize = 10
126
+ endif
127
+
128
+ if (not variableExists ("batch_mode")) ; prosomain script started directly, not from prosogram.praat
129
+ batch_mode = 1
130
+ # defaults for the variables that the batch functions expect to exist
131
+ viewsize$ = "wide"
132
+ output_filename$ = ""
133
+ volatile = 0
134
+ clip_level = 0
135
+ scale_signal_amplitude = 0
136
+ minimum_pitch = 0
137
+ maximum_pitch = 450
138
+ @msg: "Batch mode..."
139
+ endif
140
+
141
+
142
+ procedure initialization_main
143
+ ; Output and bookkeeping switches
144
+ draw_prosograms = 1 ; draw_prosograms (modified by task_flags and by prosogram_variants)
145
+ save_intermediate_data = 0 ; save intermediate data (modified by task_flags and by prosogram_variants)
146
+ collect_output = 0 ; collect output in a single output file, for feature files, duration data, etc.
147
+ long_feature_table = 0 ; also write the long format feature table, next to the standard format
148
+ rich_format_stylization = 0 ; enable rich format stylization output file (one of the intermediate data files)
149
+ single_fname_graphics_output = 0 ; when true, all graphic files will be numbered using the same basename
150
+ save_BP = 0 ; only used in batch_mode segmentation
151
+ globalsheet_available = 0 ; multiple inputfile prosodic profile
152
+ do_cleanup = 1 ; cleanup all loaded or computed data at end of process (disable for debugging)
153
+ ; Optional analyses and the methods they use (0 = off)
154
+ needs_loudness = 0 ; compute loudness based on excitation spectrum
155
+ calc_prominence = 0 ; compute prominence measures
156
+ polytonia_annotation = 0 ; automatic annotation of pitch movements and pitch levels
157
+ boundary_annotation = 0 ; automatic detection of prosodic boundaries
158
+ stress_annotation = 0 ; automatic detection of stress
159
+ hesitation_annotation = 0 ; save detected hesitations
160
+ hesitation_method$ = "none" ; "annotation" / "phon+pros" / "words" / "all" / "none"
161
+ pause_method$ = "nuclei" ; "nuclei" / "phon_tier" / "syll_tier"
162
+ use_duration_model = 0
163
+ ; duration_model_filename$ = "db_phon_dur_rhapsodie.txt"
164
+ ; duration_model_filename$ = "db_phon_dur_cprom.txt"
165
+ ; duration_model_filename$ = "db_phon_dur_gvlex.txt"
166
+ duration_model_filename$ = ""
167
+ ; Signal handling
168
+ prefilterHP100 = 0 ; Apply HP filtering to avoid problems with low quality audio recordings
169
+ prefilterLP = 0 ; Apply LP filtering when fricatives are too strong (low quality audio recording)
170
+ avoid_insuff_memory = 0 ; Used in calculation of intensity of BP filtered signal
171
+ mindur_pause_anno = 0.2 ; min duration for pause in annotation
172
+ ; Plotting geometry and plotting options
173
+ viewport_width = 7.5 ; width of viewport used for prosogram (inches)
174
+ clip_to_Y_range = 0 ; clip stylisation to Y range
175
+ greyscale = 0 ; use greyscale instead of colors
176
+ show_f0 = 0 ; show F0 in prosogram (both in rich and light modes)
177
+ show_pitchrange = 0
178
+ show_pauses = 0 ; show detected pauses in prosogram by label "P"
179
+ show_hesitations = 0 ; show hesitations in prosogram by label "Hes"
180
+ show_prominence = 0
181
+ show_pseudosyllables = 0 ; show pseudosyllables in TextGrid
182
+ show_harmonicity = 0
183
+ show_elasticity = 0
184
+ show_localrate = 0
185
+ show_rhythm = 0
186
+ show_length = 0
187
+ endproc
188
+
189
+
190
+ procedure main
+ # Top-level driver: clears the Info window, checks the Praat version, sets the global
+ # defaults (@initialization_main), interprets the script form (@process_form) and then
+ # runs the selected task on the input file(s) (@process_multiple_input_files).
+ # Reads the globals <task>, <input_files$>, <anal_t1> and <anal_t2>; <task> is assigned in
+ # @process_form, the other three are presumably form fields -- confirm in prosogram.praat.
191
+ @debug_msg: "main: entry"
192
+ clearinfo ; clear the info window
193
+ @debug_msg: "main: Praat version = 'praatVersion'"
+ # praatVersion 6043 corresponds to the release 6.0.43 named in the message below
194
+ if (praatVersion < 6043)
195
+ @fatal_error: "Requires Praat version 6.0.43 or higher. Please update your Praat application."
196
+ endif
197
+
198
+ @initialization_main
199
+ @process_form
200
+ @process_multiple_input_files: task, input_files$, anal_t1, anal_t2
201
+
202
+ @msg: "Ready"
203
+ endproc
204
+
205
+
206
+ procedure process_multiple_input_files: .task, .filespec$, pmf_t1, pmf_t2
+ # Runs task <.task> on every input file designated by <.filespec$>.
+ # <.filespec$> either contains a wildcard "*" (expanded into a Strings file list, id kept in
+ # the global <filelistID>) or names one single readable file.
+ # Stops with a fatal error when no input file is found.
207
+ # <pmf_t1> starttime as used in call to multiple file batch processing
208
+ # <pmf_t2> endtime as used in call to multiple file batch processing
209
+ @debug_msg: "process_multiple_input_files: entry, filespec=<'.filespec$'>"
210
+ @msg: "'newline$'Script version 'version$'..."
211
+ task = .task ; needed in batch_mode, because global variable <task> is defined in process_form
212
+ .nfiles = 0
213
+ if (index (.filespec$, "*")) ; wildcard found
214
+ filelistID = Create Strings as file list: "filelist", .filespec$
215
+ .nfiles = Get number of strings
216
+ .wildcard_found = 1
217
+ else
218
+ .wildcard_found = 0
219
+ if (fileReadable (.filespec$))
220
+ .nfiles = 1
221
+ endif
222
+ endif
223
+ if (.nfiles == 0)
224
+ @fatal_error: "No input files found for <'.filespec$'>"
225
+ endif
226
+ ; filename returned by 'Get string' does not include path
227
+ @fname_parts: .filespec$
228
+ indir$ = result4$
229
+ @task_flags
230
+ @initialization_multiple_files
231
+ if (task == task_interactive and .nfiles > 1)
232
+ @msg: "Interactive mode uses a single speech file, not many."
233
+ .nfiles = 1
234
+ endif
235
+ for .ifile to .nfiles
236
+ if (not .wildcard_found)
237
+ ; <.filespec$> holds no wildcard in this branch, so it is used unchanged
238
+ @fname_parts: .filespec$
239
+ .fname$ = result1$
240
+ else
+ ; select the file list by id rather than by name, as done everywhere else in this file
241
+ selectObject: filelistID
242
+ .fname$ = Get string: .ifile
243
+ ; filename returned by 'Get string' does not include path
244
+ endif
+ ; each file starts again from the requested time range <pmf_t1>..<pmf_t2>
245
+ @process_one_input_file: pmf_t1, pmf_t2, .ifile, .nfiles, "'indir$''.fname$'"
246
+ endfor
247
+ @cleanup_global
248
+ endproc
249
+
250
+
251
+ procedure process_one_input_file: anal_t1, anal_t2, .ifile, .nfiles, .fullname$
+ # Runs the current global <task> on one input file and cleans up afterwards.
+ # <anal_t1>, <anal_t2> requested analysis time range; <anal_t2> = 0 means "up to the end of the signal".
+ #                      Both are written without a leading dot, so they are global variables;
+ #                      they are clamped to the time range of the signal below.
+ # <.ifile>             index of this file in the current run (1 = first file)
+ # <.nfiles>            number of files in the current run
+ # <.fullname$>         name of the input file, including its path
+ # Recoverable problems set <success> to 0 and jump to label file_done.
+ # In the interactive task the script exits after the demo window has been run.
252
+ @debug_msg: "process_one_input_file: entry, save_intermediate_data='save_intermediate_data'"
253
+ @initialization_per_file
254
+ @construct_filenames: .fullname$
255
+ @fname_parts: .fullname$
256
+ .fname$ = result2$
257
+ @msg: "'newline$'Processing input file <'.fname$'>..."
258
+
259
+ if (task == task_prosogram and .ifile == 1)
260
+ ; delete global report file created in previous run of Prosogram script
261
+ deleteFile: globalfile$
262
+ endif
263
+
264
+ if (needs_segm_type)
265
+ if (task == task_segmentation and not batch_mode)
266
+ segmentation_name$ = "asyll"
267
+ endif
268
+ if (task = task_interactive) ; peek segmentation_name$
269
+ @peek_settings
270
+ endif
271
+ if ((task = task_prosogram or task = task_interactive) and segmentation_name$ = "optimal")
272
+ @select_optimal_segmentation_method
273
+ @msg: "Selected optimal segmentation method: 'segmentation_name$'"
274
+ endif
275
+ @map_segmentation_type: segmentation_name$
276
+ endif
277
+
278
+ if (needs_segm_tg and not fileReadable (segfile$))
279
+ @error_msg: "Cannot find annotation file <'segfile$'>"
280
+ success = 0
281
+ goto file_done
282
+ endif
283
+
+ ; tasks that only validate or recompute one data file finish here
284
+ if (task == task_validate_tiers)
285
+ @validate_syllable_tier: segfile$
286
+ goto file_done
287
+ elsif (task = task_calc_pitch)
288
+ @calculate_pitch_int
289
+ removeObject: pitchID
290
+ pitch_available = 0
291
+ goto file_done
292
+ elsif (task = task_calc_bp_intensity)
293
+ @calculate_intensity_bp_filtered
294
+ removeObject: intbpID
295
+ intbp_available = 0
296
+ goto file_done
297
+ elsif (task = task_calc_loudness)
298
+ @calculate_loudness
299
+ removeObject: loudnessID
300
+ loudness_available = 0
301
+ goto file_done
302
+ endif
303
+
304
+ if (task == task_interactive) ; Read segmentation_name from nucleus file
305
+ @peek_settings
306
+ @gr_start_demowin: basename$
307
+ @gr_printline: "Interactive mode: Processing input file '.fname$'..."
308
+ endif
309
+
+ ; clamp the requested time range to the time range of the signal
310
+ @peek_signal
311
+ anal_t1 = max(anal_t1, signal_start)
312
+ if (anal_t2 == 0)
313
+ anal_t2 = signal_finish
314
+ elsif (anal_t2 > 0)
315
+ anal_t2 = min(anal_t2, signal_finish)
316
+ endif
317
+ if (anal_t1 >= signal_finish)
318
+ @fatal_error: "The start time you supplied ('anal_t1:3') is outside the time range of the sound ('signal_start:3' - 'signal_finish:3')"
319
+ endif
320
+ if (anal_t1 >= anal_t2)
321
+ @fatal_error: "The start time you supplied ('anal_t1:3') is greater than the used end time ('anal_t2:3')"
322
+ endif
323
+
324
+ if (task == task_prosogram or task == task_interactive)
325
+ if (anal_t1 == signal_start and (abs(anal_t2 - signal_finish) < time_step))
326
+ needs_pitchrange = 1 ; will calculate pitchrange (for prosodic profile or for interactive mode)
327
+ ; needs_pitchrange is also set for prosodic profile in task_flags
328
+ ; if (needs_pitchrange = 1) then full signal is analyzed
329
+ endif
330
+ endif
331
+
332
+ .minutes = (anal_t2 - anal_t1) div 60
333
+ .sec = (anal_t2 - anal_t1) mod 60
334
+ @msg: "Analysis time range: 'anal_t1:3' - 'anal_t2:3' s ('.minutes' min, '.sec:3' s)"
335
+
336
+ if (needs_parameters)
337
+ if (not volatile)
338
+ @gr_printline: "Reading parameter files..."
339
+ @read_parameter_files
340
+ endif
341
+ @gr_printline: "Calculating parameters..."
342
+ @calculate_parameters
343
+ endif
344
+
345
+ if (task != task_interactive and signal_available) ; Free memory if possible
346
+ removeObject: soundID
347
+ signal_available = 0
348
+ endif
349
+
350
+ if (needs_segm_tg or draw_prosograms or needs_pitchrange)
351
+ @read_annotation_file
352
+ endif
353
+ if (segfile_available) ; apply conversion to segmentation textgrid before making textgrid for plotting
354
+ selectObject: segmentationID
355
+ Convert to backslash trigraphs
356
+ @corpus_conversion: 1, "segmentationID", corpus$
357
+ endif
358
+
359
+ if (draw_prosograms)
360
+ @gr_printline: "Preparing TextGrid plotted in prosograms..."
361
+ @prepare_plotted_textgrid: tiers_to_show$
362
+ @gr_printline: "Preparing TextGrid plotted in prosograms... Ready"
363
+ endif
364
+
365
+ if (needs_segm_tg) ; apply conversion to segmentation textgrid after making textgrid for plotting
366
+ @corpus_conversion: 2, "segmentationID", corpus$
367
+ endif
368
+
369
+ if (needs_stylization or task == task_segmentation)
370
+ if (needs_pitchrange) ; analyse entire signal
371
+ t1s = signal_start
372
+ t2s = signal_finish
373
+ else ; analyse only part of the speech signal
374
+ t1s = anal_t1
375
+ t2s = anal_t2
376
+ endif
377
+ if (not task == task_interactive)
378
+ @gr_printline: "Segmentation into syllabic nuclei. Method='segmentation_name$', Time range='t1s:3'-'t2s:3'"
379
+ endif
380
+ @get_segmentation: segm_type
381
+ ; also does @safe_nuclei
382
+ endif
383
+
384
+ if (task == task_segmentation) ; post-processing segmentation results and write to file
385
+ selectObject: nucleiID
386
+ .tmp1 = Extract tier: nucleus_tier
387
+ .tmpN = Into TextGrid
388
+ selectObject: nucleiID
389
+ .tmp2 = Extract tier: syllable_tier
390
+ .tmpS = Into TextGrid
391
+ selectObject: nucleiID
392
+ .tmp3 = Extract tier: dip_tier
393
+ .tmpD = Into TextGrid
394
+ removeObject: .tmp1, .tmp2, .tmp3
395
+ if (segm_type == segm_pitchchange or segm_type == segm_pitchterrace) ; only keep syllable tier and name it "segm"
396
+ selectObject: .tmpS, .tmpD
397
+ .tmp = Merge
398
+ Set tier name: 1, "segm"
399
+ Set tier name: 2, "state"
400
+ else ; segm_asyll, segm_specsim ... ; keep nucleus tier and syllable tier
401
+ selectObject: .tmpN, .tmpS
402
+ .tmp = Merge
403
+ @nucleus_tier_postproc: .tmp, 1, 2
404
+ if (batch_mode)
405
+ if (trimleft > 0 or trimright > 0)
406
+ @tier_trim_nucleus: .tmp, 1, trimleft, trimright
407
+ endif
408
+ endif
409
+ endif
410
+ selectObject: .tmp
411
+ Write to text file: autosegfile$
412
+ @msg: "Segmentation written to <'autosegfile$'>"
413
+ removeObject: .tmpN, .tmpS, .tmpD, .tmp
414
+ if (batch_mode and save_BP)
415
+ selectObject: intbpID
416
+ Write to text file: intbpfile$
417
+ @msg: "Intensity of BP-filtered signal written to <'intbpfile$'>"
418
+ endif
419
+ goto file_done
420
+ endif
421
+
422
+ if (needs_stylization)
423
+ if (nrof_nuclei_analysed < 1)
424
+ @error_msg: "No syllabic nuclei found in speech signal."
425
+ @error_msg: "If you are using segmentation from annotation tier (phon... or syll...), check tier name and tier content."
426
+ if (save_intermediate_data)
427
+ selectObject: nucleiID
428
+ Write to text file: nuclfile$
429
+ endif
430
+ success = 0
431
+ goto file_done
432
+ endif
433
+ @create_table_of_nuclei
434
+ endif
435
+
436
+ if (task == task_interactive and reuse_nucl)
437
+ ; stylization cannot be used by boundary_annotation, because procedure stylize computes data in nucldatID
438
+ @load_intermediate_data_files
439
+ endif
440
+
441
+ if (needs_stylization)
442
+ @initialize_nucldat: t1s, t2s
443
+
444
+ if (task <> task_interactive and hesitation_method$ <> "none")
445
+ ; hesitations are used in duration data, in Polytonia, in detection of boundaries and prominence
446
+ @msg: "Detecting hesitations (method='hesitation_method$')..."
447
+ @detect_hesitations: anal_t1, anal_t2, hesitation_method$
448
+ if (hesitation_annotation)
449
+ selectObject: nucleiID
450
+ .tmpID = Extract one tier: hesitation_tier
451
+ Write to text file: hesitfile$
452
+ @msg: "Hesitation annotation saved to file... <'hesitfile$'>"
453
+ removeObject: .tmpID
454
+ endif
455
+ endif
456
+
457
+ @debug_msg: "process_one_input_file: needs_stylization"
458
+ if (not stylization_available)
459
+ # Create pitch tier object for stylization
460
+ if (adaptive_glissando)
461
+ s$ = "G(adapt)='glissando_low'-'glissando'/T^2"
462
+ else
463
+ s$ = "G='glissando'/T^2"
464
+ endif
465
+ s$ = "'segmentation_name$', " + s$ + ", DG='diffgt', dmin='mindur_ts:3'"
466
+ @gr_printline: "Calculating stylization... ('s$')"
467
+ stylID = Create PitchTier: "stylization", signal_start, signal_finish
468
+ @stylize_nuclei: t1s, t2s
469
+ stylization_available = 1
470
+ @gr_printline: "Calculating stylization... Ready"
471
+ endif
472
+ ; next line comes after stylize_nuclei, because it uses values in nucldatID
473
+
474
+ ; @debug_msg: "process_one_input_file: profile_available='profile_available' needs_pitchrange='needs_pitchrange'"
475
+ if (not profile_available) ; in interactive mode profile may have been read from file
476
+ @speaker_info_get
477
+ @profile_table_create
478
+ @profile_table_prepare
479
+ profile_available = 1
480
+ ; Table is available, but needs to be filled by @speakers_prosodic_parms
481
+ if (needs_pitchrange)
482
+ ;if (variableExists ("corpus$") and corpus$ = "Proust" and variableExists ("corpus_speaker_range_1$"))
483
+ ; @msg: "Reading pitch range from corpus_speaker_range..."
484
+ ; speaker_range_1$ = corpus_speaker_range_1$
485
+ @gr_printline: "Calculating pitch range of each speaker..."
486
+ @speakers_prosodic_parms
487
+ ; values stored in prosodic profile table
488
+ if (task <> task_interactive)
489
+ @msg: "Calculating pitch range normalized pitch..."
490
+ @pitchrange_normalized_pitch
491
+ endif
492
+ endif
493
+ endif
494
+
495
+ if (needs_prosodic_profile and (task <> task_interactive) and (not polytonia_annotation))
496
+ ; Write prosodic profile report file
497
+ @prosodic_profile
498
+ @msg: "Writing prosodic profile of current input file to: 'profile_file$'"
499
+ selectObject: profileID
500
+ Save as headerless spreadsheet file: profile_file$
501
+ @prosodic_profile_new
502
+ @msg: "Writing global prosodic profile of all input files to: 'globalfile$'"
503
+ @update_global_report
504
+ endif
505
+
506
+ if (calc_prominence)
507
+ @msg: "Calculating prominence..."
508
+ @calc_prom_dynwidth: nucldatID, j_nucldur, j_promL2D_nucldur, 2, 0, 1
509
+ ; @calculate_prominence_measures
510
+ endif
511
+ if (show_pseudosyllables and draw_prosograms)
512
+ @grid_append_tier: nucleiID, syllable_tier, "newgridID"
513
+ endif
514
+
515
+ if (polytonia_annotation)
516
+ @msg: "Tonal annotation..."
517
+ @polytonia_main: anal_t1, anal_t2
518
+ if (draw_prosograms)
519
+ ; In prosograms, only the extended Polytonia is displayed
520
+ @grid_append_tier: nucleiID, polytonia2_tier, "newgridID"
521
+ if (not segfile_available) ; no other tiers available
522
+ selectObject: newgridID
523
+ Remove tier: 1 ; dummy tier
524
+ endif
525
+ endif
526
+ selectObject: nucleiID
527
+ .tmp1 = Extract one tier: polytonia_tier
528
+ selectObject: nucleiID
529
+ .tmp2 = Extract one tier: polytonia2_tier
530
+ selectObject: .tmp1, .tmp2
531
+ .tmp3 = Merge
532
+ Write to text file: polytonia_file$
533
+ @msg: "Tonal annotation saved to file... 'polytonia_file$'"
534
+ removeObject: .tmp1, .tmp2, .tmp3
535
+ endif
536
+
537
+ if (boundary_annotation)
538
+ @msg: "Boundary annotation..."
539
+ show_prominence = 0 ; plot prominence measures
540
+ boundary_annotation_verbose = 1
541
+ save_intermediate_data = 1
542
+ needs_prosodic_profile = 0
543
+ if (not (segm_type == segm_msyllvow or segm_type == segm_mrhyme or segm_type == segm_msyllpeak))
544
+ @error_msg: "Boundary annotation requires Syllabic segmentation method"
545
+ segm_type = segm_msyllvow
546
+ endif
+ ; NOTE(review): <boundary_use_nuclei> is not assigned here when pause_method$ is "phon_tier"
547
+ if (pause_method$ == "syll_tier")
548
+ boundary_use_nuclei = 0 ; use pauses from annotation
+ ; the closing parenthesis of the next condition was missing in the original line
549
+ elsif (pause_method$ == "nuclei")
550
+ boundary_use_nuclei = 1 ; use gaps between nuclei
551
+ endif
552
+ @msg: "Calculating boundaries (boundary_use_nuclei='boundary_use_nuclei')..."
553
+ boundary_skip_hesit = 0
554
+ @boundary_analysis: anal_t1, anal_t2, boundary_use_nuclei, boundary_skip_hesit
555
+ selectObject: nucleiID
+ ; NOTE(review): <tmpID> is not removed in this procedure -- confirm that a cleanup procedure removes it
556
+ tmpID = Extract one tier: boundary_tier
557
+ Write to text file: boundaryfile$
558
+ if (draw_prosograms)
559
+ @grid_append_tier: nucleiID, boundary_tier, "newgridID"
560
+ @grid_append_tier: nucleiID, boundary_tier, "segmentationID"
561
+ endif
562
+ @boundary_pass2: anal_t1, anal_t2
563
+ if (draw_prosograms)
564
+ @grid_append_tier: nucleiID, boundary2_tier, "newgridID"
565
+ endif
566
+ call tier_get segmentationID "^boundary-manu$" ref_tier "Cannot find boundary reference tier in segmentation grid" 0
567
+ call create_evaluation_data segmentationID ref_tier 'evalfile$'
568
+ @msg: "Boundaries ready"
569
+ endif
570
+
571
+ if (stress_annotation)
572
+ @stress_analysis: anal_t1, anal_t2, pause_use_nuclei
573
+ @grid_append_tier: nucleiID, stress_tier, "newgridID"
574
+ endif
575
+
576
+ if (save_intermediate_data)
577
+ if (not reuse_nucl)
578
+ selectObject: nucleiID
579
+ Write to text file: nuclfile$
580
+ endif
581
+ if (not reuse_styl)
582
+ selectObject: stylID
583
+ Write to text file: stylfile$
584
+ endif
585
+ if (rich_format_stylization)
586
+ @store_stylization: stylID, nucleiID, targetsfile$
587
+ endif
588
+ ; call store_stylization_resampled stylID nucleiID time_step 'stylpitchfile$'
589
+ ;.outfname$ = prefix$ + "_dur.txt" ; uses basename
590
+ ;.outfname$ = indir$ + "_dur_syllprom.txt" ; uses single output file
591
+ ;call duration_data_from_textgrid segmentationID .ifile "syllprom" "'.outfname$'" 'basename$'
592
+ endif
593
+
594
+ selectObject: stylID ; stylization pitch tier in Hz
595
+ stylSTID = Copy: "styl_ST"
596
+ @convert_Hz_ST: stylSTID
597
+
598
+ # Initialization for plotting results of current input file
599
+ if (draw_prosograms and auto_pitchrange)
600
+ ; GLOBAL automatic pitch range selection for entire corpus to be analysed
601
+ @speaker_autorange: anal_t1, anal_t2
602
+ ySTmax = ymax
603
+ ySTmin = ymin
604
+ endif
605
+ else ; no stylization needed
606
+ if (pitch_available)
607
+ selectObject: pitchID
608
+ .y = Get mean: 0, 0, "semitones re 1 Hz"
609
+ ySTmin = .y - 12
610
+ ySTmax = .y + 12
611
+ endif
612
+ endif ; needs_stylization
613
+
614
+ # Draw all prosograms for current input file
615
+ if (draw_prosograms)
616
+ if (task == task_interactive)
617
+ @gr_start_demowin: basename$
618
+ @gr_run_demowin: anal_t1, anal_t2, timeincr, ySTmin, ySTmax
619
+ @cleanup_current_file
620
+ @cleanup_global
621
+ exit
622
+ else
623
+ @msg: "Drawing and saving all prosograms..."
624
+ @gr_start_picturewin
625
+ @gr_write_all_prosograms: anal_t1, anal_t2, timeincr, .ifile, .nfiles
626
+ endif
627
+ endif ; draw_prosograms
628
+
629
+ if (task == task_annotation)
630
+ @gr_start_picturewin
631
+ if (nrof_pages == 0)
632
+ @gr_first_viewport_of_page
633
+ nrof_pages = 1
634
+ @gr_write_all_annotation: anal_t1, anal_t2, timeincr, .ifile, .nfiles
635
+ endif
636
+ endif
637
+
638
+ if (save_intermediate_data) ; only for some tasks
639
+ ; this step must follow plotting, because store_features removes columns
640
+ if (not collect_output)
641
+ @store_features: 1, 0, sheetfile$
642
+ if (long_feature_table)
643
+ @store_features: 1, 1, tablefile$
644
+ endif
645
+ else
646
+ @store_features: .ifile, 0, collectfile$
647
+ endif
648
+ endif
649
+
650
+ label file_done
651
+
652
+ # Delete temporary objects for current input file
653
+ @cleanup_current_file
654
+ endproc
655
+
656
+
657
+ procedure select_optimal_segmentation_method
+ ; Sets <segmentation_name$> according to the tiers found in annotation file <segfile$>:
+ ; phon and syll tiers -> "rhyme", phon tier only -> "vow-nucl", syll tier only -> "syll",
+ ; neither tier or no readable annotation file -> "asyll".
658
+ .phon_tier = 0
659
+ .syll_tier = 0
660
+ if (fileReadable (segfile$))
661
+ @debug_msg: "select_optimal_segmentation_method: task='task', segmentation file found"
662
+ ; look up both tiers, then discard the TextGrid again
663
+ .tmpID = Read from file: segfile$
664
+ @tier_number_by_name: .tmpID, "^phon"
665
+ .phon_tier = result
666
+ @tier_number_by_name: .tmpID, "^syll"
667
+ .syll_tier = result
668
+ removeObject: .tmpID
669
+ @debug_msg: "select_optimal_segmentation_method: phon_tier='.phon_tier', syll_tier='.syll_tier'"
670
+ endif
671
+ if (.phon_tier and .syll_tier)
672
+ segmentation_name$ = "rhyme"
673
+ elsif (.phon_tier)
674
+ segmentation_name$ = "vow-nucl"
675
+ elsif (.syll_tier)
676
+ segmentation_name$ = "syll"
677
+ else
678
+ segmentation_name$ = "asyll"
679
+ endif
+ endproc
680
+
681
+
682
+ procedure map_segmentation_type: .method$
683
+ # Maps segmentation name to a type and sets some flags.
+ # <.method$> name of the segmentation method; an unknown name stops with a fatal error.
+ # Sets the globals <segm_type>, <segmentation_name$>, <needs_segm_tg>, <needs_phon_tier>
+ # and <needs_syll_tier>; some methods also set <needs_intbp> to 1 (it is never reset here).
684
+ @debug_msg: "map_segmentation_type: entry, method='.method$'"
685
+ needs_segm_tg = 1 ; selected segmentation type needs segmentation TextGrid
686
+ needs_phon_tier = 0 ; selected segmentation type needs phoneme tier
687
+ needs_syll_tier = 0 ; selected segmentation type needs syllable tier
688
+ segmentation_name$ = .method$
689
+ if (.method$ = "vow-nucl")
690
+ segm_type = segm_vnucl
691
+ needs_phon_tier = 1
692
+ elsif (.method$ = "extern")
693
+ segm_type = segm_extern
694
+ elsif (.method$ = "loudness") ; automatic, loudness peaks
695
+ segm_type = segm_aloudness
696
+ needs_segm_tg = 0
697
+ elsif (.method$ = "int-BP") ; automatic, peaks in bandpass filters speech
698
+ segm_type = segm_anucl
699
+ needs_intbp = 1
700
+ needs_segm_tg = 0
701
+ elsif (.method$ = "asyll")
702
+ segm_type = segm_asyll
703
+ needs_intbp = 1
704
+ needs_segm_tg = 0
705
+ elsif (.method$ = "rhyme")
706
+ segm_type = segm_mrhyme
707
+ needs_phon_tier = 1
708
+ needs_syll_tier = 1
709
+ elsif (.method$ = "syll+vow")
710
+ segm_type = segm_msyllvow
711
+ needs_phon_tier = 1
712
+ needs_syll_tier = 1
713
+ elsif (.method$ = "syll")
714
+ segm_type = segm_msyllpeak
715
+ needs_syll_tier = 1
716
+ elsif (.method$ = "voiced")
717
+ segm_type = segm_voiced
718
+ needs_segm_tg = 0
719
+ elsif (.method$ = "specsim")
720
+ segm_type = segm_specsim
721
+ needs_segm_tg = 0
722
+ elsif (.method$ = "pitchchange")
723
+ segm_type = segm_pitchchange
724
+ needs_segm_tg = 0
725
+ needs_intbp = 1
726
+ elsif (.method$ = "pitchterrace")
727
+ segm_type = segm_pitchterrace
728
+ needs_segm_tg = 0
729
+ else
730
+ segmentation_name$ = "unknown"
731
+ @fatal_error: "Unknown segmentation type: '.method$'"
732
+ endif
733
+ if (boundary_annotation)
734
+ @debug_msg: "map_segmentation_type: boundary_annotation, segmentation name='segmentation_name$' ('segm_type')"
735
+ if (not (segm_type == segm_msyllvow or segm_type == segm_mrhyme or segm_type == segm_msyllpeak))
736
+ @error_msg: "Boundary annotation requires Syllabic segmentation method"
737
+ segm_type = segm_msyllvow
+ ; keep the name and the tier requirements in step with the forced method,
+ ; using the same values as the "syll+vow" branch above
+ segmentation_name$ = "syll+vow"
+ needs_segm_tg = 1
+ needs_phon_tier = 1
+ needs_syll_tier = 1
738
+ endif
739
+ endif
740
+ @debug_msg: "map_segmentation_type: exit"
741
+ endproc
742
+
743
+
744
+ procedure task_flags
745
+ # set some flags depending on task type
+ # Reads the global <task> and the option flags set before the call (save_intermediate_data,
+ # polytonia_annotation, boundary_annotation, stress_annotation, hesitation_annotation,
+ # calc_prominence, viewsize$, batch_mode). First resets the needs_... and show_... flags
+ # listed below to their defaults, then adjusts them for the selected task.
746
+ @debug_msg: "task_flags: entry"
747
+ needs_parameters = 1
748
+ needs_stylization = 1
749
+ needs_loudness = 0
750
+ needs_intbp = 0
751
+ needs_pitchrange = 0
752
+ needs_prosodic_profile = 0
753
+ needs_segm_tg = 0
754
+ needs_segm_type = 0
755
+ needs_picture_win = 0
756
+ nrofplottedtiers = 0
757
+ show_settings = 1
758
+ show_portee = 1
759
+ show_tg_bound = 1 ; plot vertical lines for interval boundaries in TextGrid
760
+ show_y_scale = 1 ; plot calibration in ST on vertical axis
761
+ show_y_scale_r = 1 ; plot calibration in Hz on vertical axis at right side
762
+ show_x_scale = 1
763
+ show_vuv = 1 ; plot V/UV parameter
764
+ show_intensity = 1 ; plot intensity parameter
765
+ show_intbp = 1 ; plot int-BP parameter
766
+ show_tiernames = 1 ; plot tier names to the right of tier
767
+ show_trajectories = 0 ; show intrasyllab up/down and intersyllab pitch intervals
768
+ if (task == task_calc_pitch or task == task_calc_bp_intensity or task == task_calc_loudness)
769
+ needs_parameters = 0
770
+ volatile = 0
771
+ needs_stylization = 0
772
+ draw_prosograms = 0
773
+ if (task == task_calc_loudness)
774
+ needs_loudness = 1
775
+ endif
776
+ elsif (task == task_annotation)
777
+ needs_parameters = 0
778
+ volatile = 0
779
+ needs_stylization = 0
780
+ needs_picture_win = 1
781
+ draw_prosograms = 0
782
+ show_y_scale = 0
783
+ elsif (task == task_prosogram)
784
+ needs_segm_type = 1 ; may be modified later by procedure: map_segmentation_type
785
+ if (calc_prominence)
786
+ ; needs_loudness = 1
787
+ endif
788
+ needs_picture_win = 1
789
+ if (viewsize$ = "compact")
790
+ show_tiernames = 0
791
+ endif
792
+ if (save_intermediate_data)
793
+ needs_prosodic_profile = 1
794
+ needs_pitchrange = 1
795
+ endif
796
+ if (polytonia_annotation)
797
+ needs_prosodic_profile = 1 ; because pitch range information is stored in profileID !!
798
+ needs_pitchrange = 1
799
+ hesitation_method$ = "all"
800
+ ; mindur_ts = 0.05
801
+ show_hesitations = 1
802
+ show_pauses = 1
803
+ save_intermediate_data = 1
804
+ endif
805
+ if (boundary_annotation)
806
+ calc_prominence = 1
807
+ show_prominence = 0
808
+ show_length = 1
809
+ boundary_annotation_verbose = 1
810
+ save_intermediate_data = 1
811
+ endif
812
+ if (stress_annotation)
813
+ calc_prominence = 1
814
+ show_prominence = 1 ; plot prominence measures
815
+ pause_use_nuclei = 1 ; 1= use gaps between nuclei; 0= use pauses from annotation
816
+ endif
817
+ if (hesitation_annotation)
818
+ needs_segm_tg = 1
819
+ needs_pitchrange = 0
820
+ needs_picture_win = 0
821
+ endif
822
+ elsif (task == task_pitch_plot)
823
+ needs_stylization = 0
824
+ segmentation_name$ = ""
825
+ needs_picture_win = 1
826
+ show_settings = 0
827
+ show_vuv = 0
828
+ ; show_intensity = 0
829
+ show_intbp = 0
830
+ rich = 1 ; because pitch is plotted in rich mode
831
+ show_pauses = 0
832
+ nrof_nuclei_analysed = 0
833
+ elsif (task == task_interactive)
834
+ needs_segm_type = 1
835
+ show_settings = 0
836
+ viewsize$ = "wide"
837
+ rich = 0
838
+ elsif (task == task_segmentation)
839
+ needs_segm_type = 1
840
+ needs_stylization = 0
841
+ draw_prosograms = 0
+ ; <batch_mode> always exists by now (the script-level code defines it when it is missing),
+ ; so its value is tested rather than its existence, as in process_one_input_file
842
+ if (not batch_mode)
843
+ segm_type = segm_asyll
844
+ endif
845
+ elsif (task == task_validate_tiers)
846
+ needs_segm_tg = 1
847
+ needs_parameters = 0
848
+ needs_stylization = 0
849
+ draw_prosograms = 0
850
+ endif
+ ; <rich> may not exist (only some branches above assign it), hence the existence check
851
+ if (variableExists("rich"))
852
+ if (not rich)
853
+ show_tg_bound = 0
854
+ endif
855
+ endif
856
+ endproc
857
+
858
+
859
+ procedure process_form
860
+ ; Interpret all fields from script form and set global flag variables accordingly
+ ; Processing order: task selection, segmentation-name mapping, time settings, plotting options,
+ ; thresholds, output mode, parameter storage, amplitude scaling/clipping, F0 detection range,
+ ; plotted pitch range, then @task_flags, graphics-format checks and the input file field.
+ ; Invalid form values are reported through @fatal_error or @error_msg.
861
+ @debug_msg: "process_form: entry"
862
+ needs_segm_type = 0 ; true if task requires a segmentation type
863
+
864
+ ; Task selection
+ ; task$ is matched by substring (index), and the first matching branch of the chain wins.
865
+ if (index (task$, "Recalculate pitch"))
866
+ task = task_calc_pitch
867
+ elsif (index (task$, "Recalculate intensity"))
868
+ task = task_calc_bp_intensity
869
+ elsif (index (task$, "Recalculate loudness"))
870
+ task = task_calc_loudness
871
+ elsif (index (task$, "Prosogram") or index (task$, "intermediate data files") or index (task$, "Polytonia"))
872
+ task = task_prosogram
873
+ needs_segm_type = 1 ; may be modified later by procedure: map_segmentation_type
874
+ draw_prosograms = 0
875
+ save_intermediate_data = 0
+ ; The three sub-options below are independent: a task description may enable several of them.
876
+ if (index (task$, "Prosogram"))
877
+ draw_prosograms = 1
878
+ endif
879
+ if (index (task$, "intermediate data files"))
880
+ save_intermediate_data = 1
881
+ endif
882
+ if (index (task$, "Polytonia"))
883
+ polytonia_annotation = 1
884
+ draw_prosograms = 1
885
+ endif
886
+ elsif (index (task$, "Plot pitch"))
887
+ task = task_pitch_plot
888
+ elsif (index (task$, "automatic segmentation"))
889
+ task = task_segmentation
890
+ segm_type = segm_asyll
891
+ segmentation_name$ = "asyll"
892
+ needs_segm_type = 1
893
+ elsif (index (task$, "Interactive"))
894
+ task = task_interactive
895
+ needs_segm_type = 1
896
+ elsif (index (task$, "Draw annotation"))
897
+ task = task_annotation
898
+ elsif (index (task$, "boundary"))
899
+ task = task_autoanno
900
+ boundary_annotation = 1
901
+ needs_segm_type = 1
902
+ elsif (index (task$, "Validate"))
903
+ task = task_validate_tiers
904
+ else
905
+ @fatal_error: "Invalid task"
906
+ endif
907
+
908
+ ; Map segmentation type descriptions in form to names that will also be displayed in prosogram graph.
+ ; Note: for the automatic segmentation task, this mapping overwrites the "asyll" name assigned above.
909
+ if (needs_segm_type)
910
+ .method$ = segmentation_method$
911
+ if (index (.method$, "optimal"))
912
+ segmentation_name$ = "optimal"
913
+ elsif (index (.method$, "in vowels"))
914
+ segmentation_name$ = "vow-nucl"
915
+ elsif (index (.method$, "external"))
916
+ segmentation_name$ = "extern"
917
+ elsif (index (.method$, "loudness")) ; automatic, loudness peaks
918
+ segmentation_name$ = "loudness"
919
+ elsif (index (.method$, "BP-filtered")) ; automatic, peaks in bandpass filters speech
920
+ segmentation_name$ = "int-BP"
921
+ elsif (index (.method$, "Automatic: acoustic syllables"))
922
+ segmentation_name$ = "asyll"
923
+ elsif (index (.method$, "in rhyme"))
924
+ segmentation_name$ = "rhyme"
925
+ elsif (index_regex (.method$, "in syllables.*and vowels"))
926
+ segmentation_name$ = "syll+vow"
927
+ elsif (index_regex (.method$, "in syllables.*and local peak"))
928
+ segmentation_name$ = "syll"
929
+ elsif (index (.method$, "Automatic: voiced portions"))
930
+ segmentation_name$ = "voiced"
931
+ else
932
+ @fatal_error: "Unknown segmentation type: '.method$'"
933
+ endif
934
+ endif
935
+
936
+ ; Time step (frame rate) used for calculation of intensity and pitch
+ ; frame_period$ is a string field; the quote substitution turns its text into a numeric value.
937
+ time_step = 'frame_period$'
938
+ anal_t1 = left_Time_range
939
+ anal_t2 = right_Time_range
940
+
941
+ ; Plotting options
+ ; viewsize$ defaults to "wide" unless view$ mentions "Compact" or "Large".
942
+ viewsize$ = "wide"
943
+ if (index (view$, "Compact"))
944
+ viewsize$ = "compact"
945
+ elsif (index (view$, "Large"))
946
+ viewsize$ = "large"
947
+ endif
948
+ rich = 0
949
+ if (index (view$, "rich"))
950
+ rich = 1
951
+ show_f0 = 1
952
+ endif
953
+ show_pitchrange = 0 ; Show pitch range
954
+ if (index (view$, "pitch range"))
955
+ show_pitchrange = 1
956
+ endif
957
+ if (rich = 0 and index (view$, "f0"))
958
+ show_f0 = 1
959
+ endif
+ ; draw_pitch_target_values: 0 = off, 1 = requested by "pitch targets", 2 = requested by "pitch targets in Hertz"
960
+ draw_pitch_target_values = 0 ; Draw pitch target values (in ST) in prosogram
961
+ if (index (view$, "pitch targets"))
962
+ draw_pitch_target_values = 1
963
+ if (index (view$, "pitch targets in Hertz"))
964
+ draw_pitch_target_values = 2
965
+ endif
966
+ endif
967
+
968
+ ; Thresholds
+ ; s$ receives characters 3 .. j-1 of thresholds$, i.e. the text between the first two characters and "/T".
+ ; NOTE(review): presumably thresholds$ starts with a 2-character prefix such as "G=" -- confirm against the form definition.
969
+ j = index_regex (thresholds$, "/T")
970
+ s$ = mid$ (thresholds$, 3, j-3) ; s$ = glissando threshold(s)
971
+ if (index (thresholds$, "adaptive")) ; adaptive glissando threshold (lower before pause)
972
+ adaptive_glissando = 1
+ ; Fixed-width parsing: characters 1-4 of s$ give the low threshold, characters 6-9 the high threshold.
973
+ s2$ = left$ (s$, 4)
974
+ glissando_low = 's2$'
975
+ s2$ = mid$ (s$, 6, 4)
976
+ glissando = 's2$'
977
+ else ; fixed glissando threshold
978
+ adaptive_glissando = 0
979
+ glissando = 's$'
980
+ glissando_low = glissando
981
+ endif
982
+ diffgt = extractNumber (thresholds$, "DG=")
983
+ mindur_ts = 0.035 ; Minimum duration for a tonal segment (default)
984
+ j = index (thresholds$, "dmin")
985
+ if (j > 0)
+ ; Take everything from 5 characters after the start of "dmin" up to the end of the string.
986
+ s$ = mid$ (thresholds$, j+5, length (thresholds$) -(j+4))
987
+ mindur_ts = 's$'
988
+ endif
989
+
990
+ if (not variableExists ("output_mode"))
991
+ output_mode$ = "Fill page with strips"
992
+ endif
+ ; outputmode$ keeps only the first word of output_mode$ (text before the first blank).
993
+ outputmode$ = left$ (output_mode$, index(output_mode$, " ") -1)
994
+
995
+ volatile = 0 ; by default, process full signal and store results
996
+ if (index (parameter_calculation$, "Partial") > 0)
997
+ volatile = 1
998
+ endif
999
+
1000
+ scale_signal_amplitude = 0
1001
+ if (variableExists ("scale_signal_intensity")) ; Form contains field "Scale signal intensity"
1002
+ if (index (scale_signal_intensity$, "Scale"))
1003
+ scale_signal_amplitude = 1
1004
+ endif
1005
+ endif
1006
+
+ ; clip_level stays 0 (no clipping) unless the form provides a clipping threshold within 0..100.
1007
+ clip_level = 0
1008
+ if (variableExists ("clipping_threshold")) ; Form contains field "Clipping threshold"
1009
+ if (clipping_threshold > 100 or clipping_threshold < 0)
1010
+ @fatal_error: "Invalid value for clipping threshold"
1011
+ endif
1012
+ clip_level = clipping_threshold
1013
+ endif
1014
+
1015
+ ; Frequency range for pitch detection
+ ; A lower limit of 0 skips the range checks below (see commented-out autorange flag).
1016
+ minimum_pitch = left_F0_detection_range
1017
+ maximum_pitch = right_F0_detection_range
1018
+ if (minimum_pitch = 0)
1019
+ ; autorange_f0detect = 1
1020
+ else
1021
+ if (minimum_pitch < 40 or maximum_pitch > 800)
1022
+ @fatal_error: "Invalid F0 range: expected to be within 40 - 800 Hz range"
1023
+ endif
+ ; NOTE(review): this test also rejects equal limits, although the message only mentions ">".
1024
+ if (minimum_pitch >= maximum_pitch)
1025
+ @fatal_error: "Invalid F0 range: lower limit > higher limit"
1026
+ endif
1027
+ endif
1028
+ if (left_Pitch_range == 0)
1029
+ auto_pitchrange = 1 ; automatic pitch range adjustment in plot (not in F0 detection)
1030
+ else
1031
+ auto_pitchrange = 0 ; manual pitch range adjustment for plot
1032
+ if (right_Pitch_range <= left_Pitch_range or left_Pitch_range <= 0)
1033
+ @fatal_error: "Invalid values for pitch range"
1034
+ endif
1035
+ endif
1036
+ ySTmin = left_Pitch_range
1037
+ ySTmax = right_Pitch_range
1038
+
1039
+ timeincr = time_interval_per_strip ; time increment for plot
1040
+
+ ; Derive the remaining task-dependent flags; the checks below rely on draw_prosograms being final.
1041
+ @task_flags
1042
+
1043
+ if (draw_prosograms and (index (output_format$,"EMF")) and not windows)
1044
+ @error_msg: "Windows Metafiles (EMF) are supported on Windows systems only."
1045
+ endif
1046
+
1047
+ if (draw_prosograms and (index (output_format$,"JPG")))
1048
+ if (not fileReadable (path_ghostscript$))
1049
+ @error_msg: "JPG output requires Ghostscript, which is not found. Verify configuration for Ghostcript."
1050
+ endif
1051
+ endif
1052
+
+ ; If the input files field is empty or contains only blanks, ask the user to pick a file.
1053
+ s$ = input_files$
1054
+ s$ = replace$ (s$, " ", "", 0)
1055
+ if (length(s$) < 1)
1056
+ input_files$ = chooseReadFile$ ("Select file")
1057
+ endif
1058
+ @debug_msg: "process_form: exit"
1059
+ endproc
1060
+
1061
+
1062
+ procedure initialization_multiple_files
1063
+     ; Set defaults shared by all input files of a run: band-pass filter cutoffs, nucleus
+     ; detection thresholds, and the plotted pitch range expressed both in ST and in Hz.
+     @debug_msg: "initialization_multiple_files: entry"
1064
+     ; Band-pass filter cutoffs and intensity thresholds
1065
+     fc_low = 300       ; BP filter
1066
+     fc_high = 3500     ; BP filter
1067
+     mindiff = 3        ; intensity difference threshold for local dips in convex hull
1068
+     diff_left = 2      ; intensity difference between local peak and left boundary of nucleus
1069
+     ; Unit (ST or Hz) in which the shown pitch range is expressed
1070
+     units$ = "Semitones"
1071
+     ySTmin = 0
1072
+     ySTmax = 100
1073
+     ; Offset in ST between the ST scale relative to 100 Hz and the one relative to 1 Hz
1074
+     .diffST = 12 * log2 (100 / 1)
1075
+     if (units$ <> "Semitones")
1076
+         ; Range given in Hz: derive the ST limits (relative to 1 Hz)
+         ySTmin = hertzToSemitones (yHzmin) - hertzToSemitones (1)
1077
+         ySTmax = hertzToSemitones (yHzmax) - hertzToSemitones (1)
1078
+     else
1079
+         ; Range given in ST: derive the Hz limits
+         yHzmin = semitonesToHertz (ySTmin - .diffST)
1080
+         yHzmax = semitonesToHertz (ySTmax - .diffST)
1081
+     endif
+     @debug_msg: "initialization_multiple_files: exit"
1082
+ endproc
1083
+
1084
+
1085
+ procedure initialization_per_file
1086
+     ; Reset all per-file status flags before an input file is processed.
+     @debug_msg: "initialization_per_file: entry"
1087
+     ; Overall status
+     success = 1                  ; 0 if error encountered while processing input file
1088
+     nrof_nuclei_analysed = 0
1089
+     ; Signal and acoustic parameters
+     signal_available = 0         ; speech signal loaded
1090
+     pitch_available = 0
1091
+     intensity_available = 0
1092
+     intbp_available = 0
1093
+     inthp_available = 0
1094
+     loudness_available = 0
1095
+     harmonicity_available = 0
1096
+     ; Nuclei, segmentation, stylization and profile data
+     nuclei_available = 0
1097
+     nucldat_available = 0
1098
+     segfile_available = 0        ; annotation file with alignements of phonemes, syllables, etc.
1099
+     segmentation_available = 0   ; 1 if segmentation has been read from saved <basename>_nucl.TextGrid file
1100
+     stylization_available = 0    ; 1 if stylization has been read from saved <basename>_styl.PitchTier file
1101
+     profile_available = 0        ; 1 if profile data has been read from saved <basename>_profile_data.txt
1102
+     reuse_nucl = 0               ; 1 if nucleus file could be read and used as segmentation
1103
+     reuse_styl = 0
1104
+     ; Tiers found in the annotation TextGrid
+     phones_available = 0         ; 1 if phoneme tier is found in annotation TextGrid
1105
+     syllables_available = 0      ; 1 if syllable tier is found in annotation TextGrid
1106
+     words_available = 0          ; 1 if word tier is found in annotation TextGrid
1107
+     creak_available = 0          ; creak textgrid/tier found and read ?
1108
+     speaker_available = 0        ; speaker textgrid/tier found and read ?
1109
+     hesitation_available = 0     ; hesitation textgrid/tier found and read ?
1110
+     newgrid_available = 0
1111
+ endproc
1112
+
1113
+
1114
+ procedure construct_filenames: .infname$
1115
+ # Construct filenames to be used for TextGrid, parameters, graphics and intermediate data files,
1116
+ # starting from filename of sound input file
1117
+ # <.infname$> full path of input file, including filename extension
+ # Results are stored in global string variables (signalfile$, pitchfile$, segfile$, ..., output_fname$).
+ # Calls @fatal_error when the input filename has no extension or (except for tier validation)
+ # when the extension is not listed in filename_extensions_sound$.
1118
+ @debug_msg: "construct_filenames: entry, infname=<'.infname$'>"
1119
+ if (rindex (.infname$,".") == 0)
1120
+ @fatal_error: "Invalid filename for input file. Should include filename extension."
1121
+ endif
+ ; @fname_parts results as used below: result2$ = basename, result3$ = extension, result4$ = directory
1122
+ @fname_parts: .infname$
1123
+ .fext$ = replace_regex$ (result3$, "(.)", "\L\1", 0) ; get filename extension and convert it to lowercase
1124
+ if (task <> task_validate_tiers)
+ ; The list is colon-delimited, so the extension is matched as a whole field.
1125
+ if (index(filename_extensions_sound$, ":'.fext$':") == 0)
1126
+ @debug_msg: "construct_filenames: filename extension=<'.fext$'>"
+ ; NOTE(review): the message below does not list every extension present in filename_extensions_sound$ (e.g. s16).
1127
+ @fatal_error: "Input file ('.infname$') should be an audio file supported by Praat Open LongSound, with extension .wav, .aiff, .aifc, .nist, .sound, .flac, or .mp3"
1128
+ endif
1129
+ endif
1130
+ basename$ = result2$
1131
+ indir$ = result4$
+ ; .path$ is the common prefix (directory + basename) of most derived filenames
1132
+ .path$ = indir$ + basename$
1133
+ signalfile$ = .path$ + "." + .fext$
1134
+ pitchfile$ = .path$ + ".Pitch"
1135
+ segfile$ = .path$ + ".TextGrid" ; TextGrid with already available segmentation (phonetic and/or syllabic alignment)
1136
+ globalfile$ = indir$ + "globalsheet.txt" ; speaker profile data, for all input speech files in run (headerless spreadsheet)
1137
+ collectfile$ = indir$ + "_collect_.txt" ; append output from multiple input files
1138
+
1139
+ if (batch_mode and length (corpus$)) ; in batch mode, file paths may be defined for a given corpus
1140
+ ; "corpus_subdir_sound$" is handled by batch commands
1141
+ ; "corpus_subdir_img$" is handled below
+ ; Each corpus subdirectory variable, when defined, redirects the corresponding file(s) below corpus_home$.
1142
+ if (variableExists ("corpus_subdir_pitch$"))
1143
+ pitchfile$ = corpus_home$ + corpus_subdir_pitch$ + basename$ + ".Pitch"
1144
+ endif
1145
+ if (variableExists ("corpus_subdir_tg$"))
1146
+ segfile$ = corpus_home$ + corpus_subdir_tg$ + basename$ + ".TextGrid"
1147
+ endif
1148
+ if (variableExists ("corpus_subdir_data$"))
+ ; Changing .path$ here redirects all files derived from it further down.
1149
+ .path$ = corpus_home$ + corpus_subdir_data$ + basename$
1150
+ globalfile$ = corpus_home$ + corpus_subdir_data$ + "globalsheet.txt" ; no basename here
1151
+ endif
1152
+ endif
1153
+
+ ; Files derived from .path$ (input directory, or corpus data directory in batch mode)
1154
+ intensityfile$ = .path$ + ".Intensity"
1155
+ intbpfile$ = .path$ + "_BP.Intensity"
1156
+ inthpfile$ = .path$ + "_HP.Intensity"
1157
+ loudnessfile$ = .path$ + "_loud.Intensity"
1158
+ creakfile$ = .path$ + "_creak.TextGrid" ; TextGrid file with creak annotation
1159
+ speakerfile$ = .path$ + "_speaker.TextGrid" ; TextGrid file with speaker annotation
1160
+ nuclfile$ = .path$ + "_nucl.TextGrid" ; nuclfile contains automatic segmentation etc.
1161
+ stylfile$ = .path$ + "_styl.PitchTier" ; stylization as a sequence of targets, in Praat's PitchTier format
1162
+ stylpitchfile$ = .path$ + "_styl.Pitch" ; stylization as pitch samples
1163
+ targetsfile$ = .path$ + "_styl.txt" ; stylization as a sequence of targets and segmentations format
1164
+ statsfile$ = .path$ + "_profile.txt" ; prosodic profile report in text format
1165
+ sheetfile$ = .path$ + "_data.txt" ; syllable data (format: headerless spreadsheet file)
1166
+ tablefile$ = .path$ + "_table.txt" ; long format syllabic features, adding categorical variables for statistic analysis
1167
+ profile_file$ = .path$ + "_profile_data.txt" ; prosodic profile data, for current input speech file (headerless spreadsheet file)
1168
+ autosegfile$ = .path$ + "_auto.TextGrid" ; TextGrid file with output from automatic segmentation into syllables
1169
+ polytonia_file$ = .path$ + "_polytonia.TextGrid" ; TextGrid file with output from Polytonia
1170
+ boundaryfile$ = .path$ + "_boundary.TextGrid" ; TextGrid file with detected prosodic boundaries
1171
+ hesitfile$ = .path$ + "_hesit.TextGrid" ; TextGrid file with detected hesitations
1172
+ evalfile$ = .path$ + "_eval.txt"
1173
+ harmonicityfile$ = .path$ + ".Harmonicity"
1174
+
1175
+
1176
+ ; Special handling of output filename in batch mode
1177
+ ; The variable <output_filename$> is set in script form or in batch command "prosogram_variants"
1178
+ output_fname$ = output_filename$ ; local copy for modifications
1179
+ if (batch_mode and (task == task_prosogram or task == task_segmentation))
+ ; Priority: explicit output_filename$, then corpus directories, then the input directory.
1180
+ if (length (output_filename$))
1181
+ ; filename explicitly given, keep it as such; does not use corpus_home nor corpus_subdir_img
1182
+ file_numbering = 0
1183
+ elsif (length (corpus$))
1184
+ if (variableExists ("corpus_subdir_img$"))
1185
+ output_fname$ = corpus_home$ + corpus_subdir_img$ + basename$ + "_" + output_suffix$
1186
+ else
1187
+ output_fname$ = corpus_home$ + basename$ + "_" + output_suffix$
1188
+ endif
1189
+ else
1190
+ output_fname$ = indir$ + basename$ + "_" + output_suffix$
1191
+ endif
1192
+ if (task == task_prosogram and draw_prosograms)
1193
+ @msg: "Graphics file will be written to (path+basename): <'output_fname$'>"
1194
+ endif
1195
+ @debug_msg: "construct_filenames: batch_mode, output_fname=<'output_fname$'>"
1196
+ if (task == task_segmentation)
+ ; NOTE(review): uses corpus_home$ regardless of corpus$, and no "_" before output_suffix$ (unlike output_fname$ above) -- confirm intended.
1197
+ autosegfile$ = corpus_home$ + basename$ + output_suffix$ + ".TextGrid"
1198
+ endif
1199
+ endif
1200
+ if (not batch_mode) ; script mode
+ ; Expand the placeholders <input_directory> and <basename> in the output filename template.
1201
+ if (length (indir$))
1202
+ output_fname$ = replace$ (output_fname$, "<input_directory>", indir$, 1)
1203
+ else
1204
+ output_fname$ = replace$ (output_fname$, "<input_directory>/", "", 1)
1205
+ endif
1206
+ output_fname$ = replace$ (output_fname$, "<basename>", basename$, 1)
+ ; Collapse the first doubled slash, if any, in the resulting path.
1207
+ output_fname$ = replace$ (output_fname$, "//", "/", 1)
1208
+ if (single_fname_graphics_output) ; all graphics filenames have same basename followed by number
1209
+ output_filename$ = output_fname$ ; keep name for all graphics output files
1210
+ else
1211
+ file_ctr = 1 ; counter for graphics filename
1212
+ nrof_pages = 0
1213
+ endif
1214
+ @debug_msg: "construct_filenames: script_mode, output_fname=<'output_fname$'>"
1215
+ endif
1216
+
1217
+ @debug_msg: "construct_filenames: exit, output_fname=<'output_fname$'>"
1218
+ endproc
1219
+
1220
+
1221
+ procedure read_annotation_file
1222
+     ; Load the annotation TextGrid <segfile$> when it is readable.
+     ; Sets <segfile_available> and, on success, <segmentationID> (object renamed "annotation_input").
+     ; A missing file is a fatal error only when <needs_segm_tg> is set.
+     @debug_msg: "read_annotation_file: entry"
1223
+     segfile_available = fileReadable (segfile$)
1224
+     if (segfile_available)
1225
+         @gr_printline: "Loading annotation TextGrid from file <'segfile$'>"
1226
+         segmentationID = Read from file: segfile$
1227
+         Rename: "annotation_input"
1228
+     else
1229
+         if (needs_segm_tg)
1230
+             @fatal_error: "Cannot open TextGrid with segmentation from file <'segfile$'>"
1231
+         endif
+     endif
1232
+     @debug_msg: "read_annotation_file: exit"
1233
+ endproc
1234
+
1235
+
1236
+ procedure corpus_conversion: .step, .gridname$, .corpus$
1237
+ # In-place conversion of a TextGrid object
1238
+ # <.step> 1 = before preparation of plotted tiers, 2 = after preparation
+ # <.gridname$> name of the variable holding the ID of the TextGrid to convert
+ # <.corpus$> corpus name; selects the corpus-specific conversion to apply (no-op for other names)
+ # Uses @tier_number_by_name, which returns the tier number (0 if absent) in <result>.
1239
+     @debug_msg: "corpus_conversion: entry, gridname=<'.gridname$'>"
1240
+     .grid = '.gridname$'
1241
+     selectObject: .grid
1242
+
1243
+     if (.corpus$ = "Portes" and .step = 1)
1244
+         @tier_number_by_name: .grid, "speaker"
1245
+         if (not result)
1246
+             @tier_number_by_name: .grid, "turns"
1247
+             if (result)
+                 ; Copy the "turns" tier to position 1 as "speaker", then strip trailing digits from its labels.
1248
+                 Duplicate tier: result, 1, "speaker"
+                 ; The pattern is a regular expression, so the search mode must be given explicitly
+                 ; (same final argument as the other Replace interval text calls in this procedure).
1249
+                 Replace interval texts: 1, 1, 0, "([a-zA-Z]*)[0-9]*", "\1", "Regular Expressions"
1250
+             endif
1251
+         endif
1252
+     endif
1253
+
1254
+     if (.corpus$ = "rhapsodie" and .step = 1)
1255
+         @tier_number_by_name: .grid, "locuteur"
1256
+         if (result)
1257
+             Set tier name: result, "speaker"
1258
+         endif
1259
+     endif
1260
+
1261
+     if (.corpus$ = "cprom" and .step = 1)
1262
+         @process_cprom: .grid
1263
+     endif
1264
+
1265
+     if (.corpus$ = "gvlex" and .step = 1)
1266
+         @lpa_sampa: .grid, corpus_tier_phon$
1267
+         @lpa_sampa: .grid, corpus_tier_syll$
1268
+     endif
1269
+
1270
+     if (.corpus$ = "Kotsifas_2015" and .step = 1)
1271
+         @tier_number_by_name: .grid, "Intonation"
1272
+         if (result)
1273
+             Set tier name: result, "intonation"
1274
+         endif
1275
+         @tier_number_by_name: .grid, "phon"
1276
+         if (result)
1277
+             .phon_tier = result
1278
+             .ni = Get number of intervals: .phon_tier
1279
+             for .j to .ni
1280
+                 .t1s = Get start point: .phon_tier, .j
1281
+                 .label$ = Get label of interval: .phon_tier, .j
1282
+                 .pos = index (.label$, """") ; position of SAMPA primary stress mark " (double quote)
1283
+                 if (.pos > 0 and .pos <> 1) ; bad position
+                     ; Remove all stress marks from the label and report the interval.
1284
+                     .s$ = replace_regex$ (.label$, """", "", 0)
1285
+                     Set interval text: .phon_tier, .j, .s$
+                     ; Call written with ":" like every other @msg call in this file.
1286
+                     @msg: "Error: interval in phon tier at '.t1s:4' contains stress in wrong position. (label= '.label$')"
1287
+                 endif
1288
+             endfor
1289
+         endif
1290
+     endif
1291
+
1292
+     if (.corpus$ = "RUHRCAT" and .step = 2)
1293
+         @tier_number_by_name: .grid, "phon"
1294
+         if (result)
1295
+             Replace interval text... result 0 0 "^j([aOoueE@])" "i\1" Regular Expressions
1296
+             Replace interval text... result 0 0 "^w([o0iaeE])" "u\1" Regular Expressions
1297
+             Replace interval text... result 0 0 "^w\\ct" "uO" Regular Expressions
1298
+             Replace interval text... result 0 0 "^j\\ct" "iO" Regular Expressions
1299
+             Replace interval text... result 0 0 "^j\\ef" "iE" Regular Expressions
1300
+             Replace interval text... result 0 0 "(\\:f)*" "" Regular Expressions
1301
+             Replace interval text... result 0 0 "^mmm$" "m=" Regular Expressions
1302
+         endif
1303
+         @tier_number_by_name: .grid, "syll"
1304
+         .syll_tier = result
1305
+         if (result)
1306
+             Replace interval text... result 0 0 "^mmm$" "m=" Regular Expressions
1307
+         endif
1308
+         ; Create prominence and hesitation tiers from "DM+Hes" tier
+         ; NOTE(review): the block below uses .syll_tier even when no "syll" tier was found (.syll_tier = 0) -- confirm the corpus always has one.
1309
+         @tier_number_by_name: .grid, "DM.Hes"
1310
+         if (result)
1311
+             .deliv_tier = result
1312
+             .nt = Get number of tiers
1313
+             .hes_tier = .nt + 1
+             ; New "hes" tier is a copy of the syllable tier; each interval is then relabelled "H" or "".
1314
+             Duplicate tier: .syll_tier, .hes_tier, "hes"
1315
+             .ni = Get number of intervals: .syll_tier
1316
+             for .j to .ni
1317
+                 .t1s = Get start time of interval: .syll_tier, .j
1318
+                 .t2s = Get end time of interval: .syll_tier, .j
+                 ; Look up the "DM.Hes" interval at the syllable midpoint; its index is returned in variable <interv>.
1319
+                 @interval_from_time: .grid, .deliv_tier, .t1s+(.t2s-.t1s)/2, "interv"
1320
+                 .label$ = Get label of interval: .deliv_tier, interv
1321
+                 if (index (.label$, "Hes"))
1322
+                     .s$ = "H"
1323
+                 else
1324
+                     .s$ = ""
1325
+                 endif
1326
+                 Set interval text: .hes_tier, .j, .s$
1327
+             endfor
1328
+         endif
1329
+     endif ; (.corpus$ = "RUHRCAT" and .step = 2)
1330
+     @debug_msg: "corpus_conversion: exit"
1331
+ endproc
1332
+
1333
+
1334
+ procedure prepare_plotted_textgrid: ltiers$
1335
+ # Prepare (create and fill) TextGrid with user-selected tiers plotted in prosograms
1336
+ # <ltiers> list of (comma-separated) tier numbers or names, to be copied from segmentation TextGrid
1337
+ # When tier name or number is preceded by "*", the tier content is converted from SAMPA to IPA symbols
1338
+ # Returns <nrofplottedtiers>, the number of tiers in resulting TextGrid
+ # Also sets <newgridID> and <newgrid_available> when a TextGrid is created.
+ # A tier that cannot be found is reported and skipped; processing continues with the next field.
1339
+     @debug_msg: "prepare_plotted_textgrid: entry"
1340
+     if (index(ltiers$,""""))
1341
+         @fatal_error: "Invalid character (double quote) in field <Tiers to show>. Please check content of this field."
1342
+     endif
1343
+     nrofplottedtiers = 0
1344
+     if (segfile_available == 0 and (polytonia_annotation or boundary_annotation))
+         ; No annotation file: create a placeholder TextGrid to receive the automatic annotation tiers.
1345
+         newgridID = Create TextGrid: signal_start, signal_finish, "dummy", ""
1346
+         newgrid_available = 1
1347
+     endif
1348
+     if (segfile_available)
1349
+         selectObject: segmentationID
1350
+         tiers = 0
1351
+         nrofTiers = Get number of tiers
1352
+         ; Get number of speaker tier in segmentation TextGrid
1353
+         @tier_number_by_name: segmentationID, "^[Ss]peaker$"
1354
+         speaker_tier_in = result
1355
+         ; Select tiers (and convert) from segmentation TextGrid
1356
+         repeat
1357
+             @next_field: ltiers$
1358
+             ltiers$ = result3$ ; rest of fields
+             ; Remember whether a field was found: <result> is overwritten by the procedure calls
+             ; below (@is_number, @tier_number_by_name), so it cannot be used as loop condition.
+             .more_fields = result
1359
+             if (.more_fields) ; next field found
1360
+                 .field$ = result2$
1361
+                 @debug_msg: "prepare_plotted_textgrid: next_field=<'.field$'>"
1362
+                 .convert = 0 ; convert SAMPA to IPA
1363
+                 if (left$ (.field$, 1) = "*")
1364
+                     .field$ = right$ (.field$, length(.field$) - 1)
1365
+                     .convert = 1
1366
+                 endif
1367
+                 @is_number: .field$
1368
+                 if (result == 0) ; next field is not specified as a number
1369
+                     @tier_number_by_name: segmentationID, .field$
1370
+                     .tier_in = result
1371
+                     if (result == 0) ; tier name not found
1372
+                         @error_msg: "No tier named <'.field$'> found in input textgrid. Change content of field <Tiers to show>."
1373
+                         @error_msg: "This tier will be skipped in prosogram."
1374
+                     endif
1375
+                 else ; next field is a number
1376
+                     .tier_in = '.field$'
1377
+                     if (.tier_in > nrofTiers)
1378
+                         @error_msg: "Tier '.tier_in' not found. Input textgrid has only 'nrofTiers' tiers. Change content of field <Tiers to show>."
1379
+                         @error_msg: "This tier will be skipped in prosogram."
1380
+                         .tier_in = 0
1381
+                     endif
1382
+                 endif
1383
+                 if (.tier_in > 0) ; valid tier to add
1384
+                     selectObject: segmentationID
1385
+                     if (tiers == 0) ; this is first tier to add to plotted grid
1386
+                         .tmpID = Extract tier: .tier_in
1387
+                         newgridID = Into TextGrid
1388
+                         Rename: "plotted_tiers"
1389
+                         newgrid_available = 1
1390
+                         removeObject: .tmpID
1391
+                     else
1392
+                         @grid_append_tier: segmentationID, .tier_in, "newgridID"
1393
+                         selectObject: newgridID
1394
+                     endif
1395
+                     tiers += 1
1396
+                     if (.convert)
1397
+                         @convert_sampa_ipa: newgridID, tiers
1398
+                     endif
1399
+                 endif
1400
+             endif
1401
+         until (.more_fields == 0) ; no more fields in list
1402
+         nrofplottedtiers = tiers
1403
+         if (nrofplottedtiers > 0)
1404
+             @textgrid_disable_textstyle: newgridID
1405
+         endif
1406
+     endif ; segfile_available
+     ; Each enabled automatic annotation adds one extra tier to the plot.
1407
+     if (show_pseudosyllables)
1408
+         nrofplottedtiers += 1 ; results will appear in extra tier
1409
+     endif
1410
+     if (polytonia_annotation)
1411
+         nrofplottedtiers += 1 ; results will appear in extra tier
1412
+     endif
1413
+     if (boundary_annotation)
1414
+         nrofplottedtiers += 1 ; results will appear in extra tier
1415
+     endif
1416
+     if (stress_annotation)
1417
+         nrofplottedtiers += 1 ; results will appear in extra tier
1418
+     endif
1419
+     @debug_msg: "prepare_plotted_textgrid: exit"
1420
+ endproc
1421
+
1422
+
1423
+ procedure peek_signal
1424
+     ; Open the signal file just long enough to read its time domain.
+     ; Sets <signal_start> and <signal_finish>; the temporary object is removed afterwards.
+     @debug_msg: "peek_signal: entry"
1425
+     @fname_parts: signalfile$
1426
+     .ext_lc$ = replace_regex$ (result3$, "(.)", "\L\1", 0) ; extension in lowercase
1427
+     if (.ext_lc$ <> "sound")
1428
+         ; Audio files are opened as LongSound
+         .probeID = Open long sound file: signalfile$
1429
+     else
1430
+         ; Files with extension "sound" are read as objects
+         .probeID = Read from file: signalfile$
1431
+     endif
1432
+     signal_start = Get starting time
1433
+     signal_finish = Get finishing time
1434
+     removeObject: .probeID
1435
+     @debug_msg: "peek_signal: exit"
1436
+ endproc
1437
+
1438
+
1439
+ procedure read_signal
1440
+     # Load the speech signal from <signalfile$> into <soundID> (also kept as <fullsoundID>).
+     # Clipping is applied when clip_level > 0; intensity scaling when scale_signal_amplitude is set.
+     # Sets <signal_start>, <signal_finish> and <signal_available>.
1441
+     @debug_msg: "read_signal: entry"
1442
+     soundID = Read from file: signalfile$
1443
+     signal_start = Get starting time
1444
+     signal_finish = Get finishing time
1445
+     fullsoundID = soundID
1446
+     if (clip_level > 0)
+         ; Limit sample values to +/- clip, to avoid errors with files clipped at max amplitude.
1447
+         @msg: "Clipping speech signal at 'clip_level' percent..."
1448
+         clip = clip_level/100
1449
+         Formula... if (abs(self)>clip) then if self>0 then clip else -clip fi else self fi
1450
+     endif
1451
+     if (scale_signal_amplitude <> 0)
1452
+         @msg: "Scaling speech signal at 70 dB..."
1453
+         Scale intensity: 70.0
1454
+     endif
1455
+     signal_available = 1
1456
+     @debug_msg: "read_signal: exit"
1457
+ endproc
1458
+
1459
+
1460
+ procedure read_parameter_files
1461
+     ; Read previously stored parameter files (pitch, harmonicity, loudness, BP intensity)
+     ; when they exist, and set the matching <..._available> flags and object IDs.
+     @debug_msg: "read_parameter_files: entry"
1462
+     ; Pitch: only reused when results are stored (not volatile)
+     if (not volatile)
+         if (fileReadable (pitchfile$))
1463
+             pitchID = Read from file: pitchfile$
1464
+             .t = Get time step
1465
+             if (.t <= time_step)
1466
+                 .t2 = Get end time
1467
+                 if (.t2 + 5*time_step < signal_finish)
1468
+                     @msg: "Pitch file end time ('.t2' s) < speech signal end time ('signal_finish' s)"
1469
+                 endif
1470
+                 pitch_available = 1
1471
+                 @gr_printline: "Consulted pitch from file <'pitchfile$'> (time step='.t:3')"
1472
+             else
1473
+                 ; Stored pitch is too coarse for the requested time step: discard it
+                 @msg: "Pitch file on disk has larger time step."
1474
+                 removeObject: pitchID
1475
+             endif
1476
+         endif
+     endif
1477
+     ; Harmonicity
+     if (show_harmonicity and fileReadable (harmonicityfile$))
1478
+         harmonicityID = Read from file: harmonicityfile$
1479
+         @msg: "Consulted harmonicity from file <'harmonicityfile$'>"
1480
+         harmonicity_available = 1
1481
+     endif
1482
+     ; Loudness
+     if (needs_loudness and fileReadable (loudnessfile$))
1483
+         loudnessID = Read from file: loudnessfile$
1484
+         @msg: "Consulted loudness from file <'loudnessfile$'>"
1485
+         loudness_available = 1
1486
+     endif
1487
+     ; Intensity of band-pass filtered signal
+     if (needs_intbp and (not volatile) and fileReadable (intbpfile$))
1488
+         intbpID = Read from file: intbpfile$
1489
+         .t = Get time step
1490
+         intbp_available = 1
1491
+         @gr_printline: "Consulted BP intensity from file <'intbpfile$'> (time step='.t:3')"
1492
+     endif
1499
+     @debug_msg: "read_parameter_files: exit"
1500
+ endproc
1501
+
1502
+
1503
+ procedure calculate_pitch_int
+ ; Compute the Pitch object <pitchID> from the speech signal <soundID>, using either the F0 range
+ ; given by <minimum_pitch>/<maximum_pitch>, or a two-pass autorange when minimum_pitch = 0.
+ ; Loads the signal first if needed. Unless <volatile> is set, the result is written to <pitchfile$>.
+ ; Sets <pitch_available>.
1504
+ @debug_msg: "calculate_pitch_int: entry"
1505
+ if (not signal_available)
1506
+ @read_signal
1507
+ endif
1508
+ ; Standard settings, see FAQ Pitch Analysis
1509
+ tstep = 0 ; automatic, i.e. 0.75/pitch_floor, e.g. for 60Hz, time step = 0.0125 s
1510
+ voicing_threshold = 0.45
1511
+ silence_threshold = 0.03
1512
+ octave_cost = 0.01
1513
+ octave_jump_cost = 0.35
1514
+ vuv_cost = 0.14
1515
+ ; Modified settings
+ ; This replaces the automatic value (0) assigned to tstep above.
1516
+ tstep = time_step ; typically 0.01 or 0.005 s
1517
+
1518
+ ; Actual pitch determination, either explicit F0 range or autorange (2 pass, when minimum_pitch = 0)
1519
+ .f0min = minimum_pitch ; local variable
1520
+ .f0max = maximum_pitch ; local variable
1521
+ if (.f0min = 0) ; autorange, pass 1
1522
+ @debug_msg: "calculate_pitch_int: pass 1"
+ ; Pass 1 analyses the signal with a fixed wide range (65-800 Hz) only to estimate the pitch distribution.
1523
+ .tstep1 = 0.01 ; speed up pass 1
1524
+ .f0min = 65
1525
+ .f0max = 800
1526
+ @from_Hz_to_ST_rel_1: .f0min
1527
+ .minST = result
1528
+ @from_Hz_to_ST_rel_1: .f0max
1529
+ .maxST = result
1530
+ @msg: "Calculating pitch in autorange mode, Pass 1..."
1531
+ @msg: ".'tab$'Time step='.tstep1:2', F0 range='.f0min:0'-'.f0max:0' Hz ('.minST:1'-'.maxST:1' ST), Voicing threshold='voicing_threshold:2'"
1532
+ selectObject: soundID
1533
+ pitchID = To Pitch (ac): .tstep1, .f0min, 15, "no", silence_threshold, voicing_threshold, octave_cost, octave_jump_cost, vuv_cost, .f0max
1534
+ .p05 = Get quantile: 0, 0, 0.05, "Hertz"
1535
+ .p25 = Get quantile: 0, 0, 0.25, "Hertz"
1536
+ .p50 = Get quantile: 0, 0, 0.50, "Hertz"
1537
+ .p75 = Get quantile: 0, 0, 0.75, "Hertz"
1538
+ .p95 = Get quantile: 0, 0, 0.95, "Hertz"
+ ; The pass-1 Pitch object is only used for the quantiles above.
1539
+ removeObject: pitchID
1540
+ @msg: ".'tab$'Pitch percentiles 5%='.p05:0' 25%='.p25:0' 50%='.p50:0' 75%='.p75:0' 95%='.p95:0' (Hz)"
1541
+ .f0min = floor ( max (semitonesToHertz(hertzToSemitones(.p50) - 12), 50) ) ; 12 ST below median pitch, with minimum of 50 Hz
1542
+ .f0max = floor ( min (semitonesToHertz(hertzToSemitones(.p50) + 18), 1000) ) ; 18 ST above median pitch, with maximum of 1000 Hz
+ ; .minST/.maxST are computed here for the message below; they are assigned again before pass 2.
1543
+ .minST = hertzToSemitones (.f0min) - hertzToSemitones(1)
1544
+ .maxST = hertzToSemitones (.f0max) - hertzToSemitones(1)
1545
+ @msg: ".'tab$'Selected F0 detection range for pass 2: '.f0min'-'.f0max' Hz, '.minST:1'-'.maxST:1' ST"
1546
+ endif
+ ; Pass 2 (or the only pass when an explicit range was given): analysis at the requested time step.
1547
+ @debug_msg: "calculate_pitch_int: pass 2"
1548
+ @from_Hz_to_ST_rel_1: .f0min
1549
+ .minST = result
1550
+ @from_Hz_to_ST_rel_1: .f0max
1551
+ .maxST = result
1552
+ @msg: "Calculating pitch for fixed frequency range..."
1553
+ @msg: ".'tab$'Time step='tstep:2', F0 range='.f0min'-'.f0max' Hz ('.minST:1'-'.maxST:1' ST), Voicing threshold='voicing_threshold:2'"
1554
+ selectObject: soundID
1555
+ pitchID = To Pitch (ac): tstep, .f0min, 15, "no", silence_threshold, voicing_threshold, octave_cost, octave_jump_cost, vuv_cost, .f0max
1556
+ .p50 = Get quantile: 0, 0, 0.50, "Hertz"
1557
+ .p95 = Get quantile: 0, 0, 0.95, "Hertz"
1558
+ .p05 = Get quantile: 0, 0, 0.05, "Hertz"
1559
+ @msg: ".'tab$'Pitch percentiles 50%='.p50:0' 5%='.p05:0' 95%='.p95:0' (Hz)"
1560
+ if (not volatile)
1561
+ @msg: "Writing Pitch object to 'pitchfile$'"
1562
+ Write to binary file: pitchfile$
1563
+ endif
1564
+ pitch_available = 1
1565
+ @debug_msg: "calculate_pitch_int: exit"
1566
+ endproc
1567
+
1568
+
1569
+ procedure calculate_intensity_bp_filtered
+     ; Compute the intensity of the band-pass filtered signal (cutoffs <fc_low>, <fc_high>) into <intbpID>.
+     ; With <avoid_insuff_memory> set, the file-based procedure is used instead of in-memory filtering.
+     ; Unless <volatile> is set, the result is written to <intbpfile$>. Sets <intbp_available>.
1570
+     @msg: "Calculating intensity of BP filtered signal (fc_low='fc_low:0', fc_high='fc_high:0') ..."
1571
+     if (not avoid_insuff_memory)
1572
+         ; In-memory filtering of the full signal
+         if (signal_available == 0)
1573
+             @read_signal
1574
+         endif
1575
+         selectObject: soundID
1576
+         tmpfsID = Filter (pass Hann band): fc_low, fc_high, 100
1577
+         selectObject: tmpfsID
1578
+         intbpID = To Intensity: 100, time_step
1579
+         intbpID = selected ("Intensity", -1) ; keep this line !!
1580
+         removeObject: tmpfsID
1581
+     else
1582
+         ; Piecewise filtering through a temporary file
+         call intensity_bp_filter_longsound_compact "'signalfile$'" fc_low fc_high intbpID
1583
+     endif
1584
+     if (volatile == 0)
1585
+         selectObject: intbpID
1586
+         Write to text file: intbpfile$
1587
+     endif
1588
+     intbp_available = 1
1589
+ endproc
1590
+
1591
+
1592
+ procedure bp_filter: .soundin, .fc_low, .fc_high, .varname$
1593
+     # Band-pass filter the sound object <.soundin> between <.fc_low> and <.fc_high>.
+     # The ID of the filtered object is assigned to the variable whose name is given in <.varname$>.
1594
+     selectObject: .soundin
1595
+     .filteredID = Filter (pass Hann band): .fc_low, .fc_high, 100
1596
+     '.varname$' = .filteredID
1597
+ endproc
1598
+
1599
+
1600
+ procedure intensity_bp_filter_longsound_compact .fname$ .fc_low .fc_high .varname$
1601
+     # Band-pass filter a sound file piece by piece and compute the intensity of the result.
1602
+     # The filtered signal is stored in a temporary file, to avoid insufficient memory.
1603
+     # Use double quotes in the call if the filename contains blanks:
1604
+     #    call intensity_bp_filter_longsound_compact "'filename$'" 300 2500 myvar
1605
+     # .fname$     filename of sound file to be filtered
+     # .varname$   name of the variable that receives the ID of the Intensity object
1606
+     .tmpfile$ = prefix$ + "_filtered.wav"
1607
+     .longsound = Open long sound file: .fname$
1608
+     .t1 = Get start time
1609
+     .endtime = Get end time
1610
+     .maxdurpart = 300 ; duration of part of signal to be filtered separatedly due to available memory
1611
+     .partnr = 1
1612
+     repeat
1613
+         .t2 = min (.endtime, .t1 + .maxdurpart)
1614
+         selectObject: .longsound
1615
+         .part = Extract part: .t1, .t2, "yes"
1616
+         if (clip_level > 0)
1617
+             clip = clip_level/100
1618
+             Formula... if (abs(self)>clip) then if self>0 then clip else -clip fi else self fi
1619
+         endif
1620
+         .filt = Filter (pass Hann band): .fc_low, .fc_high, 100
+         ; Keep filtered samples within +/- 0.98 before writing them to file
1621
+         Formula... if (abs(self)>0.98) then if self>0 then 0.98 else -0.98 fi else self fi
1622
+         if (.partnr > 1)
1623
+             Append to existing sound file: .tmpfile$
1624
+         else
1625
+             Save as WAV file: .tmpfile$
1626
+         endif
1627
+         removeObject: .filt, .part
1628
+         .t1 = .t2
1629
+         .partnr += 1
1630
+     until (.t2 >= .endtime)
1631
+     removeObject: .longsound ; free memory
1632
+     .tmp = Read from file: .tmpfile$
1633
+     intbpID = To Intensity: 100, time_step
1634
+     intbpID = selected ("Intensity", -1) ; keep this line !!
1635
+     '.varname$' = intbpID
1636
+     removeObject: .tmp
1637
+     deleteFile: .tmpfile$
1638
+ endproc
1639
+
1640
+
1641
+ procedure calculate_loudness
+ # Compute the loudness contour of the signal by running the external script loudness.praat
+ # over the interval signal_start..signal_finish.
+ # Reads globals: signal_available, soundID, signal_start, signal_finish, time_step,
+ #                volatile, loudnessfile$.
+ # Sets globals:  loudnessID (the Intensity-type object selected after the script has run)
+ #                and loudness_available (set to 1).
+    if (signal_available = 0)
+       @read_signal
+    endif
+    execute loudness.praat 'soundID' 'signal_start' 'signal_finish' 'time_step'
+    loudnessID = selected ("Intensity", -1)
+    if (volatile = 0)
+       # Non-volatile mode: store the result in loudnessfile$
+       Save as short text file: loudnessfile$
+    endif
+    loudness_available = 1
+ endproc
1652
+
1653
+
1654
+ procedure calculate_parameters
+ # Compute the acoustic parameters needed for the current input file:
+ # intensity, pitch, and optionally harmonicity, loudness and band-pass filtered intensity,
+ # followed by the voiced/unvoiced decision, which is stored in TextGrid vuvgridID.
+ #
+ # volatile mode:     the signal is first reduced to anal_t1..anal_t2 (soundID is redefined),
+ #                    everything is computed on the fly and nothing is written by this procedure.
+ # non-volatile mode: parameters that are not yet available are computed for the full signal;
+ #                    optional high-pass / low-pass prefiltering replaces soundID.
+ #
+ # Sets globals: soundID, intensityID, loudnessID, harmonicityID, vuvgridID, pointprocID and
+ # the matching *_available flags.
+ #
+ # Fix: the cut-off of the high-pass prefilter and the silence threshold of the harmonicity
+ # analysis used to be assigned to the globals fc_low and silence_threshold. Those globals are
+ # the settings of the band-pass intensity (see calculate_intensity_bp_filtered) and of the
+ # pitch analysis (see calculate_pitch_int), so they were silently overwritten for any later
+ # analysis that reads them. They are now procedure-local variables.
+    @debug_msg: "calculate_parameters: entry"
+    if (not signal_available)
+       @read_signal
+    endif
+    if (volatile)
+       # Calculate parameters on the fly only for interval to stylize
+       selectObject: soundID
+       tmpsoundID = Extract part: anal_t1, anal_t2, "rectangular", 1.0, "yes"
+       removeObject: soundID ; remove full signal
+       soundID = tmpsoundID ; redefine soundID
+       selectObject: soundID
+       if (scale_signal_amplitude)
+          Scale intensity: scale_signal_amplitude
+       endif
+       intensityID = To Intensity: 100, time_step
+       intensityID = selected ("Intensity", -1) ; keep this line !!
+       intensity_available = 1
+       @calculate_pitch_int
+       if (needs_loudness and not loudness_available)
+          execute loudness.praat 'soundID' 'anal_t1' 'anal_t2' 'time_step'
+          loudnessID = selected ("Intensity", -1)
+          loudness_available = 1
+       endif
+       if (needs_intbp and not intbp_available)
+          @calculate_intensity_bp_filtered
+          intbp_available = 1
+       endif
+    else ; (volatile == 0)
+       if (not intensity_available)
+
+          if (corpus$ = "rhapsodie")
+             prefilterHP100 = 0
+             if (basename$ = "Rhap-D0008")
+                prefilterHP100 = 1
+             endif
+          endif
+
+          if (prefilterHP100) ; Apply HP filtering to avoid problems with low quality recordings by filtering low frequency band
+             @msg: "Preprocessing (high-pass filtering) speech signal ('basename$')..."
+             selectObject: soundID
+             # local cut-off: must not overwrite the global fc_low used for the band-pass intensity
+             .hp_cutoff = 100
+             tmpID = Filter (pass Hann band): .hp_cutoff, 0, 100
+             removeObject: soundID
+             soundID = tmpID
+          endif
+          if (prefilterLP) ; Apply LP filtering when fricatives are very intense
+             @msg: "Preprocessing (low-pass filtering) speech signal... 'basename$'"
+             selectObject: soundID
+             fc = 2500
+             tmpID = Filter (pass Hann band): 0, fc, 100
+             removeObject: soundID
+             soundID = tmpID
+          endif
+
+          selectObject: soundID
+          tmpID = To Intensity: 100, time_step
+          intensityID = selected ("Intensity", -1) ; keep this line !!
+          intensity_available = 1
+       endif
+       if (not pitch_available)
+          @calculate_pitch_int
+       endif
+       if (show_harmonicity and not harmonicity_available)
+          selectObject: soundID
+          # local threshold: must not overwrite the global silence_threshold of the pitch analysis
+          .harm_silence_threshold = 0.1
+          nrofperiods = 1.0
+          harmonicityID = To Harmonicity (cc): time_step, minimum_pitch, .harm_silence_threshold, nrofperiods
+          harmonicity_available = 1
+       endif
+       if (needs_loudness and not loudness_available)
+          execute loudness.praat 'soundID' 'signal_start' 'signal_finish' 'time_step'
+          loudnessID = selected ("Intensity", -1)
+          Write to short text file: loudnessfile$
+          loudness_available = 1
+       endif
+       if (needs_intbp and not intbp_available)
+          @calculate_intensity_bp_filtered
+       endif
+    endif ; # volatile
+    # Calculate V/UV decision and store in TextGrid
+    selectObject: pitchID
+    pointprocID = To PointProcess
+    vuvgridID = To TextGrid (vuv): 0.02, 0.01
+    Rename: "vuv"
+    removeObject: pointprocID
+    @debug_msg: "calculate_parameters: exit"
+ endproc
1742
+
1743
+
1744
+ procedure peek_settings
+ # Peek at the analysis settings stored in the "settings" tier of the
+ # intermediate data file nuclfile$ (_nucl.TextGrid), without keeping the TextGrid.
+ # Sets globals: segmentation_name$, glissando, glissando_low, adaptive_glissando,
+ #               diffgt, mindur_ts.
+ # Calls @fatal_error when the file cannot be read or has no settings tier.
+    @debug_msg: "peek_settings: entry"
+    if (fileReadable (nuclfile$))
+       .tmpID = Read from file: nuclfile$
+       @tier_number_by_name: .tmpID, "^settings$"
+       @debug_msg: "peek_settings: settings tier nr='result'"
+       if (result = 0)
+          @fatal_error: "Cannot find settings tier in nucleus file"
+       else
+          .s$ = Get label of interval: result, 1
+          @debug_msg: "peek_settings: settings tier label=<'.s$'>"
+          segmentation_name$ = extractWord$ (.s$, "SEG=")
+          glissando = extractNumber (.s$, "GT=")
+          # ADAPT= may be absent from the settings label: treat that as "not adaptive"
+          adaptive_glissando = extractNumber (.s$, "ADAPT=")
+          if (adaptive_glissando = undefined)
+             adaptive_glissando = 0
+          endif
+          # The low glissando threshold only differs from GT in adaptive mode
+          if (adaptive_glissando)
+             glissando_low = extractNumber (.s$, "GT_low=")
+          else
+             glissando_low = glissando
+          endif
+          diffgt = extractNumber (.s$, "DG=")
+          mindur_ts = extractNumber (.s$, "MINTS=")
+       endif
+       removeObject: .tmpID
+    else
+       @fatal_error: "Cannot find nucleus file: <'nuclfile$'>'newline$'Obtain it first using Prosogram task: Calculate intermediate data files"
+    endif
+    @debug_msg: "peek_settings: exit"
+ endproc
1775
+
1776
+
1777
+ procedure load_intermediate_data_files
+ # Interactive mode: load the intermediate data files (stylization, prosodic features,
+ # prosodic profile) into data objects.
+ # All three files must be readable; otherwise the current objects are cleaned up and a
+ # fatal error is raised. On success sets stylID, stylization_available, reuse_styl and
+ # replaces nucldatID by the table read from sheetfile$.
+ # Finally checks that the number of rows of nucldatID equals nrof_nuclei_analysed.
+    # Step 1: report every missing file before giving up
+    .nerrs = 0
+    if (not fileReadable (stylfile$))
+       @gr_printline: "Cannot open stylization file <'stylfile$'>"
+       .nerrs = .nerrs + 1
+    endif
+    if (not fileReadable (sheetfile$))
+       @gr_printline: "Cannot open prosodic features from file <'sheetfile$'>"
+       .nerrs = .nerrs + 1
+    endif
+    if (not fileReadable (profile_file$))
+       @gr_printline: "Cannot open prosodic file <'profile_file$'>"
+       .nerrs = .nerrs + 1
+    endif
+    # Step 2: either abort or load the three files
+    if (.nerrs > 0)
+       @cleanup_current_file
+       @cleanup_global
+       @fatal_error: "Some data files could not be found. Please run task <Calculate intermediate data files> again before using Interactive mode."
+    else
+       @gr_printline: "Loading stylization from file <'stylfile$'>..."
+       stylID = Read from file: stylfile$
+       stylization_available = 1
+       reuse_styl = 1
+       # the existing feature table is replaced by the one read from file
+       removeObject: nucldatID
+       @gr_printline: "Loading prosodic features from file <'sheetfile$'>..."
+       nucldatID = Read TableOfReal from headerless spreadsheet file: sheetfile$
+       @gr_printline: "Loading prosodic profile from file <'profile_file$'>..."
+       @profile_table_load
+    endif
+    # Step 3: consistency check between the feature table and the loaded segmentation
+    selectObject: nucldatID
+    .nrows = Get number of rows
+    if (.nrows <> nrof_nuclei_analysed)
+       @fatal_error: "Mismatch between data in intermediate data files. Please run task <Calculate intermediate data files> again before using Interactive mode."
+    endif
+ endproc
1813
+
1814
+
1815
+ procedure get_segmentation: method
+ # Parameter <method> is a segmentation method code; it is compared with the segm_* constants
+ # to derive segmentation_name$ ("unknown" when no constant matches).
+ # In interactive mode the saved nuclei TextGrid is tried first (@read_nuclei_file_interactive_mode).
+ # When no segmentation is available afterwards, this procedure:
+ #   - creates TextGrid nucleiID with 12 fixed tiers plus optional boundary, polytonia and stress tiers,
+ #   - copies the V/UV tier from vuvgridID (which is then removed),
+ #   - copies annotation, speaker and creak tiers when available,
+ #   - runs the segmentation (@make_segmentation) or copies the external "segm" tier,
+ #   - stores the analysis settings in the settings tier and sets nrof_nuclei_analysed.
1816
+ # Calculate segmentation or read it from file
1817
+ @debug_msg: "get_segmentation: entry"
1818
+ if (method == segm_vnucl) ; vowel nuclei
1819
+ segmentation_name$ = "vow-nucl"
1820
+ elsif (method == segm_extern)
1821
+ segmentation_name$ = "extern"
1822
+ elsif (method == segm_aloudness) ; automatic, loudness peaks
1823
+ segmentation_name$ = "loudness"
1824
+ elsif (method == segm_anucl) ; automatic, peaks in bandpass filters speech
1825
+ segmentation_name$ = "int-BP"
1826
+ elsif (method == segm_msyllvow) ; syllabic nuclei
1827
+ segmentation_name$ = "syll+vow"
1828
+ elsif (method == segm_msyllpeak) ; syllabic nuclei
1829
+ segmentation_name$ = "syll"
1830
+ elsif (method == segm_mrhyme) ; syllable rhyme
1831
+ segmentation_name$ = "rhyme"
1832
+ elsif (method == segm_asyll) ; pseudo-syllables
1833
+ segmentation_name$ = "asyll"
1834
+ elsif (method == segm_voiced) ; voiced portions
1835
+ segmentation_name$ = "voiced"
1836
+ elsif (method == segm_specsim) ;
1837
+ segmentation_name$ = "specsim"
1838
+ elsif (method == segm_pitchchange) ; coding not completed
1839
+ segmentation_name$ = "pitchchange"
1840
+ elsif (method == segm_pitchterrace) ; coding not completed
1841
+ segmentation_name$ = "pitchterrace"
1842
+ else
1843
+ segmentation_name$ = "unknown"
1844
+ endif
1845
+ @debug_msg: "get_segmentation: method='method' name='segmentation_name$'"
1846
+
+ # Interactive mode: try to reuse the segmentation stored in the nuclei file
1847
+ if (task == task_interactive)
1848
+ @read_nuclei_file_interactive_mode
1849
+ ;@gr_printline: "segmentation_available='segmentation_available' reuse_nucl='reuse_nucl' nr='nrof_nuclei_analysed'"
1850
+ endif ; task_interactive
1851
+
1852
+ if (not segmentation_available)
1853
+ @debug_msg: "get_segmentation: segmentation not available"
1854
+ # Make segmentation into nuclei depending upon segmentation method
1855
+ # tiers in annotation TextGrid file
1856
+ phone_tier_in = 1 ; default location
1857
+ # create TextGrid nucleiID
1858
+ phone_tier = 1
1859
+ dip_tier = 2
1860
+ nucleus_tier = 3
1861
+ syllable_tier = 4
1862
+ vuv_tier = 5
1863
+ discontinuity_tier = 6
1864
+ safe_tier = 7
1865
+ pointer_tier = 8
1866
+ speaker_tier = 9
1867
+ creak_tier = 10
1868
+ settings_tier = 11
1869
+ hesitation_tier = 12
1870
+ polytonia_tier = 0 ; may be added later for polytonia_annotation
1871
+ polytonia2_tier = 0 ; may be added later for polytonia_annotation
1872
+ boundary_tier = 0 ; may be added later for automatic boundary detection
1873
+ stress_tier = 0 ; may be added later for stress_annotation
+ # .n counts the tiers; optional tiers are appended after the 12 fixed ones,
+ # .tiers$ lists all tier names and .point_tiers$ those created as point tiers
1874
+ .n = 12
1875
+ .tiers$ = "phone dip nucleus syll vuv discont safe pointer speaker creak settings hesitation"
1876
+ .point_tiers$ = "dip discont"
1877
+ if (boundary_annotation)
1878
+ .tiers$ += " boundary-auto"
1879
+ .point_tiers$ += " boundary-auto"
1880
+ .n += 1
1881
+ boundary_tier = .n
1882
+ endif
1883
+ if (polytonia_annotation)
1884
+ .tiers$ += " polytonia polytonia-iw"
1885
+ polytonia_tier = .n+1
1886
+ polytonia2_tier = .n+2
1887
+ .n += 2
1888
+ endif
1889
+ if (stress_annotation)
1890
+ .tiers$ += " stress"
1891
+ .n += 1
1892
+ stress_tier = .n
1893
+ endif
1894
+ nucleiID = Create TextGrid: signal_start, signal_finish, .tiers$, .point_tiers$
1895
+ Rename: "nucl"
1896
+ nuclei_available = 1
1897
+
1898
+ # copy VUV decision from vuvgrid to nuclei grid
1899
+ @copy_tier: vuvgridID, 1, nucleiID, vuv_tier
1900
+ removeObject: vuvgridID
1901
+
1902
+
1903
+ if (segfile_available) ; and not internal data format available
1904
+ @debug_msg: "get_segmentation: segfile available"
1905
+ @gr_printline: "Copying annotation tiers..."
1906
+ @copy_tiers_from_annotation_tg
1907
+ endif
1908
+
+ # Speaker information from a separate file, only when no speaker tier has been found yet
1909
+ if (not speaker_available)
1910
+ if (fileReadable (speakerfile$))
1911
+ @msg: "Reading speaker information from 'speakerfile$'"
1912
+ tmpID = Read from file: speakerfile$
1913
+ @tier_number_by_name: tmpID, "^[Ss]peaker$"
1914
+ if (result > 0)
1915
+ @copy_tier: tmpID, result, nucleiID, speaker_tier
1916
+ speaker_available = 1
1917
+ else
1918
+ @msg: "No speaker tier found in 'speakerfile$'"
1919
+ endif
1920
+ removeObject: tmpID
1921
+ endif
1922
+ endif
1923
+
+ # Creak information from a separate file, only when no creak tier has been found yet
1924
+ if (not creak_available)
1925
+ if (fileReadable (creakfile$))
1926
+ @msg: "Reading 'creakfile$'"
1927
+ tmpID = Read from file: creakfile$
1928
+ @tier_number_by_name: tmpID, "^[Cc]reaky?$"
1929
+ if (result > 0)
1930
+ @msg: "Reading creak information from tier 'result' of 'creakfile$'"
1931
+ @copy_tier: tmpID, result, nucleiID, creak_tier
1932
+ creak_available = 1
1933
+ endif
1934
+ removeObject: tmpID
1935
+ endif
1936
+ endif
1937
+
+ # Corpus-specific extra tiers ("contour", "prom") are appended to nucleiID when present.
+ # NOTE(review): segmentationID is used here without testing segfile_available -- confirm
+ # that these corpora always come with an annotation TextGrid.
1938
+ if (corpus$ = "cprom" or corpus$ = "rhapsodie")
1939
+ @tier_number_by_name: segmentationID, "^contour$"
1940
+ if (result)
1941
+ @grid_append_tier: segmentationID, result, "nucleiID"
1942
+ endif
1943
+ @tier_number_by_name: segmentationID, "^prom$"
1944
+ if (result)
1945
+ @grid_append_tier: segmentationID, result, "nucleiID"
1946
+ endif
1947
+ endif
1948
+
1949
+ if (needs_phon_tier and not phones_available)
1950
+ @fatal_error: "Cannot find phoneme tier (named phon) in annotation TextGrid"
1951
+ endif
1952
+
1953
+ if (needs_syll_tier and not syllables_available)
1954
+ @fatal_error: "Cannot find syllable tier (named syll) in annotation TextGrid"
1955
+ endif
1956
+
1957
+ @msg: "Calculating actual segmentation. Method='segmentation_name$'..."
+ # NOTE(review): the branch below tests the global segm_type, not the parameter <method>;
+ # presumably both hold the same value -- confirm against the callers.
1958
+ if (segm_type != segm_extern)
1959
+ @make_segmentation: segm_type, t1s, t2s, nucleiID, mindiff
1960
+ else ; copy consulted external segmentation in tier "segm" of file to nucleus tier of object nucleiID
1961
+ @tier_get: segmentationID, "^segm$", "tier_in", "Cannot find segmentation tier (named segm) in segmentation TextGrid", 1
1962
+ @copy_tier: segmentationID, tier_in, nucleiID, nucleus_tier
1963
+ selectObject: nucleiID
1964
+ .n = Get number of intervals: nucleus_tier
+ # Intervals whose label is recognised as a vowel get the label "a" (the nucleus marker)
1965
+ for .j from 1 to .n
1966
+ label$ = Get label of interval: nucleus_tier, .j
1967
+ @is_vowel: label$
1968
+ if (is_vowel)
1969
+ Set interval text: nucleus_tier, .j, "a"
1970
+ endif
1971
+ endfor
1972
+ endif
1973
+
1974
+ @safe_nuclei: t1s, t2s
1975
+ nrof_nuclei_analysed = result
1976
+
1977
+ # Store analysis, segmentation and stylization settings in nucleiID TextGrid
1978
+ selectObject: nucleiID
1979
+ Set interval text: settings_tier, 1, "File='basename$' SEG='segmentation_name$' t1='t1s' t2='t2s' " +
1980
+ ... "GT='glissando' GT_low='glissando_low' ADAPT='adaptive_glissando' DG='diffgt' MINTS='mindur_ts' MINPAUSE='mindur_pause_gap'"
1981
+
1982
+
+ # No hesitation tier was copied from the annotation: derive one from the syllable tier
+ # (or the nucleus tier) and clear its labels (presumably what @tier_clear_text does)
1983
+ if (hesitation_available == 0)
1984
+ .tier = nucleus_tier
1985
+ if (syllables_available)
1986
+ .tier = syllable_tier
1987
+ endif
1988
+ @copy_tier: nucleiID, .tier, nucleiID, hesitation_tier
1989
+ @tier_clear_text: nucleiID, hesitation_tier
1990
+ hesitation_available = 1
1991
+ endif
+ # The stress tier takes its intervals from the syllable tier when syllables are available
+ # for the chosen method, otherwise from the nucleus tier; its labels are then cleared
1992
+ if (stress_annotation)
1993
+ if (segm_type == segm_msyllvow or segm_type == segm_msyllpeak or segm_type == segm_mrhyme)
1994
+ @tier_replace2: nucleiID, syllable_tier, nucleiID, stress_tier
1995
+ elsif (segm_type == segm_vnucl and segfile_available and syllables_available)
1996
+ @tier_replace2: nucleiID, syllable_tier, nucleiID, stress_tier
1997
+ else
1998
+ @tier_replace2: nucleiID, nucleus_tier, nucleiID, stress_tier
1999
+ endif
2000
+ @tier_clear_text: nucleiID, stress_tier
2001
+ endif
2002
+ endif ; not segmentation available
2003
+ @debug_msg: "get_segmentation: exit"
2004
+ endproc
2005
+
2006
+
2007
+ procedure read_nuclei_file_interactive_mode
+ # Interactive mode only: try to reuse the nuclei TextGrid saved in nuclfile$.
+ # The file is accepted when
+ #   - it contains a "settings" tier,
+ #   - the current analysis interval fits the stored one
+ #     (anal_t1 >= stored t1 and anal_t2 within time_step of stored t2), and
+ #   - the tiers phone, syll, nucleus, vuv, discont, safe, pointer and speaker are all found.
+ # On success sets: nucleiID, nuclei_available, segmentation_available, reuse_nucl,
+ # segmentation_name$, nrof_nuclei_analysed, speaker_available and the *_tier indices,
+ # and removes the TextGrid vuvgridID, which is then no longer needed.
+ # On failure the TextGrid read from file is removed again; the *_tier indices may have been
+ # overwritten with partial lookup results.
+ #
+ # Fix: vuvgridID used to be removed in the middle of the tier lookup, before it was known
+ # whether the file would be accepted. When a later tier (or the vuv tier itself) was missing,
+ # the file was rejected and get_segmentation subsequently tried to copy the V/UV tier from
+ # the already removed vuvgridID. The object is now removed only once the file is accepted.
+    if (task == task_interactive)
+       if (not fileReadable (nuclfile$))
+          @gr_printline: "Cannot find internal data TextGrid file <'nuclfile$'>"
+       else
+          @gr_printline: "Loading internal data TextGrid from file <'nuclfile$'>"
+          tmpID = Read from file: nuclfile$
+          @tier_number_by_name: tmpID, "^settings$"
+          if (result)
+             @debug_msg: "get_segmentation: interactive mode, found settings"
+             settings_tier = result
+             s$ = Get label of interval: settings_tier, 1
+             method_$ = extractWord$ (s$, "SEG=")
+             t1_ = extractNumber (s$, "t1=")
+             t2_ = extractNumber (s$, "t2=")
+             gt_ = extractNumber (s$, "GT=")
+             gt_low_ = extractNumber (s$, "GT_low=")
+             if (gt_low_ == undefined)
+                gt_low_ = gt_
+             endif
+             .adapt = extractNumber (s$, "ADAPT=")
+             if (.adapt == undefined)
+                .adapt = 0
+             endif
+             dg_ = extractNumber (s$, "DG=")
+             mints_ = extractNumber (s$, "MINTS=")
+             if (anal_t1 >= t1_ and abs(anal_t2 - t2_) < time_step) ; Correct for rounding errors in 32 vs. 64 bit
+                segmentation_name$ = method_$
+                # Look up the required tiers one after the other; the chain stops
+                # (result = 0) at the first tier that is missing.
+                @tier_number_by_name: tmpID, "^phone$"
+                phone_tier = result
+                if (result)
+                   @tier_number_by_name: tmpID, "^syll$"
+                   syllable_tier = result
+                endif
+                if (result)
+                   @tier_number_by_name: tmpID, "^nucleus$"
+                   nucleus_tier = result
+                endif
+                if (result)
+                   @tier_number_by_name: tmpID, "^vuv$"
+                   vuv_tier = result
+                endif
+                if (result)
+                   @tier_number_by_name: tmpID, "^discont$"
+                   discontinuity_tier = result
+                endif
+                if (result)
+                   @tier_number_by_name: tmpID, "^safe$"
+                   safe_tier = result
+                endif
+                if (result)
+                   @tier_number_by_name: tmpID, "^pointer$"
+                   pointer_tier = result
+                endif
+                if (result)
+                   @tier_number_by_name: tmpID, "^speaker$"
+                   speaker_tier = result
+                   if (speaker_tier)
+                      speaker_available = 1 ; speaker tier available
+                   endif
+                endif
+                if (result) ; success
+                   reuse_nucl = 1
+                   segmentation_available = 1
+                   nucleiID = tmpID
+                   nuclei_available = 1
+                   ; nrof_nuclei_analysed = Count labels: safe_tier, "a"
+                   nrof_nuclei_analysed = Count labels: nucleus_tier, "a"
+                   @gr_printline: "Loaded internal data TextGrid from file <'nuclfile$'>"
+                   # The V/UV decision is taken from the loaded file, so the freshly
+                   # computed V/UV TextGrid can be discarded now (and only now).
+                   removeObject: vuvgridID
+                endif
+             endif ; if anal_t1
+          endif ; tier with settings found
+          if (not segmentation_available) ; textgrid read but not used
+             removeObject: tmpID
+          endif
+       endif ; nuclfile found
+    endif
+ endproc
2087
+
2088
+
2089
+ procedure copy_tiers_from_annotation_tg
+ # Reads globals: segmentationID (annotation TextGrid), nucleiID (target TextGrid), segm_type,
+ # the target tier indices (phone_tier, syllable_tier, speaker_tier, creak_tier, hesitation_tier)
+ # and the optional name overrides corpus_tier_phon$, corpus_tier_syll$, corpus_tier_speaker$,
+ # corpus_tier_word$.
+ # Sets globals: phones_available (1 or 0) and phone_tier_in; syllables_available, speaker_available,
+ # creak_available, hesitation_available (set to 1 when the matching tier is found);
+ # syll_tier_in, speaker_tier_in, hesitation_tier_in, and word_tier_in (tier number only, no copy).
+ # result and result2$ are presumably the tier number and tier name returned by
+ # @tier_number_by_name (TODO confirm in that procedure).
2090
+ ; Copy tiers (phon, syll, speaker, words, creak, hes) from segmentation TextGrid to nucleiID
2091
+ @tier_number_by_name: segmentationID, "^phon"
2092
+ # The following if-block may override default name of phoneme tier
+ # NOTE: when a corpus-specific name is configured, its lookup replaces the outcome of the
+ # default lookup above, even when it finds nothing (same pattern for syll, speaker, word below)
2093
+ if (variableExists ("corpus$") and variableExists ("corpus_tier_phon$"))
2094
+ if (length (corpus_tier_phon$))
2095
+ @tier_number_by_name: segmentationID, "^'corpus_tier_phon$'$"
2096
+ if (result = 0)
2097
+ @msg: "Cannot find phoneme tier named <'corpus_tier_phon$'>"
2098
+ endif
2099
+ endif
2100
+ endif
2101
+ if (result) ; found phoneme tier
2102
+ phone_tier_in = result
2103
+ @msg: "Using phonetic alignment from tier named <'result2$'>"
2104
+ phones_available = 1
2105
+ else
2106
+ phone_tier_in = 0
2107
+ phones_available = 0
2108
+ @msg: "Cannot find tier with phonetic alignment"
2109
+ endif
2110
+ if (phones_available)
2111
+ @tier_replace: segmentationID, phone_tier_in, nucleiID, phone_tier, "nucleiID"
2112
+ selectObject: nucleiID
2113
+ Set tier name: phone_tier, "phone"
2114
+ endif
2115
+
2116
+ @tier_number_by_name: segmentationID, "^syll"
2117
+ # The following if-block may override default name of syllable tier
2118
+ if (variableExists ("corpus$") and variableExists ("corpus_tier_syll$"))
2119
+ if (length (corpus_tier_syll$))
2120
+ @tier_number_by_name: segmentationID, "^'corpus_tier_syll$'$"
2121
+ endif
2122
+ endif
2123
+ if (result and segm_type <> segm_asyll)
2124
+ ; In segm_asyll, the syllable tier is used for pseudo-syllables and should not be used for true syllables.
2125
+ syll_tier_in = result
2126
+ @msg: "Using syllable alignment from tier named <'result2$'>"
2127
+ @copy_tier: segmentationID, syll_tier_in, nucleiID, syllable_tier
2128
+ ; call tier_replace segmentationID syll_tier_in nucleiID syllable_tier nucleiID
2129
+ ; selectObject: nucleiID
2130
+ ; Set tier name... syllable_tier syll
2131
+ syllables_available = 1
2132
+ endif
2133
+
2134
+ @tier_number_by_name: segmentationID, "^[Ss]peaker$"
2135
+ # The following if-block may override default name of speaker tier
2136
+ if (variableExists ("corpus$") and variableExists ("corpus_tier_speaker$"))
2137
+ if (length (corpus_tier_speaker$))
2138
+ @tier_number_by_name: segmentationID, "^'corpus_tier_speaker$'$"
2139
+ endif
2140
+ endif
2141
+ if (result)
2142
+ speaker_tier_in = result
2143
+ @msg: "Using speaker information from tier named <'result2$'>"
+ # NOTE(review): corpus$ is read here without the variableExists guard used above --
+ # presumably it is always defined; confirm
2144
+ if (corpus$ = "rhapsodie")
2145
+ @copy_tier_tweeked: segmentationID, speaker_tier_in, nucleiID, speaker_tier
2146
+ else
2147
+ @copy_tier: segmentationID, speaker_tier_in, nucleiID, speaker_tier
2148
+ endif
2149
+ speaker_available = 1 ; speaker tier available
2150
+ else ; input segmentation textgrid does not contain speaker tier
+ # label the first interval of the speaker tier with the placeholder speaker name
2151
+ selectObject: nucleiID
2152
+ Set interval text: speaker_tier, 1, "ANON"
2153
+ endif
2154
+
2155
+ @tier_number_by_name: segmentationID, "^[Cc]reaky?$"
2156
+ if (result)
2157
+ @msg: "Using creak information from tier named <'result2$'> of <'segfile$'>"
2158
+ @copy_tier: segmentationID, result, nucleiID, creak_tier
2159
+ creak_available = 1
2160
+ endif
2161
+
+ # Word tier: only its tier number in the annotation TextGrid is recorded (0 = not found)
2162
+ word_tier_in = 0
2163
+ @tier_number_by_name: segmentationID, "^words?$"
2164
+ # The following if-block may override default name of word tier
2165
+ if (variableExists ("corpus$") and variableExists ("corpus_tier_word$"))
2166
+ if (length (corpus_tier_word$))
2167
+ @tier_number_by_name: segmentationID, "^'corpus_tier_word$'$"
2168
+ endif
2169
+ endif
2170
+ if (result)
2171
+ word_tier_in = result
2172
+ endif
2173
+
+ # Hesitation tier ("hes"): copied for every segmentation method except segm_asyll
2174
+ if (segm_type <> segm_asyll) ; syllable_tier will be used for storing asyll intervals
2175
+ @tier_number_by_name: segmentationID, "^hes$"
2176
+ if (result)
2177
+ hesitation_tier_in = result
2178
+ @msg: "Using hesitation information from tier named <'result2$'> of <'segfile$'>"
2179
+ @copy_tier: segmentationID, hesitation_tier_in, nucleiID, hesitation_tier
2180
+ hesitation_available = 1
2181
+ endif
2182
+ endif
2183
+ endproc
2184
+
2185
+
2186
+
2187
+ procedure clipPitchTier: objectID, .ymin, .ymax, .xmin, .xmax
+ # Clip a PitchTier: every data point in the time range is limited to the value range
+ # <.ymin - 1> .. <.ymax + 1>, i.e. points lying more than 1 unit (ST) outside the clip range
+ # .ymin...ymax are moved to 1 unit outside that range; all other points keep their value.
+ # objectID      ID of the PitchTier object (modified in place)
+ # .ymin, .ymax  clip range
+ # .xmin, .xmax  time range; processing runs from the point index returned by
+ #               "Get low index from time: .xmin" (or from the first point when that is 0)
+ #               to the index returned by "Get nearest index from time: .xmax"
+ #
+ # Fix: removed an unmatched "endif" (without opening "if") from the loop body.
+    @debug_msg: "clipPitchTier: entry"
+    selectObject: objectID
+    .firsti = Get low index from time: .xmin
+    if (.firsti = 0)
+       .firsti = 1
+    endif
+    .lasti = Get nearest index from time: .xmax
+    for .i from .firsti to .lasti
+       .x = Get time from index: .i
+       .y = Get value at index: .i
+       .y = min (.y, .ymax + 1)
+       .y = max (.y, .ymin - 1)
+       # Replace the point by one with the same time and the limited value
+       Remove point: .i
+       Add point: .x, .y
+    endfor
+    @debug_msg: "clipPitchTier: exit"
+ endproc
2207
+
2208
+
2209
+ procedure cleanup_current_file
+ # Remove the temporary objects that were created for the current input file.
+ # Nothing is removed unless do_cleanup is set. Each object is removed only when its
+ # availability flag is set; only signal_available is reset to 0 here.
+ # The annotation TextGrid (segmentationID) is kept when boundary_annotation is set.
+    @debug_msg: "cleanup_current_file: entry"
+    if (do_cleanup)
+       # --- signal
+       if (signal_available)
+          removeObject: soundID
+          if (volatile)
+             removeObject: fullsoundID
+          endif
+          signal_available = 0
+       endif
+       # --- acoustic parameters
+       if (intensity_available)
+          removeObject: intensityID
+       endif
+       if (pitch_available)
+          removeObject: pitchID
+       endif
+       if (loudness_available)
+          removeObject: loudnessID
+       endif
+       if (intbp_available)
+          removeObject: intbpID
+       endif
+       if (inthp_available)
+          removeObject: inthpID
+       endif
+       # --- annotation and segmentation
+       if (segfile_available)
+          if (boundary_annotation = 0)
+             removeObject: segmentationID
+          endif
+       endif
+       if (draw_prosograms and variableExists ("newgridID"))
+          ; if (segfile_available and nrofplottedtiers > 0) or (polytonia_annotation)
+          removeObject: newgridID
+       endif
+       if (nuclei_available)
+          removeObject: nucleiID
+       endif
+       # --- stylization results (flags are only consulted when stylization was needed)
+       if (needs_stylization)
+          if (nucldat_available)
+             removeObject: nucldatID
+          endif
+          if (stylization_available)
+             removeObject: stylID, stylSTID
+          endif
+       endif
+       if (profile_available)
+          removeObject: profileID
+       endif
+    endif ; if (do_cleanup)
+    @debug_msg: "cleanup_current_file: exit"
+ endproc
2263
+
2264
+
2265
+ procedure cleanup_global
+ # Remove the objects that are shared by all input files: the global profile table
+ # (when globalsheet_available is set) and, outside batch mode, the file list object
+ # (when the variable filelistID exists). Nothing is removed unless do_cleanup is set.
+    @debug_msg: "cleanup_global: entry"
+    if (do_cleanup)
+       if (globalsheet_available)
+          removeObject: globalprofileID
+       endif
+       if (batch_mode = 0 and variableExists ("filelistID"))
+          removeObject: filelistID
+       endif
+    endif ; if (do_cleanup)
+    @debug_msg: "cleanup_global: exit"
+ endproc
2277
+
2278
+
2279
+ procedure convert_sampa_ipa: .objectID, .tier
+ # Convert the labels of all intervals of tier <.tier> in TextGrid <.objectID> from SAMPA
+ # to the representation produced by @sampa_ipa. The tier is modified in place.
+ # The global variable label$ is used to pass each label to @sampa_ipa and to get the
+ # converted label back.
+    selectObject: .objectID
+    .count = Get number of intervals: .tier
+    .k = 1
+    while (.k <= .count)
+       label$ = Get label of interval: .tier, .k
+       @sampa_ipa: label$
+       Set interval text: .tier, .k, label$
+       .k += 1
+    endwhile
+ endproc
2288
+
2289
+
2290
+ procedure sampa_ipa: label$
+ # The parameter label$ is an undotted (global) variable: it holds the input on entry and
+ # receives the converted string on exit (see the last assignment).
+ # The globals s$, b$, c1$, c2$, c3$, c4$ and cn$ are used as scratch variables.
+ # The input is scanned from left to right; at each position exactly one of the branches
+ # below consumes 1 to 4 characters and appends their translation (or a copy) to s$.
2291
+ # replace SAMPA label$ by IPA representation in Praat's "special symbols" format
2292
+ .len = length (label$)
2293
+ s$ = "" ; resulting string
2294
+ .i = 1
2295
+ while (.i <= .len)
2296
+ b$ = "" ; translation (i.e. output) of current SAMPA symbol
2297
+ c1$ = mid$ (label$,.i,1) ; next char
2298
+ c2$ = mid$ (label$,.i,2) ; next 2 chars
2299
+ c3$ = mid$ (label$,.i,3) ; next 3 chars
2300
+ c4$ = mid$ (label$,.i,4) ; next 4 chars
2301
+ cn$ = mid$ (label$,.i+1,1) ; following char
+ # .restlen = number of characters that follow the current one
2302
+ .restlen = .len - .i
+ # Branch 1: current char is followed by a modifier (~ ` =): try the 2-character symbols
2303
+ if (.restlen >= 1 and (cn$ = "~" or cn$ = "`" or cn$ = "="))
2304
+ if (cn$ = "~") ; nasal vowels (length=2)
2305
+ if (c2$ = "a~" or c2$ = "A~")
2306
+ b$ = "\as\~^"
2307
+ elsif (c2$ = "o~" or c2$ = "O~")
2308
+ b$ = "\ct\~^"
2309
+ elsif (c2$ = "e~" or c2$ = "E~")
2310
+ b$ = "\ef\~^"
2311
+ elsif (c2$ = "9~")
2312
+ b$ = "\oe\~^"
2313
+ endif
2314
+ elsif (cn$ = "`") ; rhoticity
2315
+ if (index ("uoi", c1$))
2316
+ b$ = c1$ + "\hr"
2317
+ elsif (c2$ = "@`")
2318
+ b$ = "\sr"
2319
+ elsif (c2$ = "s`")
2320
+ b$ = "\s."
2321
+ elsif (c2$ = "z`")
2322
+ b$ = "\z."
2323
+ endif
2324
+ elsif (cn$ = "=") ; syllabicity in x-sampa
2325
+ if (index_regex (c1$, "[mnNJlrR]"))
2326
+ b$ = c1$ + "\|v"
2327
+ endif
2328
+ endif
+ # A 2-character symbol was recognised: emit it and skip both characters.
+ # Otherwise the current character is copied unchanged (the single-character table
+ # of branch 2 is not applied to it) and the modifier is handled in the next iteration.
2329
+ if (length (b$) > 0)
2330
+ s$ = s$ + b$
2331
+ .i += 2
2332
+ else
2333
+ s$ = s$ + c1$
2334
+ .i += 1
2335
+ endif
2336
+ # SAMPA symbols of length 1
2337
+ elsif (index ("BCDSTZRMNHJGAEIOQVY@2679?:{}&", c1$) or c1$ = """") ; additional regex to speed up things
2338
+ if (c1$ = """") ; primary stress
2339
+ b$ = "\'1"
2340
+ elsif (c1$ = "A")
2341
+ b$ = "\as"
2342
+ elsif (c1$ = "E")
2343
+ ; b$ = "\ep"
2344
+ b$ = "\ef"
2345
+ elsif (c1$ = "I")
2346
+ b$ = "\ic"
2347
+ elsif (c1$ = "O")
2348
+ b$ = "\ct"
2349
+ elsif (c1$ = "Y")
2350
+ b$ = "\yc"
2351
+ elsif (c1$ = "Q")
2352
+ b$ = "\ab"
2353
+ elsif (c1$ = "V")
2354
+ b$ = "\vt"
2355
+ elsif (c1$ = "2")
2356
+ b$ = "\o/"
2357
+ elsif (c1$ = "6")
2358
+ b$ = "\sr"
2359
+ elsif (c1$ = "7")
2360
+ b$ = "\rh"
2361
+ elsif (c1$ = "9")
2362
+ b$ = "\oe"
2363
+ elsif (c1$ = "@") ; schwa
2364
+ b$ = "\sw"
2365
+ elsif (c1$ = "B") ; voiced bilabial fricative
2366
+ b$ = "\bf"
2367
+ elsif (c1$ = "C") ; voiceless palatal fricative
2368
+ b$ = "\c,"
2369
+ elsif (c1$ = "D") ; voiced dental fricative
2370
+ b$ = "\dh"
2371
+ elsif (c1$ = "T") ; voiceless dental fricative
2372
+ b$ = "\tf"
2373
+ elsif (c1$ = "S")
2374
+ b$ = "\sh"
2375
+ elsif (c1$ = "Z")
2376
+ b$ = "\zh"
2377
+ elsif (c1$ = "R")
2378
+ b$ = "\rc"
2379
+ elsif (c1$ = "M")
2380
+ b$ = "\mj"
2381
+ elsif (c1$ = "N")
2382
+ b$ = "\ng"
2383
+ elsif (c1$ = "H")
2384
+ b$ = "\ht"
2385
+ elsif (c1$ = "J")
2386
+ b$ = "\nj"
2387
+ elsif (c1$ = "G")
2388
+ b$ = "\gf"
2389
+ elsif (c1$ = "?")
2390
+ b$ = "\?g"
2391
+ elsif (c1$ = ":")
2392
+ b$ = "\:f"
2393
+ elsif (c1$ = "{")
2394
+ b$ = "\ae"
2395
+ elsif (c1$ = "}")
2396
+ b$ = "\u-"
2397
+ elsif (c1$ = "&")
2398
+ b$ = "\Oe"
2399
+ endif
+ # Both outcomes consume exactly one input character
2400
+ if (length (b$) > 0)
2401
+ s$ = s$ + b$ ; append b$ to output string
2402
+ .i += 1
2403
+ else
2404
+ s$ = s$ + c1$
2405
+ .i += 1
2406
+ endif
+ # Branch 3: backslash: a few backslash sequences listed below are copied as a unit, so
+ # that their characters are not translated again
2407
+ elsif (c1$ = "\")
2408
+ if (index ("_\:f__\a~__\o~__\o/__\ef_", "_'c3$'_"))
2409
+ s$ = s$ + c3$
2410
+ .i += 3
2411
+ ; elsif (.restlen >= 3 and index (":\a~:\o~:\o/:", ":'c3$':") > 0)
2412
+ ; s$ = s$ + c3$
2413
+ ; .i += 3
+ # NOTE(review): .restlen counts the characters after position .i, so a complete
+ # 4-character sequence only needs .restlen >= 3; with >= 4 a label-final "\ep~" falls
+ # through to the character-wise copy below -- confirm intent
2414
+ elsif (.restlen >= 4 and index (":\ep~:", c4$) > 0)
2415
+ s$ = s$ + c4$
2416
+ .i += 4
2417
+ else ; others: just copy them
2418
+ s$ = s$ + c1$
2419
+ .i += 1
2420
+ endif
+ # Branch 4: the 3-character sequence "i_d"
2421
+ elsif (c3$ = "i_d")
2422
+ s$ = s$ + "i\Nv"
2423
+ .i += 3
2424
+ else ; others: just copy them
2425
+ s$ = s$ + c1$
2426
+ .i += 1
2427
+ endif
2428
+ endwhile
+ # Return the converted string through the (global) parameter variable
2429
+ label$ = s$
2430
+ endproc
2431
+
2432
+
2433
+ procedure speaker_info_get
+ # 1. Build the list of speakers found in the speaker tier of nucleiID.
+ #    speakers   number of different speakers
+ #    speakers$  list of entries "<name>:number " (one per speaker)
+ #    speaker_label<number>$  name of speaker <number>
+ # 2. Store the speaker of each nucleus in table nucldatID: as a number in column
+ #    j_speaker_id and (when a speaker tier is available) as the row label.
+ # 3. At speaker turns, reset the inter-syllable and inter-nucleus durations of the first
+ #    nucleus of the new turn and recompute the pause duration of the last nucleus of the
+ #    previous turn.
+    @debug_msg: "speaker_info_get: entry, speaker_available='speaker_available'"
+    speakers = 0 ; nrof different speakers found
+    speakers$ = "" ; used to store speaker names and their number
+    if (not speaker_available)
+       # No speaker tier: a single anonymous speaker is assumed for all nuclei
+       @msg: "No speaker tier in input textgrid. Assuming 1 speaker."
+       speakers = 1
+       speaker$ = "ANON"
+       speaker_label1$ = speaker$
+       speakers$ = "<'speaker$'>:'speakers' "
+       selectObject: nucleiID
+       Set interval text: speaker_tier, 1, speaker$
+       selectObject: nucldatID
+       .rows = Get number of rows
+       for .row from 1 to .rows
+          Set value: .row, j_speaker_id, 1
+       endfor
+    else
+       speaker = 0 ; nrof current speaker
+       selectObject: nucldatID
+       .rows = Get number of rows
+       for .row from 1 to .rows
+          .t1 = Get value: .row, j_nucl_t1
+          .t2 = Get value: .row, j_nucl_t2
+          # The speaker of a nucleus is read from the speaker tier at the nucleus midpoint
+          selectObject: nucleiID
+          .i = Get interval at time: speaker_tier, .t1+(.t2-.t1)/2
+          speaker$ = Get label of interval: speaker_tier, .i
+          speaker$ = replace_regex$ (speaker$, "^ +(.*) +$", "\1", 1) ; trim spaces at left and right
+          if (index (speakers$, "<'speaker$'>") = 0)
+             # new speaker: give him the next number and add him to the list
+             speakers += 1
+             speaker = speakers
+             speakers$ = speakers$ + "<'speaker$'>:'speaker' "
+             speaker_label'speaker'$ = speaker$
+          else
+             # known speaker: look up his number
+             speaker = extractNumber (speakers$, "<'speaker$'>:")
+          endif
+          selectObject: nucldatID
+          Set value: .row, j_speaker_id, speaker
+          Set row label (index): .row, speaker$
+       endfor
+    endif
+
+    ; Correct values at speaker turns, for pause duration, internucleus duration
+    selectObject: nucldatID
+    .rows = Get number of rows
+    for .row from 2 to .rows
+       selectObject: nucldatID
+       .sp = Get value: .row, j_speaker_id
+       .prevsp = Get value: .row-1, j_speaker_id
+       if (.sp <> .prevsp)
+          Set value: .row, j_intersyl, 0
+          Set value: .row, j_internucldur, 0
+          .t1 = Get value: .row-1, j_nucl_t1
+          .t2 = Get value: .row-1, j_nucl_t2
+          ; pause ends and speaker turn, even when next turn starts with silence
+          selectObject: nucleiID
+          .i = Get interval at time: speaker_tier, .t1
+          .t2s = Get end time of interval: speaker_tier, .i
+          # Pause = time between the end of the nucleus and the end of the speaker turn,
+          # counted only when it reaches mindur_pause_gap
+          .gap = .t2s-.t2
+          if (.gap < mindur_pause_gap)
+             .gap = 0
+          endif
+          selectObject: nucldatID
+          Set value: .row-1, j_pause_dur, .gap
+       endif
+    endfor
+    @debug_msg: "speaker_info_get: exit"
+ endproc
2505
+
2506
+
2507
procedure speaker_autorange: t1, t2
# Propose a pitch range suitable for plotting the range of all speakers in time interval <t1>..<t2>.
# Result: global variables <ymin> and <ymax>, in semitones (ST re 1 Hz).
# Reads globals: task, task_interactive, task_pitch_plot, profile_available, needs_pitchrange,
#                profileID, pitchID, nucleiID, speaker_tier, basename$, speakers$, speaker_range_<j>$.
# Side effects: in interactive mode with an available profile, <speakers$>, <speaker_label<j>$> and
#                <speaker_range_<j>$> are rebuilt from the rows of the profile table.
# Fix: a speaker without nuclei no longer discards the range accumulated for the other speakers;
#      an undefined global mean pitch now falls back to 85 ST, as in the pitch-plot branch.
    @debug_msg: "speaker_autorange: entry"
    if (task == task_interactive and profile_available)
        ; rebuild speaker list and per-speaker range strings from the profile table
        speakers$ = ""
        selectObject: profileID
        .nrows = Get number of rows
        for .j to .nrows
            .s$ = Get row label: .j
            .s$ = replace_regex$(.s$, "^('basename$'_)", "", 1)
            speakers$ += "<'.s$'>:'.j' "
            speaker_label'.j'$ = .s$
            .topST = Get value: .j, j_pitch_top_ST
            .bottomST = Get value: .j, j_pitch_bottom_ST
            .medianST = Get value: .j, j_pitch_median_ST
            speaker_range_'.j'$ = "TOP_ST='.topST:1' BOTTOM_ST='.bottomST:1' MEDIAN_ST='.medianST:1' "
        endfor
    endif
    if (task == task_pitch_plot or not needs_pitchrange)    ; no nuclei available
        ; center a 2-octave window on the mean pitch of the interval
        selectObject: pitchID
        .y = Get mean: t1, t2, "semitones re 1 Hz"
        if (.y == undefined)
            @msg: "speaker_autorange: mean pitch undefined for time 't1:3'-'t2:3', using default 85 ST"
            .y = 85
        endif
        ymin = .y - 12
        ymax = .y + 12
    else    ; compute total range for all speakers in time interval <t1>..<t2>
        ymin = 1000
        ymax = 0
        .t = t1
        selectObject: nucleiID
        .n = Get number of intervals: speaker_tier
        .t2n = Get end point: speaker_tier, .n
        .t2 = min (t2, .t2n)
        repeat    ; all speakers in range t1..t2
            .i = Get interval at time: speaker_tier, .t
            speaker$ = Get label of interval: speaker_tier, .i
            speaker$ = replace_regex$ (speaker$, "^ +(.*) +$", "\1", 1)    ; trim left and right
            speaker_j = extractNumber (speakers$, "<'speaker$'>:")    ; get his number
            if (speaker_j == 0 or speaker_j == undefined)
                ; This happens when there are no nuclei for this speaker:
                ; use the global mean pitch, but merge it into the range found so far
                selectObject: pitchID
                .y = Get mean: 0, 0, "semitones re 1 Hz"
                if (.y == undefined)
                    @msg: "speaker_autorange: mean pitch undefined, using default 85 ST"
                    .y = 85
                endif
                ymin = min (ymin, .y)
                ymax = max (ymax, .y)
            else
                .y = extractNumber (speaker_range_'speaker_j'$, "BOTTOM_ST=")
                if (.y == undefined)
                    @msg: "speaker_autorange: BOTTOM_ST undefined t='.t:3'"
                    .y = 85
                endif
                ymin = min (ymin, .y)
                .y = extractNumber (speaker_range_'speaker_j'$, "TOP_ST=")
                if (.y == undefined)
                    @msg: "speaker_autorange: TOP_ST undefined t='.t:3'"
                    .y = 85
                endif
                ymax = max (ymax, .y)
            endif
            selectObject: nucleiID
            .t = Get end point: speaker_tier, .i    ; continue with the next speaker interval
        until (.t >= .t2)
        if (ymax - ymin < 24)    ; default range 24 ST or 2 octaves
            ; widen to 24 ST around the center of the range, without going below 0 ST
            ymin = max(0, (ymin + (ymax - ymin)/2) - 12)
            ymax = ymin + 24
        endif
    endif    ; task
    @debug_msg: "speaker_autorange: exit"
endproc
2578
+
2579
+
2580
procedure nucleus_tier_postproc: .grid, .nucl_tier, .syll_tier
# Post-process the nucleus tier of TextGrid <.grid>, provided it is an interval tier:
#   1. rename the tier to "segm";
#   2. blank the labels that mark parts other than the nucleus;
#   3. merge runs of adjacent empty intervals;
#   4. copy the syllable boundaries (all but the last end point) onto the nucleus tier
#      and blank the corresponding syllable labels.
# Writes the global <interval_tier>; reads the global <time_step>.
    selectObject: .grid
    interval_tier = Is interval tier: .nucl_tier
    if interval_tier
        Set tier name: .nucl_tier, "segm"

        ; step 2: replace parts other than nucleus by zero-length strings
        .n = Get number of intervals: .nucl_tier
        for .i from 1 to .n
            .s$ = Get label of interval: .nucl_tier, .i
            .is_marker = index (":<:>:<>:xL:xR:U:skip:reject:short:", ":'.s$':")
            if .is_marker <> 0
                Set interval text: .nucl_tier, .i, ""
            endif
        endfor

        ; step 3: reduce contiguous empty intervals
        .i = 2
        while .i <= .n
            .prev$ = Get label of interval: .nucl_tier, .i-1
            .s$ = Get label of interval: .nucl_tier, .i
            if .prev$ <> "" or .s$ <> ""
                ; at least one neighbour carries a label: move on
                .i = .i + 1
            else
                ; both empty: merge them and re-examine the same index
                Remove left boundary: .nucl_tier, .i
                .n = .n - 1
            endif
        endwhile

        ; step 4: add syllable boundaries to nucleus tier
        .n = Get number of intervals: .syll_tier
        .last = .n - 1
        for .i from 1 to .last
            .t = Get end point: .syll_tier, .i
            .j = Get interval at time: .nucl_tier, .t
            .x1 = Get start point: .nucl_tier, .j
            .x2 = Get end point: .nucl_tier, .j
            ; only insert when the boundary lies more than one time step after the
            ; start of the enclosing interval and before its end
            if .t < .x2 and .t - .x1 > time_step
                Insert boundary: .nucl_tier, .t
            endif
            Set interval text: .syll_tier, .i, ""
        endfor
    endif
endproc
2619
+
2620
+
2621
procedure copy_tier_tweeked: srcgrid, srctier, destgrid, desttier
# Copy the intervals of tier <srctier> in TextGrid <srcgrid> onto tier <desttier> of <destgrid>.
# Special treatment for Rhapsodie tier "locuteur": interval contiguity is forced, i.e. when a
# gap separates an interval from the next one, its end is moved to the start of the next one.
# Source intervals starting after the end of the destination tier are ignored.
# NOTE(review): the interval index is stored in the global variable <j>, as in the original code.
    selectObject: destgrid
    .n = Get number of intervals: desttier
    .endtime = Get end time of interval: desttier, .n
    selectObject: srcgrid
    .n = Get number of intervals: srctier
    for .i from 1 to .n
        selectObject: srcgrid
        .label$ = Get label of interval: srctier, .i
        .t1 = Get start time of interval: srctier, .i
        .t2 = Get end time of interval: srctier, .i
        if .i < .n
            ; intervals should be contiguous: close any gap up to the next interval
            .t3 = Get start time of interval: srctier, .i+1
            if .t3 > .t2
                .t2 = .t3
            endif
        endif
        selectObject: destgrid
        if .i < .n and .t2 < .endtime
            Insert boundary: desttier, .t2
        endif
        if .t1 <= .endtime
            ; when destgrid is shorter than srcgrid, clip the interval to the destination end
            .t2 = min (.t2, .endtime)
            .mid = .t1 + (.t2-.t1)/2
            j = Get interval at time: desttier, .mid
            Set interval text: desttier, j, .label$
        endif
    endfor
endproc
2650
+
2651
+
2652
procedure validate_phoneme_tier: .infname$
# Read the annotation file <.infname$>, locate its phoneme tier (name matching "^phon") and
# pass the label of every interval to @verify_tier for validation as a "phoneme" label.
# The object read from file is removed afterwards.
    @fname_parts: .infname$
    .basename$ = result2$
    .dataID = Read from file: .infname$
    ; the tier number is returned in the global variable <phone_tier_>
    @tier_get: .dataID, "^phon", "phone_tier_", "Cannot find phoneme tier in '.basename$'", 1
    .n = Get number of intervals: phone_tier_
    for .phon to .n
        .label_phon$ = Get label of interval: phone_tier_, .phon
        .t1_phon = Get start time of interval: phone_tier_, .phon
        @verify_tier: .label_phon$, .basename$, "phoneme", .t1_phon
    endfor
    removeObject: .dataID
endproc
2665
+
2666
+
2667
procedure validate_syllable_tier: .infname$
# Validation of the syllable and phoneme annotation in file <.infname$>:
# (1) Validate symbols in phoneme tier: should be valid SAMPA or Praat phonetic symbols
# (2) Validate symbols in syllable tier
# (3) Validate alignment of syllable and phoneme tiers
# (4) Check number of vowels (or syllabic consonants) within syllable.
# Uses tiers named "phon...", "syll...", and optionally "word...".
# Test is skipped when syllable label equals "_" or when word label equals "%".
# Works through globals: dataID, phone_tier, syllable_tier, word_tier, ph1, ph2,
# errors, nrof_vowels, nrof_syllabics. <errors> is only accumulated here, not reported.
    @fname_parts: .infname$
    .basename$ = result2$
    dataID = Read from file: .infname$
    @corpus_conversion: 1, "dataID", corpus$
    @tier_get: dataID, "^phon", "phone_tier", "Cannot find phoneme tier in '.basename$'", 1
    @tier_get: dataID, "^syll", "syllable_tier", "Cannot find syllable tier in '.basename$'", 1
    @tier_get: dataID, "^word", "word_tier", "Cannot find word tier in '.basename$'", 0

    @debug_msg: "validate_syllable_tier: infname='.infname$' phone_tier='phone_tier'"
    .ns = Get number of intervals: syllable_tier
    for .j to .ns
        .t1_syll = Get start time of interval: syllable_tier, .j
        .t2_syll = Get end time of interval: syllable_tier, .j
        .label_syll$ = Get label of interval: syllable_tier, .j
        .dur = .t2_syll - .t1_syll
        if .dur < 0.005
            @msg: "Syllable with label '.label_syll$', starting at '.t1_syll:4', is only '.dur:4' s long"
        endif

        ; label of the word at the syllable midpoint, whitespace removed (empty when no word tier)
        .word$ = ""
        if word_tier
            .k = Get interval at time: word_tier, (.t1_syll+(.t2_syll-.t1_syll)/2)
            .word$ = Get label of interval: word_tier, .k
            .word$ = replace_regex$ (.word$, " ", "", 0)
        endif

        ; not pause, not noise, not incomprehensible
        if index ("_#", .label_syll$) = 0 and .word$ <> "%"
            errors = 0
            ; Check label in tier
            @verify_tier: .label_syll$, .basename$, "syllable", .t1_syll
            errors += result
            ; phoneme intervals within the syllable are returned in <ph1>..<ph2>
            @intervals_from_time_range_excl: dataID, phone_tier, .t1_syll, .t2_syll, "ph1", "ph2"
            nrof_vowels = 0
            nrof_syllabics = 0
            if phone_tier
                for .phon from ph1 to ph2
                    .t1_phon = Get start time of interval: phone_tier, .phon
                    .t2_phon = Get end time of interval: phone_tier, .phon
                    .label_phon$ = Get label of interval: phone_tier, .phon
                    .dur = .t2_phon - .t1_phon
                    if .dur < 0.005
                        @msg: "Phoneme with label '.label_phon$', starting at '.t1_phon:4', is only '.dur:4' s long"
                    endif
                    @verify_tier: .label_phon$, .basename$, "phoneme", .t1_phon
                    errors += result

                    ; Check alignment between syllable tier and phoneme tier
                    if .phon = ph1 and .t1_phon <> .t1_syll
                        @msg: "Syllable ('.label_syll$') starting at '.t1_syll:4' not aligned with phoneme ('.label_phon$') start ('.t1_phon:4')"
                        errors += 1
                    endif
                    if .phon = ph2 and .t2_phon <> .t2_syll
                        @msg: "Syllable ('.label_syll$') ending at '.t2_syll:4' not aligned with phoneme ('.label_phon$') end ('.t2_phon:4')"
                        errors += 1
                    endif

                    ; Check number of vowels/syllabics
                    @is_syllabic: .label_phon$
                    if result
                        nrof_syllabics = nrof_syllabics + 1
                        if is_vowel
                            nrof_vowels = nrof_vowels + 1
                        endif
                        if nrof_syllabics > 1
                            @msg: "Multiple vowels (or syllabics) in syllable ('.label_syll$') at time '.t1_syll:4'"
                        endif
                    endif
                    ; report a missing vowel once the last phoneme of the syllable has been seen
                    if nrof_vowels = 0 and .phon = ph2
                        @msg: "No vowel in syllable ('.label_syll$') at time '.t1_syll:4'"
                    endif
                endfor    ; phonemes in syllable
            endif    ; phone_tier
        endif
    endfor
    removeObject: dataID
endproc
2747
+
2748
+
2749
procedure verify_tier: .str$, .fname$, .type$, .time
# Verify the label <.str$> found at time <.time> in a tier of type <.type$>, which is one of
# "phoneme", "syllable", "hesitation", "prominence", "polytonia", "stressref".
# <.fname$> is only used in the message about whitespace.
# Result: global <result> is 0 when the label is valid, or 1 when an error was found.
# Fix: <result> is now also set to 1 for a label containing whitespace and for an unknown
#      tier type; both were reported as errors but left <result> at 0.
    result = 0
    if (index(.str$, " "))
        @error_msg: "Label in tier at time '.time:2' of file <'.fname$'> contains whitespace"
        result = 1
    endif
    if (.type$ = "phoneme")
        .str$ = replace_regex$(.str$, "^""", "", 0)    ; remove stress symbol
        ; .str$ = replace_regex$(.str$, "^_$", "", 0)    ; remove pause symbol
        ; map Praat special-symbol sequences to their SAMPA counterparts
        .str$ = replace$(.str$, "\rc", "R", 0)
        .str$ = replace$(.str$, "\ep", "E", 0)
        if (.str$ = "_" or .str$ = "")
            ; pause symbol or empty label: accepted
        elsif (not index_regex(.str$, "^[ptcCkbdgfsTDSvzZxGhmnNJlrRiyue2oE9OajwH@\?][=~]?$"))
            @msg: "Label in phone tier at time '.time:2' contains invalid character: <'.str$'>"
            result = 1
        endif
    elsif (.type$ = "syllable")
        .str$ = replace$(.str$, "\rc", "R", 0)
        .str$ = replace$(.str$, "\ep", "E", 0)
        if (not index_regex(.str$, "^[ptcCkbdgfsTDSvzZxGhmnNJlrRiyue2oE9OajwH@\?=~]+$"))
            @msg: "Label in syllable tier at time '.time:2' contains invalid character: <'.str$'>"
            result = 1
        endif
    elsif (.type$ = "hesitation")
        if (not index_regex(.str$, "^H?$"))
            @msg: "Label in hesitation tier at time '.time:2' contains invalid character: <'.str$'>"
            result = 1
        endif
    elsif (.type$ = "prominence")
        if (not index_regex(.str$, "^[0SW]$"))
            @msg: "Label in prominence tier at time '.time:2' contains invalid character: <'.str$'>"
            result = 1
        endif
    elsif (.type$ = "polytonia")
        ; first the allowed symbol set, then the forbidden sequences "CC" and "__"
        if (not index_regex(.str$, "^[BLMHT_RrFfSC,]*$"))
            @msg: "Label in polytonia tier at time '.time:2' contains invalid sequence: <'.str$'>"
            result = 1
        endif
        if (index_regex(.str$, "CC") or index_regex(.str$, "__"))
            @msg: "Label in polytonia tier at time '.time:2' contains invalid sequence: <'.str$'>"
            result = 1
        endif
    elsif (.type$ = "stressref")
        if (not index_regex(.str$, "^[SW0]$"))
            @msg: "Label in stress-ref tier at time '.time:2' contains invalid character: <'.str$'>"
            result = 1
        endif
    else
        @msg: "verify_tier: ERROR invalid tier type: <'.type$'>"
        result = 1
    endif
endproc
2801
+
2802
+
2803
procedure store_stylization: paramID, nucleiID, .filename$
# Store stylization targets, indicating whether they are initial/final/intermediate relative to nucleus boundaries.
# <paramID>    stylization (PitchTier)
# <nucleiID>   segmentation into nuclei, etc (TextGrid); tiers are taken from the globals
#              <nucleus_tier> and, when <syllables_available>, <syllable_tier>
# <.filename$> output text file; an existing file is deleted first
# Output lines (tab-separated):
#    syll   <start> <end> <label>       (only when syllables are available)
#    nucl   <start> <end>
#    tstart|tin|tend   <time> <Hz> <ST re 1 Hz>
# Fix: the file is written with deleteFile/appendFile, so that a path containing spaces is
#      handled correctly (the unquoted old-style fileappend cut the file name at the first space).
# Fix: a PitchTier without points no longer causes a query on point index 0.
    @debug_msg: "store_stylization: entry"
    deleteFile: .filename$
    selectObject: nucleiID
    .nnucl = Get number of intervals: nucleus_tier
    selectObject: paramID
    .ni = Get number of points
    for .interv from 1 to .nnucl    ; indices of nuclei for which to store stylization
        selectObject: nucleiID
        @is_nucleus: .interv
        if (result and .ni > 0)
            .nx1 = Get start point: nucleus_tier, .interv    ; time of start of nucleus
            .nx2 = Get end point: nucleus_tier, .interv    ; time of end of nucleus
            selectObject: paramID
            # Check that the nearest indices are within nucleus
            ; move the first index rightwards until it is not before the nucleus start
            .i1 = Get nearest index from time: .nx1
            repeat
                .t1 = Get time from index: .i1
                if (.t1 < .nx1)
                    .i1 += 1
                endif
            until (.t1 >= .nx1 or .t1 >= .nx2 or .i1 >= .ni)
            ; move the last index leftwards until it is not after the nucleus end
            .i2 = Get nearest index from time: .nx2
            repeat
                .t2 = Get time from index: .i2
                if (.t2 > .nx2)
                    .i2 -= 1
                endif
            until (.t2 <= .nx2 or .t2 <= .nx1 or .i2 <= 1)
            for .i from .i1 to .i2    ; each stylization target i
                selectObject: paramID
                .x = Get time from index: .i
                .y = Get value at index: .i
                ; hertzToSemitones is relative to 100 Hz; convert to ST relative to 1 Hz
                .yST = hertzToSemitones(.y) - hertzToSemitones(1)
                if (.i == .i1)
                    if (syllables_available)    ; write syllable start, end and label
                        selectObject: nucleiID
                        .isyll = Get interval at time: syllable_tier, .x
                        .t1s = Get start point: syllable_tier, .isyll
                        .t2s = Get end point: syllable_tier, .isyll
                        .s$ = Get label of interval: syllable_tier, .isyll
                        appendFile: .filename$, "syll", tab$, fixed$ (.t1s, 4), tab$, fixed$ (.t2s, 4), tab$, .s$, newline$
                    endif
                    appendFile: .filename$, "nucl", tab$, fixed$ (.nx1, 4), tab$, fixed$ (.nx2, 4), newline$
                    .type$ = "tstart"    ; target at start of nucleus
                elsif (.i == .i2)
                    .type$ = "tend"    ; target at end of nucleus
                else
                    .type$ = "tin"    ; target inside nucleus
                endif
                appendFile: .filename$, .type$, tab$, fixed$ (.x, 4), tab$, fixed$ (.y, 1), tab$, fixed$ (.yST, 1), newline$
            endfor
        endif
    endfor
    @debug_msg: "store_stylization: exit"
endproc
2861
+
2862
+
2863
procedure store_stylization_resampled paramID nucleiID .dx .filename$
# Store resampled stylization, as if it were a pitch curve, sampled at .dx (second).
# <paramID>    stylization (PitchTier)
# <nucleiID>   segmentation into nuclei, etc; the tier is taken from the global <nucleus_tier>
# <.dx>        sampling period in seconds; must be positive
# <.filename$> output file, written as a Praat text file of class "Pitch 1"
# Each frame holds one candidate: the stylization value interpolated linearly between the two
# surrounding targets when the frame lies inside a nucleus (label "a"), otherwise 0.
# Fixes:
# - the file is written with deleteFile/appendFile, so a path containing spaces is handled
#   correctly (the unquoted old-style fileappend cut the file name at the first space);
# - the frame count <nx> in the header now equals the number of frames written: it was
#   floor(duration/dx), while the loop usually wrote one frame more;
# - frame times are computed from the frame index, avoiding accumulated rounding error;
# - no division by zero when a frame coincides with the last target at the nucleus end;
# - a non-positive <.dx>, which made the loop endless, is rejected.
    if (.dx <= 0)
        exitScript: "store_stylization_resampled: sampling period must be positive"
    endif
    deleteFile: .filename$
    selectObject: paramID    ; stylization (PitchTier)
    .np = Get number of points    ; nrof targets in Pitch Tier
    .starttime = Get start time
    .endtime = Get end time
    .nx = ceiling ((.endtime - .starttime) / .dx)    ; nrof frames at times starttime + k*dx < endtime
    appendFile: .filename$, "File type = ""ooTextFile""", newline$
    appendFile: .filename$, "Object class = ""Pitch 1""", newline$, newline$
    appendFile: .filename$, "xmin = ", .starttime, newline$
    appendFile: .filename$, "xmax = ", .endtime, newline$
    appendFile: .filename$, "nx = ", .nx, newline$
    appendFile: .filename$, "dx = ", .dx, newline$
    appendFile: .filename$, "x1 = ", .starttime, newline$
    appendFile: .filename$, "ceiling = 600", newline$
    appendFile: .filename$, "maxnCandidates = 1", newline$
    appendFile: .filename$, "frame []:", newline$
    for .i from 1 to .nx    ; frame index
        .t = .starttime + (.i - 1) * .dx    ; time of current frame
        .value = 0    ; default F0 (outside nuclei)
        selectObject: nucleiID    ; segmentation into nuclei, etc
        .interv = Get interval at time: nucleus_tier, .t
        .label$ = Get label of interval: nucleus_tier, .interv
        .nx1 = Get start point: nucleus_tier, .interv    ; time of start of nucleus
        .nx2 = Get end point: nucleus_tier, .interv    ; time of end of nucleus
        selectObject: paramID    ; stylization (PitchTier)
        .i1 = Get low index from time: .t    ; index of nearest target at or before .t (0 if none)
        if (not .i1 == undefined) and (.i1 > 0)
            .t1 = Get time from index: .i1
            if (.i1 < .np)
                .i2 = .i1 + 1    ; index of next point in pitch tier
                .t2 = Get time from index: .i2    ; time of next point
            else
                ; no next target: hold the last value up to the end of the nucleus
                .i2 = .i1
                .t2 = .nx2
            endif
            if (.label$ = "a" and .t >= .nx1 and .t <= .nx2 and .t2 <= .nx2)
                .y1 = Get value at index: .i1
                .y2 = Get value at index: .i2
                if (.t2 > .t1)
                    .value = .y1 + ((.y2-.y1)*(.t-.t1)/(.t2-.t1))
                else
                    .value = .y1
                endif
            endif
        endif
        appendFile: .filename$,
        ... tab$, "frame [", .i, "]:", newline$,
        ... tab$, tab$, "intensity = 0", newline$,
        ... tab$, tab$, "nCandidates = 1", newline$,
        ... tab$, tab$, "candidate = []", newline$,
        ... tab$, tab$, tab$, "candidate = [1]:", newline$,
        ... tab$, tab$, tab$, tab$, "frequency = ", fixed$ (.value, 0), newline$,
        ... tab$, tab$, tab$, tab$, "strength = 1", newline$
    endfor
endproc
2920
+
2921
+
2922
procedure option_get: .type$, .optname$, .varname$, .default$, .data$
# Extract an option value from a command string.
# <.type>     option type: {word|line|real|boolean}
#             word: a string without whitespace
#             line: a string with whitespace (the rest of the line)
#             real: a number
#             boolean: a boolean, where TRUE is encoded by one of: y, Y, yes, 1, T, TRUE
# <.optname>  option name, e.g. "file=", "threshold="
# <.varname>  name of variable, where value of option will be stored
#             (string variable <varname>$ for word/line, numeric variable <varname> otherwise)
# <.default>  default option value, when option is not mentioned in command
# <.data>     command argument string, where options will be searched
# An unknown <.type> leaves the variable untouched.
# Fix: the option name is only recognized at the start of the command string or when the
# preceding character is not a letter, digit or underscore. Before, the first occurrence of
# the name was used even inside a longer option name, so that "g=" picked up the value of
# "dg=", "file=" that of "outputfile=", and "method=" that of "hesit_method=".

    ; Locate the first stand-alone occurrence of the option name.
    ; <.tail$> is the command string from that occurrence onwards (empty when absent).
    .tail$ = ""
    if (.optname$ = "")
        .tail$ = .data$
    else
        .len = length (.data$)
        .start = 1    ; position in .data$ where the next search starts
        .hit = 0    ; position of the accepted occurrence, 0 = none found
        repeat
            .rel = index (mid$ (.data$, .start, .len - .start + 1), .optname$)
            if (.rel > 0)
                .at = .start + .rel - 1
                if (.at == 1)
                    .hit = .at
                elsif (index_regex (mid$ (.data$, .at - 1, 1), "[A-Za-z0-9_]") == 0)
                    .hit = .at
                else
                    ; embedded in a longer name: resume the search just after this position
                    .start = .at + 1
                endif
            endif
        until (.rel == 0 or .hit > 0)
        if (.hit > 0)
            .tail$ = mid$ (.data$, .hit, .len - .hit + 1)
        endif
    endif

    if (.type$ = "word")
        .s$ = extractWord$ (.tail$, .optname$)
        '.varname$'$ = .default$
        if (length (.s$))
            '.varname$'$ = .s$
        endif
    elsif (.type$ = "line")
        .s$ = extractLine$ (.tail$, .optname$)
        '.varname$'$ = .default$
        if (length (.s$))
            '.varname$'$ = .s$
        endif
    elsif (.type$ = "real")
        .v = extractNumber (.tail$, .optname$)
        ; the default is given as text and substituted as a numeric literal
        '.varname$' = '.default$'
        if not (.v == undefined)
            '.varname$' = .v
        endif
    elsif (.type$ = "boolean")
        .s$ = extractWord$ (.tail$, .optname$)
        '.varname$' = 0    ; default value
        if (length (.s$) < 1)
            .s$ = .default$
        endif
        if (index (":y:1:Y:yes:T:TRUE:", ":" + .s$ + ":") > 0)
            '.varname$' = 1
        endif
    endif
endproc
2962
+
2963
+
2964
procedure option_check: .type$, .optname$, .valids$, .value$
# Check whether the value of an option belongs to a set of accepted values.
# <.valids$> lists the accepted values, each enclosed by colons, e.g. ":EPS:EMF:".
# Only options of type "word" are checked; an invalid value is reported by a message.
    .is_word = (.type$ = "word")
    if .is_word
        .position = index (.valids$, ":'.value$':")
        if .position <= 0
            @msg: "invalid value <'.value$'> for option <'.optname$'>"
        endif
    endif
endproc
2972
+
2973
+
2974
procedure batch_cmd_start: .cmd$, .args$
# Announce the start of batch command <.cmd$> with its argument string <.args$>.
    .banner$ = " 'newline$'Batch command> '.cmd$': '.args$''newline$'"
    @msg: .banner$
endproc
2977
+
2978
+
2979
procedure batch_cmd_ready: .cmd$
# Announce that batch command <.cmd$> has finished.
    .banner$ = " 'newline$'Batch command> '.cmd$': ... Ready"
    @msg: .banner$
endproc
2982
+
2983
+
2984
procedure check_regex: .re$
# Reject a regular expression that starts with the quantifier "*" (nothing to repeat).
# Such an expression is reported through @fatal_error.
    .leading_star = index_regex (.re$, "^\*")
    if .leading_star <> 0
        @fatal_error: "Invalid regular expression: <'.re$'> (\* should be preceded by some string)"
    endif
endproc
2989
+
2990
+
2991
# Batch command "calculate_pitch"
# Options in the command string: option_name, default_value
#   file=       ""      ; input file, or files specified by a regular expression
#   time_step=  0.005   ; frame period
#   f0min=      0       ; lower value for F0 detection - f0min=0 selects auto pitch range detection
#   f0max=      450     ; upper value for F0 detection
# When the global corpus_home$ exists (and optionally corpus_subdir_sound$), it is prepended
# to the file specification.
procedure calculate_pitch: .cmda$
    @debug_msg: "calculate_pitch: entry"
    @batch_cmd_start: "calculate_pitch", .cmda$
    @initialization_main
    @option_get: "word", "file=", "inputfname", "", .cmda$
    @option_get: "real", "time_step=", "time_step", "0.005", .cmda$
    @option_get: "real", "f0min=", "minimum_pitch", "0", .cmda$
    @option_get: "real", "f0max=", "maximum_pitch", "450", .cmda$

    ; prefix the file specification by the corpus location, when defined
    if variableExists ("corpus_home$") and variableExists ("corpus_subdir_sound$")
        inputfname$ = corpus_home$ + corpus_subdir_sound$ + inputfname$
    elsif variableExists ("corpus_home$")
        inputfname$ = corpus_home$ + inputfname$
    endif

    ; collect the input files
    @check_regex: inputfname$
    @files_get_regex: "filelistID", inputfname$
    selectObject: filelistID
    .nfiles = Get number of strings
    if .nfiles = 0
        @msg: "No input files found for <'inputfname$'>"
    endif

    task = task_calc_pitch
    for .ifile from 1 to .nfiles
        selectObject: filelistID
        .fname$ = Get string: .ifile
        @initialization_per_file
        @construct_filenames: .fname$
        ; the directory receiving the pitch file has to exist already
        @fname_parts: pitchfile$
        .dir$ = result4$
        .dir_found = fileReadable (.dir$)
        if not .dir_found
            @fatal_error: "calculate_pitch: Cannot find directory <'.dir$'>. Create it first."
        endif
        @calculate_pitch_int
        removeObject: soundID, pitchID
    endfor
    removeObject: filelistID
    @batch_cmd_ready: "calculate_pitch"
    @debug_msg: "calculate_pitch: exit"
endproc
3038
+
3039
+
3040
# Batch command "prosogram_variants"
# Options in the command string: option_name, default_value (as used by the code below)
#   file=         ""      ; input file, or files specified by a regular expression
#   time_step=    0.005
#   f0min=        0       ; lower value for F0 detection - f0min=0 selects auto pitch range detection
#   f0max=        450     ; upper value for F0 detection
#   t1=           0       ; start time of analysis
#   t2=           0       ; end time of analysis
#   time_incr=    3.0     ; duration of prosogram pane
#   segmentation= optimal ; segmentation type, selected from: {optimal, vow-nucl, extern, loudness, int-BP, asyll, rhyme, syll+vow, syll, voiced}
#   g=            -0.32   ; glissando threshold; a negative value (the default) selects the adaptive threshold 0.16 - 0.32
#   dg=           30      ; differential glissando threshold
#   dmin=         0.035   ; minimum duration of tonal segment
#   wide=         yes     ; selects wide size for Prosogram, otherwise compact size
#   rich=         yes     ; selects rich format, otherwise light format
#   tiers=        *1,2,3  ; tiers shown in the graphics output; tiers are specified by name or number (no whitespace allowed); * indicates SAMPA to IPA conversion.
#   collect=      no      ; collect output of duration calculation in a single output file
#   outputfile=   ""      ; filename of graphics file (without filename extension). Zero-length string selects corpus-defined path and numbering in filename.
#   outputsuffix= ""      ; stored in output_suffix$
#   outputformat= PNG300  ; format of graphics file, selected from: {EPS, EMF, PNG300, PNG600, JPG300, JPG600}
#   number=       yes     ; number graphics files (path+basename+number+extension)
#   jpegq=        75      ; stored in jpegq
#   save=         no      ; save intermediate data in files
#   polytonia=    no      ; stored in polytonia_annotation
#   boundaries=   no      ; stored in boundary_annotation
#   cleanup=      yes     ; cleanup data after processing
#   volatile=     no      ; analysis of speech signal restricted to analysis time interval
#   intensity=    no      ; stored in show_intensity; only read when task equals task_pitch_plot
#   pitchrange=   no      ; plot pitch range
#   draw=         yes     ; draw Prosogram in Graphics window and write graphics files
#   pitch=        yes     ; plot pitch (overrides value of "rich")
#   portee=       yes     ; show portee (horizontal ST calibration lines)
#   tg_bound=     yes/no  ; show vertical boundaries of tier intervals in textgrid (default is yes in rich mode and no in light mode)
#   settings=     yes     ; show settings (segmentation type, thresholds) in Prosogram
#   x_scale=      yes     ; show X scale (numbers on axis)
#   y_scale=      yes     ; show Y scale (numbers on axis)
#   y_scale_r=    yes     ; show Y scale (values in Hz on right axis)
#   tiernames=    yes     ; stored in show_tiernames
#   pauses=       no      ; show pauses (by "P")
#   targets=      0       ; draw pitch targets in ST (=1) or in Hz (=2)
#   traject=      no      ; show intrasyllab up/down and intersyllab pitch intervals
#   grey=         no      ; stored in greyscale
#   hesit=        no      ; show hesitations
#   hesit_anno=   no      ; stored in hesitation_annotation
#   hesit_method= all     ; method for hesitation detection, selected from: {all, none, phon+pros, words, annotation}
#   rhythm=       no      ; show rhythm


procedure prosogram_variants: .cmda$
    ; @logging: "reset debug timed", "_log.txt"
    @debug_msg: "prosogram_variants: entry, command=<'.cmda$'>"
    @initialization_main
    task = task_prosogram
    auto_pitchrange = 1    ; automatic pitch range adjustment in plot
    outputmode$ = "Fill"

    ; --- options read before @task_flags and @initialization_multiple_files ---
    @option_get: "word", "file=", "inputfname", "", .cmda$
    @option_get: "real", "time_step=", "time_step", "0.005", .cmda$
    @option_get: "real", "f0min=", "minimum_pitch", "0", .cmda$
    @option_get: "real", "f0max=", "maximum_pitch", "450", .cmda$
    @option_get: "real", "t1=", "pmf_t1", "0", .cmda$
    @option_get: "real", "t2=", "pmf_t2", "0", .cmda$
    @option_get: "real", "time_incr=", "timeincr", "3.0", .cmda$
    @option_get: "word", "segmentation=", "segmentation_name", "optimal", .cmda$
    @option_check: "word", "segmentation", ":optimal:vow-nucl:extern:loudness:int-BP:asyll:rhyme:syll+vow:syll:voiced:", segmentation_name$
    @option_get: "real", "g=", "glissando", "-0.32", .cmda$
    @option_get: "real", "dg=", "diffgt", "30", .cmda$
    @option_get: "real", "dmin=", "mindur_ts", "0.035", .cmda$
    @option_get: "boolean", "wide=", "wide", "yes", .cmda$
    @option_get: "boolean", "rich=", "rich", "yes", .cmda$

    ; rich format shows intensity, F0 and (by default) textgrid boundaries; light format does not.
    ; <rich> is 0 or 1, as set by @option_get for a boolean option.
    show_intensity = rich
    show_f0 = rich
    if rich
        tg_bound_default$ = "yes"
    else
        tg_bound_default$ = "no"
    endif

    @option_get: "word", "tiers=", "tiers_to_show", "*1,2,3", .cmda$
    @option_get: "boolean", "collect=", "collect_output", "no", .cmda$
    @option_get: "word", "outputfile=", "output_filename", "", .cmda$
    @option_get: "word", "outputsuffix=", "output_suffix", "", .cmda$
    @option_get: "word", "outputformat=", "output_format", "PNG300", .cmda$
    @option_check: "word", "outputformat", ":EPS:EMF:PNG300:PNG600:JPG300:JPG600:", output_format$
    ; rewrite e.g. PNG300 as "PNG 300 dpi"
    output_format$ = replace_regex$ (output_format$, "PNG(\d+)", "PNG \1 dpi", 1)
    @option_get: "boolean", "number=", "file_numbering", "yes", .cmda$
    @option_get: "real", "jpegq=", "jpegq", "75", .cmda$
    @option_get: "boolean", "save=", "save_intermediate_data", "no", .cmda$
    @option_get: "boolean", "polytonia=", "polytonia_annotation", "no", .cmda$
    @option_get: "boolean", "boundaries=", "boundary_annotation", "no", .cmda$
    @option_get: "boolean", "cleanup=", "do_cleanup", "yes", .cmda$

    ; a negative glissando threshold selects the adaptive threshold (glissando_low .. |glissando|)
    glissando_low = 0.16
    adaptive_glissando = glissando < 0
    glissando = abs(glissando)

    if wide
        viewsize$ = "wide"
    else
        viewsize$ = "compact"
    endif

    ; prefix the file specification by the corpus location, when defined
    if variableExists ("corpus_home$") and variableExists ("corpus_subdir_sound$")
        inputfname$ = corpus_home$ + corpus_subdir_sound$ + inputfname$
    elsif variableExists ("corpus_home$")
        inputfname$ = corpus_home$ + inputfname$
    endif

    @debug_msg: "prosogram_variants: inputfile='inputfname$' t1='pmf_t1' t2='pmf_t2' time_step='time_step:3' segmentation_name='segmentation_name$' wide='wide' rich='rich' adapative='adaptive_glissando'"
    @debug_msg: "prosogram_variants: outputfile='output_filename$'"

    ; collect the input files
    @check_regex: inputfname$
    @files_get_regex: "filelistID", inputfname$
    selectObject: filelistID
    .nfiles = Get number of strings
    if .nfiles = 0
        @msg: "No input files found for <'inputfname$'>"
    endif

    ; @task_flags and @initialization_multiple_files initialize various default settings, which may be overridden by optional settings in batch command
    @task_flags
    if save_intermediate_data
        needs_prosodic_profile = 1
    endif

    @initialization_multiple_files

    ; --- options read after the initialization ---
    ; The following lines override some variables initialized by @task_flags and @initialization_multiple_files
    @option_get: "boolean", "volatile=", "volatile", "no", .cmda$
    if task = task_pitch_plot
        @option_get: "boolean", "intensity=", "show_intensity", "no", .cmda$
    endif
    @option_get: "boolean", "pitchrange=", "show_pitchrange", "no", .cmda$
    if variableExists ("show_pitchrange")
        if show_pitchrange
            needs_pitchrange = 1
        endif
    endif
    @option_get: "boolean", "draw=", "draw_prosograms", "yes", .cmda$
    ; NOTE(review): with default "yes", this replaces the value of show_f0 derived from <rich> above
    @option_get: "boolean", "pitch=", "show_f0", "yes", .cmda$
    @option_get: "boolean", "portee=", "show_portee", "yes", .cmda$
    @option_get: "boolean", "tg_bound=", "show_tg_boundaries", tg_bound_default$, .cmda$
    @option_get: "boolean", "settings=", "show_settings", "yes", .cmda$
    @option_get: "boolean", "x_scale=", "show_x_scale", "yes", .cmda$
    @option_get: "boolean", "y_scale=", "show_y_scale", "yes", .cmda$
    @option_get: "boolean", "y_scale_r=", "show_y_scale_r", "yes", .cmda$
    @option_get: "boolean", "tiernames=", "show_tiernames", "yes", .cmda$
    @option_get: "boolean", "pauses=", "show_pauses", "no", .cmda$
    @option_get: "real", "targets=", "draw_pitch_target_values", "0", .cmda$
    @option_get: "boolean", "traject=", "show_trajectories", "no", .cmda$
    @option_get: "boolean", "grey=", "greyscale", "no", .cmda$
    @option_get: "boolean", "hesit=", "show_hesitations", "no", .cmda$
    @option_get: "boolean", "hesit_anno=", "hesitation_annotation", "no", .cmda$
    @option_get: "word", "hesit_method=", "hesitation_method", "all", .cmda$
    @option_check: "word", "hesitation_method", ":all:none:phon+pros:words:annotation:", hesitation_method$
    @option_get: "boolean", "rhythm=", "show_rhythm", "no", .cmda$
    @debug_msg: "prosogram_variants: wide='wide' rich='rich' pitchrange='show_pitchrange' adapative='adaptive_glissando'"
    @debug_msg: "prosogram_variants: show_tiernames='show_tiernames'"

    ; --- process each input file ---
    for .ifile from 1 to .nfiles
        selectObject: filelistID
        .fname$ = Get string: .ifile
        @debug_msg: "prosogram_variants: pmf_t1='pmf_t1:4', pmf_t2='pmf_t2:4', fname='.fname$'"
        @process_one_input_file: pmf_t1, pmf_t2, .ifile, .nfiles, .fname$
    endfor
    @cleanup_global
    removeObject: filelistID
    @debug_msg: "prosogram_variants: exit"
endproc
3201
+
3202
+
3203
procedure prosogram: .cmda$
# Batch command "prosogram": run @prosogram_variants on the command string <.cmda$>,
# reporting the start and the end of the command.
    .command$ = "prosogram"
    @batch_cmd_start: .command$, .cmda$
    @prosogram_variants: .cmda$
    @batch_cmd_ready: .command$
endproc
3208
+
3209
+
3210
procedure polytonia: .cmda$
# Batch command "polytonia": run @prosogram_variants with option "polytonia=yes"
# appended to the command string <.cmda$>.
    @batch_cmd_start: "polytonia", .cmda$
    .full_cmd$ = .cmda$ + " polytonia=yes"
    @prosogram_variants: .full_cmd$
    @batch_cmd_ready: "polytonia"
endproc
3215
+
3216
+
3217
procedure detect_boundaries: .cmda$
# Batch command "detect_boundaries": run @prosogram_variants with option "boundaries=yes"
# appended to the command string <.cmda$>.
    @batch_cmd_start: "detect_boundaries", .cmda$
    .full_cmd$ = .cmda$ + " boundaries=yes"
    @prosogram_variants: .full_cmd$
    @batch_cmd_ready: "detect_boundaries"
endproc
3222
+
3223
+
3224
procedure hesitations: .cmda$
# Batch command "hesitations": run @prosogram_variants with option "hesit_anno=yes"
# appended to the command string <.cmda$>.
    @batch_cmd_start: "hesitations", .cmda$
    .full_cmd$ = .cmda$ + " hesit_anno=yes"
    @prosogram_variants: .full_cmd$
    @batch_cmd_ready: "hesitations"
endproc
3229
+
3230
+
3231
procedure plot_pitch: .cmda$
# Batch command "plot_pitch": select the pitch plot task and run @prosogram_variants
# on the command string <.cmda$>.
# NOTE(review): prosogram_variants assigns task = task_prosogram after its own initialization,
# so the task selected here appears to be overwritten - confirm the intended behaviour.
    .command$ = "plot_pitch"
    @batch_cmd_start: .command$, .cmda$
    task = task_pitch_plot
    @prosogram_variants: .cmda$
    @batch_cmd_ready: .command$
endproc
3237
+
3238
+
3239
procedure prominence: .cmda$
   ; Batch command "prominence": runs @prosogram_variants with the prominence
   ; calculation and display flags switched on.
   .command$ = "prominence"
   @batch_cmd_start: .command$, .cmda$

   ; Global flags set before processing
   task = task_prosogram
   needs_loudness = 1
   calc_prominence = 1
   show_prominence = 1

   @prosogram_variants: .cmda$

   ; Only the two prominence flags are switched off again; <task> and
   ; <needs_loudness> keep the values assigned above.
   calc_prominence = 0
   show_prominence = 0
   @batch_cmd_ready: .command$
endproc
3250
+
3251
+
3252
procedure segmentation: .cmda$
   ; Batch command "segmentation": segments every input file matching "file="
   ; with the method selected by "method=".
   ; Options read from <.cmda$> (name, default):
   ;   file= ""            input file(s), may be a regular expression
   ;   t1= 0, t2= 0        time range passed to @process_one_input_file
   ;   time_step= 0.005
   ;   method= asyll       one of asyll, specsim, pitchchange, pitchterrace
   ;   left= 0, right= 0   stored in trimleft / trimright
   ;   output_suffix= ""
   ;   save_BP= no
   ;   fc_low= 300, fc_high= 3500      (method asyll only)
   ;   mindiff= 3, leftdiff= 2
   ;   threshold= 2                    (methods pitchchange and pitchterrace)
   ;   mindur= 0                       (method pitchterrace only)
   @debug_msg: "segmentation: entry"
   @batch_cmd_start: "segmentation", .cmda$
   @initialization_main

   ; --- general options ---
   @option_get: "word", "file=", "inputfname", "", .cmda$
   @option_get: "real", "t1=", "pmf_t1", "0", .cmda$
   @option_get: "real", "t2=", "pmf_t2", "0", .cmda$
   @option_get: "real", "time_step=", "time_step", "0.005", .cmda$
   @option_get: "word", "method=", "segmentation_name", "asyll", .cmda$
   @option_check: "word", "method", ":asyll:specsim:pitchchange:pitchterrace:", segmentation_name$
   @option_get: "real", "left=", "trimleft", "0", .cmda$
   @option_get: "real", "right=", "trimright", "0", .cmda$
   @option_get: "word", "output_suffix=", "output_suffix", "", .cmda$
   @option_get: "boolean", "save_BP=", "save_BP", "no", .cmda$

   ; Prefix the file specification with the corpus directory, when one is defined
   if (variableExists ("corpus_home$"))
      .prefix$ = corpus_home$
      if (variableExists ("corpus_subdir_sound$"))
         .prefix$ = .prefix$ + corpus_subdir_sound$
      endif
      inputfname$ = .prefix$ + inputfname$
   endif

   task = task_segmentation
   @map_segmentation_type: segmentation_name$
   @debug_msg: "segmentation: segmentation_name='segmentation_name$'"

   ; --- collect the input files ---
   @check_regex: inputfname$
   @files_get_regex: "filelistID", inputfname$
   selectObject: filelistID
   .nfiles = Get number of strings
   if (.nfiles < 1)
      @msg: "No input files found for <'inputfname$'>"
   endif

   @task_flags
   @initialization_multiple_files

   ; --- method-specific options ---
   ; These are read after @task_flags and @initialization_multiple_files because
   ; they override some of the variables those procedures initialize.
   if (segmentation_name$ == "asyll")
      @option_get: "real", "fc_low=", "fc_low", "300", .cmda$
      @option_get: "real", "fc_high=", "fc_high", "3500", .cmda$
   endif
   @option_get: "real", "mindiff=", "mindiff", "3", .cmda$
   @option_get: "real", "leftdiff=", "diff_left", "2", .cmda$
   if (segmentation_name$ == "pitchchange" or segmentation_name$ == "pitchterrace")
      @option_get: "real", "threshold=", "pc_threshold", "2", .cmda$
   endif
   if (segmentation_name$ == "pitchterrace")
      @option_get: "real", "mindur=", "mindur_terrace", "0", .cmda$
   endif
   volatile = 1      ; avoid reading and overwriting pitch and intensity of BP filtered speech

   @msg: " 'newline$'Batch command segmentation: method='segmentation_name$' fc_low='fc_low:0' fc_high='fc_high:0' mindiff='mindiff' leftdiff='diff_left' time_step='time_step' output_suffix=<'output_suffix$'>"

   ; --- process the files one by one ---
   for .ifile from 1 to .nfiles
      selectObject: filelistID
      .fname$ = Get string: .ifile
      @msg: "Processing file <'.fname$'>"
      @process_one_input_file: pmf_t1, pmf_t2, .ifile, .nfiles, .fname$
   endfor

   @cleanup_global
   removeObject: filelistID
   @batch_cmd_ready: "segmentation"
endproc
3312
+
3313
+
3314
+ # Batch command "calculate_duration_data"
3315
+ # arguments: option_name, default_value
3316
+ # file= "" ; input file or files specified by a regular expression
3317
+ # unit= "sound" ; speech segment for which to obtain duration data: sound, syllable...
3318
+ # collect= "yes" ; collect results of multiple input files into same output file
3319
+ # out= default ; output filename, default = "_dur_<unit>.txt"
3320
procedure calculate_duration_data: .cmda$
   ; Batch command "calculate_duration_data": reads each input file matching
   ; "file=" and, for unit "sound", hands it to @duration_data_from_textgrid.
   ; Options read from <.cmda$>: file=, unit= (default "sound"),
   ; collect= (default yes), out= (default "<input dir>_dur_<unit>.txt").
   @batch_cmd_start: "calculate_duration_data", .cmda$
   @initialization_main

   ; The input filename is specified as a TextGrid file
   @option_get: "word", "file=", "filespec", "", .cmda$
   @option_get: "word", "unit=", "unit", "sound", .cmda$
   ; collect=yes appends the results of all input files to one output file
   @option_get: "boolean", "collect=", "collect_output", "yes", .cmda$

   @check_regex: filespec$
   @files_get_regex: "filelistID", filespec$
   selectObject: filelistID
   .nfiles = Get number of strings
   if (.nfiles < 1)
      @msg: "No input files found for <'filespec$'>"
   endif

   ; The default output filename is built from the directory part of the file specification
   @fname_parts: filespec$
   indir$ = result4$
   @option_get: "word", "out=", "outfname", "'indir$'_dur_'unit$'.txt", .cmda$
   @msg: "calculate_duration_data: filespec='filespec$', '.nfiles' files, collect='collect_output', outfname='outfname$'"

   for .j from 1 to .nfiles
      selectObject: filelistID
      .fname$ = Get string: .j
      @fname_parts: .fname$
      .stem$ = result2$
      @msg: "file='.j' <'.fname$'> out='outfname$'"
      dataID = Read from file: .fname$
      @corpus_conversion: 1, "dataID", corpus$
      if (unit$ == "sound")
         ; File counter handed on: the running file number when collecting,
         ; otherwise always 1.
         .counter = if collect_output then .j else 1 fi
         @duration_data_from_textgrid: dataID, .counter, unit$, outfname$, .stem$
      endif
      removeObject: dataID
   endfor

   removeObject: filelistID
   @batch_cmd_ready: "calculate_duration_data"
endproc
3363
+
3364
+
3365
procedure convert_eps .cmda$
# Batch command "convert_eps": convert EPS files to PNG, GIF, JPG, PDF or one multipage PDF.
# Assumes Ghostscript, nconvert and pdftk are installed.
# nconvert is required when converting to GIF format. http://www.xnview.com/
# pdftk is required when creating multipage PDF files. http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/
# Options read from <.cmda$> (name, default):
#   file= "*.eps"                 input EPS file(s)
#   outfname= "<basename>.pdf"    output name, used for multipage PDF only
#   format= "JPG"                 one of PNG, GIF, JPG, PDF, multipage_PDF
#   resolution= "300"             one of 72, 96, 120, 300, 600
#   outdir= "<same_as_input>"     output directory
# Resolutions 96 and 120 are rejected for PDF and JPG output; 300 dpi is used instead.
   ; Specify the filename as eps files
   call option_get word file= filespec "*.eps" '.cmda$'
   call fname_parts 'filespec$'
   basename$ = result2$
   indir$ = result4$
   call option_get word outfname= outfname "'basename$'.pdf" '.cmda$'
   call option_get word format= graphics_format "JPG" '.cmda$'
   call option_check word graphics_format :PNG:GIF:JPG:PDF:multipage_PDF: 'graphics_format$'
   call option_get word resolution= resolution "300" '.cmda$'
   call option_check word resolution :72:96:120:300:600: 'resolution$'
   call option_get word outdir= outdir "<same_as_input>" '.cmda$'
   if (outdir$ = "<same_as_input>")
      outdir$ = indir$
   else
      call fname_parts 'outdir$'
      outdir$ = result4$
   endif
   Create Strings as file list... filelist 'filespec$'
   .nfiles = Get number of strings
   pdf_files$ = ""
   call msg convert_eps: filespec='filespec$', '.nfiles' files, outdir='outdir$'
   call msg convert_eps: format='graphics_format$'
   ; Command prefixes for the external converters
   eps_pdf$ = path_ghostscript$ + " -dNOPAUSE -dBATCH -dQUIET -sDEVICE=pdfwrite"
   eps_jpg$ = path_ghostscript$ + " -dNOPAUSE -dBATCH -dQUIET -sDEVICE=jpeg -sEPSCrop"
   eps_png$ = path_ghostscript$ + " -dNOPAUSE -dBATCH -dQUIET -sDEVICE=png16 -sEPSCrop"
   png_gif$ = path_nconvert$
   for .j to .nfiles
      select Strings filelist
      .fname$ = Get string... .j
      call fname_parts '.fname$'
      basename$ = result2$
      pdf_files$ = pdf_files$ + "'basename$'.pdf "
      src$ = indir$ + basename$
      dst$ = outdir$ + basename$
      res = 'resolution$'
      if (graphics_format$ = "GIF")
         ; GIF goes through a temporary PNG, which is deleted afterwards
         system 'eps_png$' -r'res' -sOutputFile="'dst$'.png" "'src$'.eps"
         system 'png_gif$' -quiet -out gif 'dst$'.png
         filedelete 'dst$'.png
      elsif (graphics_format$ = "PNG")
         system 'eps_png$' -r'res' -sOutputFile="'dst$'.png" "'src$'.eps"
      elsif (index (graphics_format$, "PDF"))	; PDF or multipage PDF
         if (res == 96 or res == 120)
            @msg: "Invalid resolution for PDF. Using 300 dpi."
            res = 300
         endif
         system 'eps_pdf$' -r'res' -sOutputFile="'dst$'.pdf" "'src$'.eps"
      elsif (graphics_format$ = "JPG")
         if (res == 96 or res == 120)
            @msg: "Invalid resolution for JPEG. Using 300 dpi."
            ; Apply the fallback announced in the message (this assignment was missing)
            res = 300
         endif
         system 'eps_jpg$' -r'res' -sOutputFile="'dst$'.jpg" "'src$'.eps"
      endif
   endfor
   if (index (graphics_format$, "multipage"))
      ; use wildcard expansion by pdftk in order to avoid command line buffer overflow
      pdf_out$ = outdir$ + "/" + outfname$
      pdf_out$ = replace$ (pdf_out$, "//", "/", 0)
      pdf_in$ = replace_regex$ (filespec$, "\.eps", "\.pdf", 1)
      printline convert_eps: system pdftk 'pdf_in$' cat output 'pdf_out$'
      system pdftk 'pdf_in$' cat output 'pdf_out$'
   endif
   select Strings filelist
   Remove
   @msg: "Batch command convert_eps Ready"
endproc
3437
+
3438
+
3439
procedure corpus_open: .cmda$
   ; Batch command "corpus_open": opens every sound file matching "file=" as a
   ; long sound file and reads the TextGrid with the same basename when it is readable.
   ; The opened objects are not removed by this procedure.
   @initialization_main
   ; Filenames are specified as a regular expression over wav files
   @option_get: "word", "file=", "inputfname", ".*.wav$", .cmda$

   ; Prefix the file specification with the corpus directory, when one is defined
   if (variableExists ("corpus_home$"))
      .prefix$ = corpus_home$
      if (variableExists ("corpus_subdir_sound$"))
         .prefix$ = .prefix$ + corpus_subdir_sound$
      endif
      inputfname$ = .prefix$ + inputfname$
   endif

   @files_get_regex: "filelistID", inputfname$
   selectObject: filelistID
   .nfiles = Get number of strings
   if (.nfiles < 1)
      @msg: "No input files found for <'inputfname$'>"
   endif

   for .k from 1 to .nfiles
      selectObject: filelistID
      .fname$ = Get string: .k
      @fname_parts: .fname$
      .basename$ = result2$
      Open long sound file: .fname$
      .dur = Get total duration
      @msg: "'.basename$''tab$''.dur:3'"

      ; Location of the matching TextGrid: corpus home (plus TextGrid subdirectory)
      ; when defined, otherwise the bare basename.
      if (not variableExists ("corpus_home$"))
         .tg$ = .basename$ + ".TextGrid"
      elsif (variableExists ("corpus_subdir_tg$"))
         .tg$ = corpus_home$ + corpus_subdir_tg$ + .basename$ + ".TextGrid"
      else
         .tg$ = corpus_home$ + .basename$ + ".TextGrid"
      endif
      if (fileReadable (.tg$))
         Read from file: .tg$
      endif
   endfor

   removeObject: filelistID
   @msg: "Batch command corpus_open Ready"
endproc
3479
+
3480
+
3481
procedure validate_tier: .cmda$
   ; Batch command "validate_tier": runs @validate_phoneme_tier on every readable
   ; TextGrid file matching "file=" (default ".*.TextGrid$").
   @initialization_main
   @option_get: "word", "file=", "inputfname", ".*.TextGrid$", .cmda$

   ; The corpus directory is prepended only when a non-empty <corpus$> and
   ; <corpus_home$> are both defined.
   if (variableExists ("corpus$") and length (corpus$) and variableExists ("corpus_home$"))
      .prefix$ = corpus_home$
      if (variableExists ("corpus_subdir_tg$"))
         .prefix$ = .prefix$ + corpus_subdir_tg$
      endif
      inputfname$ = .prefix$ + inputfname$
   endif

   @files_get_regex: "filelistID", inputfname$
   selectObject: filelistID
   .nfiles = Get number of strings
   if (.nfiles < 1)
      @msg: "No input files found for <'inputfname$'>"
   endif

   for .k from 1 to .nfiles
      ; Reselect the file list on every pass before querying it
      selectObject: filelistID
      .tg$ = Get string: .k
      if (fileReadable (.tg$))
         @validate_phoneme_tier: .tg$
      endif
   endfor

   removeObject: filelistID
   @msg: "Batch command validate_tier Ready"
endproc
3509
+
3510
+
3511
procedure find_nucleus: .type$, .t1, .t2, .forward
; Find the first nucleus of given <.type$> in time range <.t1>..<.t2>
; <.type$>     "a" = valid_nucleus, "-" = rejected nucleus
; <.forward>   search from .t1 to .t2, otherwise from .t2 to .t1
; Return index of nucleus (in nucleus_tier) in variable <result> or 0 if not found
; Uses globals <nucleiID> (TextGrid) and <nucleus_tier> (tier number).
   result = 0
   selectObject: nucleiID
   .endt = Get end time
   .startt = Get start time
   ; Clamp the search range to the time domain of the TextGrid
   .endt = min (.endt, .t2)
   .startt = max (.startt, .t1)
   ; Start from the clamped range, so that no time outside the tier is queried
   if (.forward)
      .t = .startt
   else
      .t = .endt - 0.002
   endif
   ; "result == 0" must guard both directions. Without the extra parentheses "and"
   ; binds tighter than "or", so a forward search would keep scanning after a hit
   ; and return the last match instead of the first.
   while (result == 0 and ((.forward and .t < .endt) or (.forward == 0 and .t > .startt)))
      .i = Get interval at time: nucleus_tier, .t
      .s$ = Get label of interval: nucleus_tier, .i
      if (.s$ == .type$)
         result = .i
      endif
      if (.forward)
         ; step to the start of the next interval
         .t = Get end time of interval: nucleus_tier, .i
      else
         ; step to the start of the previous interval, or stop at the first one
         .t = .startt
         if (.i > 1)
            .t = Get start time of interval: nucleus_tier, .i-1
         endif
      endif
   endwhile
endproc
3543
+
3544
+
3545
procedure detect_hesitations: .t1a, .t2a, .method$
# Detect hesitations for the nuclei in time range <.t1a>..<.t2a>, label them "H" in the
# hesitation tier, and copy the hesitation tier to column <j_hesitation> of table <nucldatID>.
# Hesitations are taken either from the hesitation tier in the TextGrid or from acoustic cues
# and phoneme identity.
# <.method$>
#   "phon+pros"   use phoneme label and prosodic features (phon_dur >= 0.3 && intrasyll <= 0 && intrasyll > -3 ST)
#                 phoneme is "\oe", "\o/", "9", "@" or "2"
#   "words"       use word tier from annotation TextGrid, where hesitations are labeled "euh" or "eh"
#   "all"         combine above methods
#   "none"        don't attempt detection
#   "annotation"  use hesitation tier from annotation TextGrid, where a hesitation is marked as "H"
   @debug_msg: "detect_hesitations: entry, method='.method$'"

   ; --- Step 1: detection, only for methods that derive hesitations from the segmentation ---
   if segfile_available and (.method$ == "phon+pros" or .method$ == "words" or .method$ == "all")
      @intervals_from_time_range: nucleiID, nucleus_tier, .t1a, .t2a, "first_interval", "last_interval"
      for .k from first_interval to last_interval
         selectObject: nucleiID
         @is_nucleus: .k
         if (result)
            .hesit = 0      ; nothing detected yet for this nucleus
            .t1 = Get start time of interval: nucleus_tier, .k
            .t2 = Get end time of interval: nucleus_tier, .k
            .tmid = .t1 + (.t2-.t1)/2

            ; Cue 1: long hesitation vowel with a flat or slightly falling intrasyllabic pitch
            if ((.method$ == "phon+pros" or .method$ == "all") and phones_available)
               @interval_from_time: nucleiID, phone_tier, .tmid, "i"
               .label$ = Get label of interval: phone_tier, i
               .label$ = replace_regex$(.label$, """", "", 0)	; remove double quotes from match string
               .px1 = Get start time of interval: phone_tier, i
               .px2 = Get end time of interval: phone_tier, i
               .phon_dur = .px2-.px1
               ; the pointer tier label holds the row number of this nucleus in nucldatID
               .ptr$ = Get label of interval: pointer_tier, .k
               .row = '.ptr$'
               selectObject: nucldatID
               .intST = Get value: .row, j_intrasyl
               if (index ("=\o/=\oe=9=2=@=", "='.label$'=") and .phon_dur >= 0.3 and .intST <= 0 and abs(.intST) < 3)
                  .hesit = 1
               endif
            endif

            ; Cue 2: word label "euh" or "eh"
            if ((.method$ == "words" or .method$ == "all") and word_tier_in > 0)
               @interval_from_time: segmentationID, word_tier_in, .tmid, "jw"
               .label$ = Get label of interval: word_tier_in, jw
               if (index ("=euh=eh=", "='.label$'="))
                  .hesit = 1
               endif
            endif

            ; Cue 3: "H" already present in the hesitation tier
            if ((.method$ == "annotation" or .method$ == "all") and hesitation_available)
               @interval_from_time: nucleiID, hesitation_tier, .tmid, "jw"
               .label$ = Get label of interval: hesitation_tier, jw
               if (.label$ == "H")
                  .hesit = 1
               endif
            endif

            if (.hesit)
               @interval_from_time: nucleiID, hesitation_tier, .tmid, "i"
               Set interval text: hesitation_tier, i, "H"
            endif
         endif
      endfor
      hesitation_available = 1
   endif

   ; --- Step 2: copy info in hesitation tier to column of nucldatID ---
   selectObject: nucldatID
   .nrows = Get number of rows
   for .r from 1 to .nrows
      .t1n = Get value: .r, j_nucl_t1
      .t2n = Get value: .r, j_nucl_t2
      selectObject: nucleiID
      .i = Get interval at time: hesitation_tier, .t1n+(.t2n-.t1n)/2
      .label$ = Get label of interval: hesitation_tier, .i
      .hesit = if .label$ == "H" then 1 else 0 fi
      selectObject: nucldatID
      Set value: .r, j_hesitation, .hesit
   endfor
   @debug_msg: "detect_hesitations: exit"
endproc
3624
+
3625
+
3626
procedure process_cprom: .grid
   ; Clean up the tiers of TextGrid <.grid>: remove tier "ss", strip non-speech
   ; symbols from "phones", reduce them to "_" in "syll", rename the labels of
   ; "prom" and empty all labels of "rg". Tiers that are not found are skipped.

   @tier_number_by_name: .grid, "^ss$"
   if (result)
      Remove tier: result
   endif

   ; phones: remove labels for pauses ("_"), respirations ("*"), etc. "+"
   @tier_number_by_name: .grid, "phones"
   if (result)
      Replace interval texts: result, 0, 0, "[\+\*_#%]", "", "Regular Expressions"
   endif

   ; syll: simplify complex labels for pauses, respirations, etc. to "_"
   @tier_number_by_name: .grid, "syll"
   .syll_tier = result
   if (.syll_tier)
      Replace interval texts: .syll_tier, 0, 0, "[\+\*_#]+", "_", "Regular Expressions"
   endif

   ; Disabled code (condition is constant 0): create prominence and hesitation
   ; tiers from "delivery" tier in TextGrid <.grid>
   if (0)
      @tier_number_by_name: .grid, "delivery"
      if (result)
         .deliv_tier = result
         .nt = Get number of tiers
         .prom_tier = .nt + 1
         .hes_tier = .nt + 2
         .ni = Get number of intervals: .deliv_tier
         Duplicate tier: .syll_tier, .prom_tier, "prom"
         Duplicate tier: .syll_tier, .hes_tier, "hes"
         for .j from 1 to .ni
            .label$ = Get label of interval: .deliv_tier, .j
            ; prominence: "P" -> "S", "p" -> "W", otherwise "0"
            if (index (.label$, "P"))
               .s$ = "S"
            elsif (index (.label$, "p"))
               .s$ = "W"
            else
               .s$ = "0"
            endif
            Set interval text: .prom_tier, .j, .s$
            ; hesitation: "z" -> "H", otherwise empty
            if (index (.label$, "z"))
               .s$ = "H"
            else
               .s$ = ""
            endif
            Set interval text: .hes_tier, .j, .s$
         endfor
      endif
   endif

   ; prom: remove "0", rename "W" to "p" and "S" to "P"
   @tier_number_by_name: .grid, "prom"
   if (result)
      Replace interval texts: result, 0, 0, "0", "", "Regular Expressions"
      Replace interval texts: result, 0, 0, "W", "p", "Regular Expressions"
      Replace interval texts: result, 0, 0, "S", "P", "Regular Expressions"
   endif

   ; rg: remove all labels
   @tier_number_by_name: .grid, "rg"
   if (result)
      Replace interval texts: result, 0, 0, ".*", "", "Regular Expressions"
   endif
endproc
3681
+
3682
+
3683
procedure lpa_sampa: .grid, .tiername$
# Convert LPA (Limsi Phonetic Alphabet) to SAMPA in tier <.tiername$> of TextGrid <.grid>.
# Nothing is changed when the tier is not found.
   @tier_number_by_name: .grid, .tiername$
   .tier = result
   if (.tier)
      ; Ordered rewrite rules (regular expression -> replacement).
      ; The order matters: a symbol produced by one rule must not be rewritten again
      ; by a later one (e.g. "@" -> "2" has to come before "x" -> "@").
      ; Rules 1-3 deal with pause, incomprehensible sound and respiration.
      .from$ [1] = "\."
      .to$ [1] = "_"
      .from$ [2] = "\*"
      .to$ [2] = ""
      .from$ [3] = "H"
      .to$ [3] = ""
      ; Rule 4 deals with intervals containing several non speech elements.
      .from$ [4] = "_+"
      .to$ [4] = "_"
      .from$ [5] = "@"
      .to$ [5] = "2"
      .from$ [6] = "x"
      .to$ [6] = "@"
      .from$ [7] = "h"
      .to$ [7] = "H"
      .from$ [8] = "I"
      .to$ [8] = "e~"
      .from$ [9] = "O"
      .to$ [9] = "o~"
      .from$ [10] = "A"
      .to$ [10] = "a~"
      .from$ [11] = "c"
      .to$ [11] = "O"
      .from$ [12] = "X"
      .to$ [12] = "9"
      .from$ [13] = "r"
      .to$ [13] = "R"
      .from$ [14] = "N"
      .to$ [14] = "J"
      for .k from 1 to 14
         Replace interval texts: .tier, 0, 0, .from$ [.k], .to$ [.k], "Regular Expressions"
      endfor
   endif
endproc
3706
+
3707
+
3708
procedure tier_trim_nucleus: .grid, .tier, .dtleft, .dtright
; Shrink every interval labeled "a" in <.tier> of TextGrid <.grid> by cutting <.dtleft>
; from its start and <.dtright> from its end. The cut points are limited so that they
; stay 0.1 s away from the opposite edge; a side is skipped when no valid cut point remains.
; The cut-off parts become separate intervals with an empty label.
   @debug_msg: "tier_trim_nucleus: entry, .tier='.tier', .dtleft='.dtleft:3'"
   selectObject: .grid
   .ntiers = Get number of tiers
   if (.tier <= .ntiers)
      .count = Get number of intervals: .tier
      .k = 1
      while (.k <= .count)
         .text$ = Get label of interval: .tier, .k
         if (.text$ == "a")
            .from = Get start time of interval: .tier, .k
            .to = Get end time of interval: .tier, .k

            ; left side: the label moves to the new right-hand part, which becomes interval .k
            if (.dtleft > 0 and .to - .from > .dtleft)
               .cut = min (.from + .dtleft, .to - 0.1)
               if (.cut > .from)
                  Set interval text: .tier, .k, ""
                  Insert boundary: .tier, .cut
                  .k += 1
                  .count += 1
                  Set interval text: .tier, .k, .text$
                  .from = .cut
               endif
            endif

            ; right side: the label stays on the left-hand part; .k then skips the cut-off part
            if (.dtright > 0 and .to - .from > .dtright)
               .cut = max (.from + 0.1, .to - .dtright)
               if (.cut < .to)
                  Set interval text: .tier, .k, ""
                  Insert boundary: .tier, .cut
                  Set interval text: .tier, .k, .text$
                  .count += 1
                  .k += 1
               endif
            endif
         endif
         .k += 1
      endwhile
   endif
endproc
3747
+
3748
+
3749
procedure textgrid_disable_textstyle: .gridID
   ; Apply @tier_disable_textstyle to every tier of TextGrid <.gridID>.
   selectObject: .gridID
   .ntiers = Get number of tiers
   .t = 1
   while (.t <= .ntiers)
      @tier_disable_textstyle: .gridID, .t
      .t += 1
   endwhile
endproc
3756
+
3757
+
3758
procedure tier_disable_textstyle: .gridID, .tier
   ; Escape the text-style characters in all labels of <.tier> in TextGrid <.gridID>:
   ;   "_" becomes "\_ "   (avoid interpretation of underscore as subscript)
   ;   "%" becomes "\% "   (avoid interpretation of percent as italic text style)
   selectObject: .gridID
   .count = Get number of intervals: .tier
   for .k from 1 to .count
      .text$ = Get label of interval: .tier, .k
      .escaped$ = replace$ (replace$ (.text$, "_", "\_ ", 0), "%", "\% ", 0)
      Set interval text: .tier, .k, .escaped$
   endfor
endproc
3768
+
3769
+
3770
procedure tier_merge_intervals_except: .gridID, .tier, .except$
; Merge adjacent intervals in <.tier> of TextGrid <.gridID>, unless one of the two
; neighbours carries the label <.except$>.
   selectObject: .gridID
   .count = Get number of intervals: .tier
   .k = 1
   while (.k < .count)
      .left$ = Get label of interval: .tier, .k
      .right$ = Get label of interval: .tier, .k+1
      if (.left$ == .except$ or .right$ == .except$)
         ; the boundary is kept: move on to the next pair
         .k += 1
      else
         ; merge the pair and stay on the same interval, so that the merged
         ; interval is compared with its new right neighbour
         Remove right boundary: .tier, .k
         .count -= 1
      endif
   endwhile
endproc
3787
+