pelican-nlp 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,773 @@
1
+ # polytonia.praat -- include file for prosogram
2
+
3
+ # This file is included by prosogram.praat. It isn't a stand-alone script. Use prosogram.praat instead.
4
+ # Author: Piet Mertens
5
+ # For documentation see:
6
+ # http://sites.google.com/site/prosogram/
7
+ # http://sites.google.com/site/prosogram/polytonia
8
+ # http://sites.google.com/site/prosogram/userguide#polytonia
9
+
10
+ # 2021-12-10 bugfix
11
+ # Last modifications: 2020-02-27, 2021-12-10
12
+
13
+
14
+ # Procedure hierarchy
15
+ #
16
+ # polytonia_main
17
+ # load_speaker_range fetch pitch range parameters of current speaker into current context
18
+ # get_tier_label_for_nucleus get label for interval on specified tier for specified interval on nucleus tier
19
+ # get_values_for_nucleus
20
+ # find_previous_nucleus get index of preceding valid nucleus by same speaker
21
+ # intrasyllabic_contour determine contour type (level, rise, fall...) of current nucleus
22
+ # polytonia_sustain decide whether contour is labeled "sustain"
23
+ # polytonia_extrema assign pitch level T (top) or B (bottom), when pitch is close to pitch range extrema
24
+ # polytonia_localchange assign pitch level L (low), M (mid), H (high), from pitch changes in nucleus and left context
25
+ # polytonia_infer assign pitch level from intrasyllabic contour and pitch range
26
+ # polytonia_island assign pitch level in short interpausal stretch with unassigned pitch level, from pitch range
27
+ # infer_pitchlevel infer pitch level from pitch range
28
+ # polytonia_plateau
29
+ # Debugging info added to contour label (partly discarded at end of process)
30
+ # o pitch out of valid range; don't use this syllable
31
+ # l{ intrasyllabic large rise/fall starts in lower half of pitch range
32
+ # m{ intrasyllabic large rise starts just below median of pitch range
33
+ # h{ intrasyllabic large fall starts in upper quarter of pitch range
34
+ # l{{ intrasyllabic small rise starts slightly above level of previous nucleus, which is L
35
+ # h4 intrasyllabic level contour in upper quarter of pitch range
36
+
37
+
38
+ ; @logging_start: "reset debug", "_log.txt"
39
+
40
+
41
+ # Polytonia attempts to compute pitch level and pitch contour label for each syllable, using the prosodic properties (stored in table nucldatID) of these syllables.
42
+ # These labels are stored in a tier with syllable-sized or nucleus-sized intervals (depending on the availability of syllable boundaries). This tier is created in prosomain.praat, see "polytonia_annotation".
43
+ # The input intervals are provided by the Prosogram segmentation selected by the user (possibly based on corpus annotation). Pauses are marked as intervals with label "_".
44
+
45
+
46
procedure polytonia_main: .start_time, .end_time
# Entry point: compute Polytonia labels for the speech between .start_time and .end_time.
# Fills two tiers of nucleiID:
#   polytonia_tier    standard Polytonia labels
#   polytonia2_tier   "polytonia-iw" (isolated words) variant, which adds the polytonia_island step
   @debug_msg: "polytonia_main: entry"

   ; Max duration (s) of left context used for detection of pitch changes
   .ctxt_maxdur = 0.5

   ; Pick the tier whose boundaries are copied to the polytonia tiers:
   ; nucleus boundaries by default, syllable boundaries when the segmentation provides them.
   .src_tier = nucleus_tier
   if (segm_type == segm_msyllvow or segm_type == segm_msyllpeak or segm_type == segm_mrhyme)
      .src_tier = syllable_tier
   elsif (segm_type == segm_vnucl and segfile_available and syllables_available)
      .src_tier = syllable_tier
   endif

   ; Rebuild polytonia_tier (it is empty when the TextGrid is created) as a copy of the source tier
   selectObject: nucleiID
   Remove tier: polytonia_tier
   Duplicate tier: .src_tier, polytonia_tier, "polytonia"
   if (.src_tier == nucleus_tier)
      @tier_merge_intervals_except: nucleiID, polytonia_tier, "a"
   endif
   @tier_clear_text: nucleiID, polytonia_tier

   ; No speaker identified yet (speakers are numbered from 1 to N); forces load_speaker_range to initialize
   current_speaker = 0

   ; All steps iterate over the nucleus intervals (nucleus_tier of nucleiID) inside the time range
   @intervals_from_time_range: nucleiID, nucleus_tier, .start_time, .end_time, "i_first", "i_last"
   @intrasyllabic_contour: i_first, i_last, polytonia_tier
   @polytonia_sustain: i_first, i_last, polytonia_tier
   @polytonia_extrema: i_first, i_last, polytonia_tier
   @polytonia_localchange: i_first, i_last, polytonia_tier, .ctxt_maxdur
   @polytonia_infer: i_first, i_last, polytonia_tier, .ctxt_maxdur

   ; Snapshot of the labels at this stage becomes the starting point of Polytonia version 2
   Remove tier: polytonia2_tier
   Duplicate tier: polytonia_tier, polytonia2_tier, "polytonia-iw"

   ; Standard Polytonia: extrapolate backward, forward, plateaus, forward again, then clean up
   @polytonia_extrapolate: i_first, i_last, polytonia_tier, 1, .ctxt_maxdur
   @polytonia_extrapolate: i_first, i_last, polytonia_tier, 0, .ctxt_maxdur
   @polytonia_plateau: i_first, i_last, polytonia_tier
   @polytonia_extrapolate: i_first, i_last, polytonia_tier, 0, .ctxt_maxdur
   @polytonia_postproc: polytonia_tier, .start_time, .end_time

   ; Polytonia-iw: same chain, preceded by the step for short interpausal stretches
   ; with unassigned pitch level
   @intervals_from_time_range: nucleiID, nucleus_tier, .start_time, .end_time, "i_first", "i_last"
   @polytonia_island: i_first, i_last, polytonia2_tier
   @polytonia_extrapolate: i_first, i_last, polytonia2_tier, 1, .ctxt_maxdur
   @polytonia_extrapolate: i_first, i_last, polytonia2_tier, 0, .ctxt_maxdur
   @polytonia_plateau: i_first, i_last, polytonia2_tier
   @polytonia_extrapolate: i_first, i_last, polytonia2_tier, 0, .ctxt_maxdur
   @polytonia_postproc: polytonia2_tier, .start_time, .end_time
   @debug_msg: "polytonia_main: exit"
endproc
96
+
97
+
98
procedure load_speaker_range: .speaker
# Load the pitch range parameters of speaker .speaker into global variables.
# Nothing is reloaded when .speaker is already the current speaker.
#   in Hz:  median, bottom, top, q1_, q3_
#   in ST:  range_, upper_range_, lower_range_, large_, medium_, flat_
#   other:  current_speaker, nnucl_speaker
   @debug_msg: "load_speaker_range: .speaker='.speaker'"
   if (.speaker <> current_speaker)
      current_speaker = .speaker
      ; The row of the speaker in the profile table is the speaker number itself
      selectObject: profileID
      median = Get value: .speaker, j_pitch_median_Hz
      bottom = Get value: .speaker, j_pitch_bottom_Hz
      top = Get value: .speaker, j_pitch_top_Hz
      range_ = Get value: .speaker, j_pitch_range
      nnucl_speaker = Get value: .speaker, j_nrofnucl
      q1_ = Get value: .speaker, j_rawf0_p25
      q3_ = Get value: .speaker, j_rawf0_p75
      ; Extent of the range above and below the median
      upper_range_ = 12 * log2 (top/median)
      lower_range_ = 12 * log2 (median/bottom)
      ; Intersyllabic pitch interval that is ignored
      flat_ = 1.2
      ; Thresholds for large and medium intersyllabic pitch intervals depend on the pitch range
      if (range_ >= 8.5)
         ; "normal" range
         large_ = 4.5
         medium_ = 3
      else
         ; narrow (7 ST or more) and very narrow ranges share the medium threshold
         medium_ = 2.5
         if (range_ >= 7)
            large_ = 3.5
         else
            large_ = 3.2
         endif
      endif
   endif
   @debug_msg: "load_speaker_range: exit"
endproc
131
+
132
+
133
procedure get_tier_label_for_nucleus: .i_nucl, .tier
# Look up the interval of tier .tier that contains the midpoint of nucleus interval .i_nucl.
#   result1   out: index of that interval in .tier
#   label$    out: label of that interval (the contour label)
# Leaves nucleiID selected.
   selectObject: nucleiID
   .x1 = Get start time of interval: nucleus_tier, .i_nucl
   .x2 = Get end time of interval: nucleus_tier, .i_nucl
   ; Midpoint of the nucleus
   .tmid = .x1+(.x2-.x1)/2
   result1 = Get interval at time: .tier, .tmid
   label$ = Get label of interval: .tier, result1
endproc
143
+
144
+
145
procedure get_values_for_nucleus: .i
# Fetch the properties of nucleus interval .i from table nucldatID into global variables:
#   pj                    row of the nucleus in nucldatID (read from the label on pointer_tier)
#   vf0lo, vf0hi          values of columns lopitch and hipitch (F0 in Hz, after stylization)
#   vf0start, vf0end      values of columns j_f0_start and j_f0_end
#   vhesit                value of column j_hesitation
#   vspeakerid            value of column j_speaker_id
# Leaves nucldatID selected.
   selectObject: nucleiID
   .row_label$ = Get label of interval: pointer_tier, .i
   pj = number(.row_label$)
   selectObject: nucldatID
   vspeakerid = Get value: pj, j_speaker_id
   vhesit = Get value: pj, j_hesitation
   vf0start = Get value: pj, j_f0_start
   vf0end = Get value: pj, j_f0_end
   vf0lo = Get value: pj, lopitch
   vf0hi = Get value: pj, hipitch
endproc
159
+
160
+
161
procedure intrasyllabic_contour: .i_first, .i_last, .dest_tier
# Determine the contour type (level, rise, fall...) of each nucleus from the tonal segments
# of the stylization (stylID) that lie inside the nucleus.
#   .i_first ... .i_last   indices within nucleus_tier on which to apply the procedure
#   .dest_tier             tier where the output labels are stored
# Symbols: R / r = large / medium rise, F / f = large / medium fall, _ = level.
   @debug_msg: "intrasyllabic_contour: entry"
   for .j from .i_first to .i_last
      selectObject: nucleiID
      @is_nucleus: .j
      if (result)
         .x1 = Get start time of interval: nucleus_tier, .j
         .x2 = Get end time of interval: nucleus_tier, .j
         @get_values_for_nucleus: .j
         @load_speaker_range: vspeakerid
         ; Stylization points nearest to the nucleus boundaries delimit the tonal segments
         selectObject: stylID
         .i = Get nearest index from time: .x1
         .i2 = Get nearest index from time: .x2
         .nsegments = .i2-.i
         .last_seg = .i2 - 1
         .label$ = ""
         for .seg from .i to .last_seg
            .xL = Get time from index: .seg
            .xR = Get time from index: .seg+1
            .yL = Get value at time: .xL
            .yR = Get value at time: .xR
            ; Pitch interval (in ST) covered by the current tonal segment
            .intST = 12 * log2 (.yR/.yL)
            ; Level unless one of the thresholds is reached
            .sym$ = "_"
            if (.intST >= large_)
               .sym$ = "R"
            elsif (.intST >= medium_)
               .sym$ = "r"
            elsif (.intST <= -large_)
               .sym$ = "F"
            elsif (.intST <= -medium_)
               .sym$ = "f"
            elsif (.nsegments > 1 and .xR-.xL < 0.07)
               ; short plateau in a contour with several segments: skipped
               .sym$ = ""
            endif
            .label$ = .label$ + .sym$
         endfor
         ; Merge pairs of rises, pairs of falls, and runs of level segments
         .label$ = replace_regex$(.label$, "[Rr][Rr]", "R", 0)
         .label$ = replace_regex$(.label$, "[Ff][Ff]", "F", 0)
         .label$ = replace_regex$(.label$, "___*", "_", 0)
         ; Store the label in the destination interval that contains the nucleus midpoint
         selectObject: nucleiID
         .imid = Get interval at time: .dest_tier, .x1+(.x2-.x1)/2
         Set interval text: .dest_tier, .imid, .label$
      endif
   endfor
   @debug_msg: "intrasyllabic_contour: exit"
endproc
214
+
215
+
216
procedure polytonia_sustain: .i_first, .i_last, .dest_tier
# Relabel a level contour as "S" (Sustain) when the nucleus is long enough and its
# intrasyllabic up and down movements stay within narrow limits.
#   .i_first ... .i_last   indices within nucleus_tier
#   .dest_tier             tier holding the contour labels
   ; Minimum nucleus duration, and limits on the values of j_intrasylup and j_intrasyldown
   .mindur_sustain = 0.25
   .max_up = 1.5
   .max_down = -1.5
   for .j from .i_first to .i_last
      selectObject: nucleiID
      @is_nucleus: .j
      if (result)
         ; get_values_for_nucleus leaves nucldatID selected and sets row pj
         @get_values_for_nucleus: .j
         .dur = Get value: pj, j_nucldur
         .downST = Get value: pj, j_intrasyldown
         .upST = Get value: pj, j_intrasylup
         @get_tier_label_for_nucleus: .j, .dest_tier
         .contour_index = result1
         ; Only a plain level contour can become a sustain
         if (label$ = "_")
            if (.dur >= .mindur_sustain and .downST > .max_down and .upST < .max_up)
               selectObject: nucleiID
               Set interval text: .dest_tier, .contour_index, "S"
            endif
         endif
      endif
   endfor
endproc
240
+
241
+
242
procedure polytonia_localchange: .i_first, .i_last, .dest_tier, .ctxt_maxdur
# Assign pitch levels L (low), M (mid) or H (high) from the pitch change between the start
# of a nucleus and the pitch extrema of the nuclei in its left context.
# The left context is
#   - limited in time: gap between end of context nucleus and start of target below .ctxt_maxdur
#   - limited to .ctxt_width syllables
#   - restricted to nuclei in the same interval of speaker_tier
# H or M is given for a start pitch sufficiently above the context minimum;
# otherwise L is given for a start pitch a large interval below the context maximum.
   @debug_msg: "polytonia_localchange: entry"
   ; Max number of syllables in left context
   .ctxt_width = 3

   for .i from .i_first to .i_last
      selectObject: nucleiID
      .x1 = Get start time of interval: nucleus_tier, .i
      .x2 = Get end time of interval: nucleus_tier, .i
      @is_nucleus: .i
      if (result)
         speaker_interval = Get interval at time: speaker_tier, .x1
         @get_values_for_nucleus: .i
         ; Start pitch of the target; also the fallback when the context holds no valid nucleus
         .f0_start_i = vf0start
         @load_speaker_range: vspeakerid
         ; Running minimum starts very high, running maximum very low
         .ctxt_lo = 1000
         .ctxt_hi = 0
         .ctxt_nsyll = 0
         ; .t2 holds the end time of the most recent context nucleus; must be initialized
         .t2 = .x1
         .j = .i
         repeat
            ; step to the preceding interval in the tier
            .j -= 1
            if (.j >= .i_first)
               selectObject: nucleiID
               @is_nucleus: .j
               if (result)
                  .t1 = Get start time of interval: nucleus_tier, .j
                  .t2 = Get end time of interval: nucleus_tier, .j
                  speaker_interval_j = Get interval at time: speaker_tier, .t1
                  if (.x1 - .t2 < .ctxt_maxdur and speaker_interval == speaker_interval_j)
                     @get_values_for_nucleus: .j
                     ; data from hesitations are discarded
                     if (not vhesit)
                        .lo_ok = (vf0lo > bottom)
                        .hi_ok = (vf0hi < top)
                        if (.lo_ok)
                           .ctxt_lo = min (.ctxt_lo, vf0lo)
                        endif
                        if (.hi_ok)
                           .ctxt_hi = max (.ctxt_hi, vf0hi)
                        endif
                        ; a syllable counts only when both extrema are inside the range
                        if (.lo_ok and .hi_ok)
                           .ctxt_nsyll += 1
                        endif
                     endif
                  endif
               endif
            endif
         until (.ctxt_nsyll == .ctxt_width or (.x1-.t2 >= .ctxt_maxdur) or .j <= .i_first)
         if (.ctxt_nsyll == 0)
            ; no left context found
            .ctxt_lo = .f0_start_i
            .ctxt_hi = .f0_start_i
         endif
         @debug_msg: "polytonia_localchange: context found LO='.ctxt_lo:1' HI='.ctxt_hi:1' N='.ctxt_nsyll' <--> f0start='.f0_start_i:0'"

         selectObject: nucleiID
         .jct = Get interval at time: .dest_tier, .x1+(.x2-.x1)/2
         .label$ = Get label of interval: .dest_tier, .jct
         ; Only labels starting with Level, Rise, Fall or Sustain are still without pitch level
         if (index_regex (.label$, "^[_RrFfS]"))
            .level$ = ""
            ; No level is assigned when the context extrema are unusable
            if (not (.ctxt_lo == undefined or .ctxt_lo < bottom or .ctxt_hi > top))
               ; pitch interval above the minimum in left context
               .distST = 12 * log2 (.f0_start_i/.ctxt_lo)
               if (.distST >= large_)
                  .level$ = "H"
               elsif (.distST >= medium_)
                  .level$ = "M"
               elsif (not (.ctxt_hi == undefined or .ctxt_hi < bottom))
                  ; pitch interval below the maximum in left context
                  .distST = 12 * log2 (.f0_start_i/.ctxt_hi)
                  if (.distST <= -large_)
                     .level$ = "L"
                  endif
               endif
            endif
            .label$ = .level$ + .label$
         endif
         Set interval text: .dest_tier, .jct, .label$
      endif
   endfor
   @debug_msg: "polytonia_localchange: exit"
endproc
329
+
330
+
331
procedure polytonia_extrema: .i_first, .i_last, .tier
# Add pitch level B (bottom) or T (top) to the contour label when the nucleus starts or
# ends at these extremes of the speaker range. Nuclei far outside the range get label "o",
# and a "C" prefix is added for creaky nuclei.
#   .i_first ... .i_last   indices within nucleus_tier
#   .tier                  tier holding the contour labels
   for .i from .i_first to .i_last
      @get_tier_label_for_nucleus: .i, .tier
      .contour_index = result1
      ; .seed$ keeps the unmodified contour; label$ is edited below
      .seed$ = label$
      @is_nucleus: .i
      if (result)
         @get_values_for_nucleus: .i
         pv_hi = vf0hi
         pv_lo = Get value: pj, f0_min
         @load_speaker_range: vspeakerid
         selectObject: nucleiID
         ; Distance (ST) of the nucleus minimum to bottom and of its maximum to top
         .lo_vs_bottom = 12 * log2 (pv_lo/bottom)
         .hi_vs_top = 12 * log2 (pv_hi/top)
         if (.lo_vs_bottom <= -4) or (.hi_vs_top >= 8)
            ; out of range: blocks pitch level interpretation
            label$ = "o"
         else
            ; fall (optionally followed by level) ending at bottom
            if (index_regex (.seed$, "[Ff]_?$") and vf0end <= bottom)
               label$ += ",B"
            endif
            ; level or rise starting at bottom
            if (index_regex (.seed$, "^[_Rr]") and vf0start <= bottom)
               label$ = "B" + label$
            endif
            ; Top is considered only for a wide range measured on enough nuclei
            if (range_ >= 11 and nnucl_speaker >= 200 and upper_range_ >= 8)
               if (12 * log2 (vf0start/top) >= 0)
                  ; starts at top
                  label$ = "T" + label$
               elsif (index_regex (.seed$, "[Rr_F]$") and (.hi_vs_top >= 2))
                  ; ends at top
                  label$ += ",T"
               endif
            endif
            @get_creak_for_nucleus: .i
            if (result)
               label$ = "C" + label$
            endif
         endif
         Set interval text: .tier, .contour_index, label$
      endif
   endfor
endproc
371
+
372
+
373
procedure polytonia_infer: .i_first, .i_last, .dest_tier, .ctxt_maxdur
# Infer pitch level from intrasyllabic variation and speaker pitch range, for nuclei whose
# contour label has no pitch level yet. The inferred level is written as a prefix
# (l{, m{, h{, l{{ or h4) in front of the contour label.
#   .i_first ... .i_last   indices within nucleus_tier
#   .dest_tier             in: tier to be used for contour label
#   .ctxt_maxdur           in: max time gap when searching the previous nucleus
# Changes with respect to the previous version:
#   - the threshold of the "m{" test is median + (top-median)/4; the former expression
#     (top-median/4) had a misplaced parenthesis, which put the threshold above top
#   - the "l{{" test compares the start pitch of the current nucleus with that of the
#     previous nucleus; it formerly divided by result3, which is not assigned in this file,
#     after vf0start had been overwritten with the value of the previous nucleus
   for .i from .i_first to .i_last
      selectObject: nucleiID
      @is_nucleus: .i
      if (result)
         @get_tier_label_for_nucleus: .i, .dest_tier
         .contour_index = result1
         .seed$ = label$
         ; Label starts with a contour symbol: no pitch level assigned yet
         if (index_regex (.seed$, "^[_r]*[RFrf]"))
            .unassigned = 1
            @get_values_for_nucleus: .i
            ; Keep the values of the current nucleus in locals: the globals are overwritten
            ; when the values of the previous nucleus are fetched further down
            .f0_start_i = vf0start
            .f0_end_i = vf0end
            @load_speaker_range: vspeakerid
            selectObject: nucleiID
            ; pitch level unassigned AND large rise
            if (index_regex (.seed$, "^[_r]*R"))
               if (.f0_start_i <= median)
                  ; rise starts in lower half of range
                  .label$ = "l{" + .seed$
                  .unassigned = 0
               elsif (.f0_start_i <= (median + (top-median)/4))
                  ; rise starts just above median
                  .label$ = "m{" + .seed$
                  .unassigned = 0
               endif
            endif
            ; pitch level unassigned AND large fall
            if (.unassigned and index_regex (.seed$, "^[_f]*F"))
               if (.f0_end_i <= bottom and .f0_start_i < median)
                  ; towards bottom, in lower half of range
                  .label$ = "l{" + .seed$
                  .unassigned = 0
               elsif (.f0_start_i >= (median + (top-median)/2))
                  ; fall starts in upper quarter of range
                  .label$ = "h{" + .seed$
                  .unassigned = 0
               endif
            endif
            ; pitch level unassigned AND small rise
            if (.unassigned and index_regex (.seed$, "^r"))
               @find_previous_nucleus: .i, .i_first, .ctxt_maxdur
               if (result)
                  .prev = result
                  ; polytonia label of previous syllable
                  @get_tier_label_for_nucleus: .prev, .dest_tier
                  if (index_regex (label$, "^L"))
                     ; after this call vf0start is the start pitch of the previous syllable
                     @get_values_for_nucleus: .prev
                     .distST = 12 * log2 (.f0_start_i/vf0start)
                     if (.distST >= 0 and .distST < medium_)
                        ; slightly above previous syllable, which is L
                        .label$ = "l{{" + .seed$
                        .unassigned = 0
                     endif
                  endif
               endif
            endif
            ; pitch level unassigned AND level
            if (.unassigned and index_regex (.seed$, "^_"))
               if (.f0_start_i >= (median + (top-median)/2))
                  ; starts in upper quarter of range
                  .label$ = "h4" + .seed$
                  .unassigned = 0
               endif
            endif
            if (.unassigned == 0)
               ; success; the TextGrid is selected again because the lookups above
               ; may have left the table selected
               selectObject: nucleiID
               Set interval text: .dest_tier, .contour_index, .label$
            endif
         endif
      endif
   endfor
endproc
434
+
435
+
436
procedure infer_pitchlevel: .iNucl, .dest_tier
# Infer the pitch level of one nucleus from the position of its mean F0 relative to the
# quartiles q1_ and q3_ of the speaker, and prefix the contour label with h, m or l.
#   .iNucl       index of syllable in nucleus tier of nucleiID
#   .dest_tier   tier where pitch label is written
# Nothing is written when the label already starts with a pitch level.
   .unassigned = 0
   selectObject: nucleiID
   label$ = Get label of interval: nucleus_tier, .iNucl
   @is_nucleus: .iNucl
   if (result)
      @get_tier_label_for_nucleus: .iNucl, .dest_tier
      .contour_index = result1
      .seed$ = label$
      ; Label starts with a contour or creak symbol: no pitch level assigned
      if (index_regex (.seed$, "^[_RFrfSC]"))
         ; Row of the nucleus in nucldatID is stored as label on pointer_tier
         .s$ = Get label of interval: pointer_tier, .iNucl
         .p = number (.s$)
         selectObject: nucldatID
         .v = Get value: .p, j_f0_mean
         speaker_j = Get value: .p, j_speaker_id
         ; Loads q1_ and q3_ (Hz) of this speaker, among other range parameters
         @load_speaker_range: speaker_j
         selectObject: nucleiID
         ; Low unless the mean reaches the upper quartile or lies between the quartiles
         .level$ = "l"
         if (.v >= q3_)
            .level$ = "h"
         elsif (.v < q3_ and .v > q1_)
            .level$ = "m"
         endif
         label$ = .level$ + .seed$
         Set interval text: .dest_tier, .contour_index, label$
      endif
   endif
endproc
474
+
475
+
476
procedure polytonia_island: .i_first, .i_last, .dest_tier
# For a short (up to 5 syllables) interpausal stretch in which no syllable has a pitch level,
# assign a pitch level, on the basis of the speaker pitch range, to the syllable of the
# stretch with the highest mean pitch.
#   .i_first ... .i_last   indices within nucleus_tier
#   .dest_tier             tier holding the contour labels
# Changes with respect to the previous version:
#   - the scan of a stretch stops at .i_last; it formerly had no upper bound and could run
#     past the requested range, or past the last interval of the tier
#   - the TextGrid is selected at the start of each scan step, since the preceding table
#     lookups leave nucldatID selected
#   - the debug message prints label$; it formerly referred to .contour_label$, which is
#     never assigned
   @debug_msg: "polytonia_island: entry"
   .i = .i_first
   while (.i < .i_last)
      selectObject: nucleiID
      @is_nucleus: .i
      if (result)
         @get_values_for_nucleus: .i
         .after_pause = Get value: pj, j_after_pause
         .before_pause = Get value: pj, j_before_pause
         .mean_max = Get value: pj, j_f0_mean
         if (.after_pause)
            ; first nucleus in interpausal stretch
            .first_nucl = .i
            ; number of syllabic nuclei in stretch
            .nrofvalid = 0
            .ok = 1
            .speaker_id_prev = vspeakerid
            ; the scan below starts by stepping forward, so step back first
            .i -= 1
            repeat
               .i += 1
               selectObject: nucleiID
               @is_nucleus: .i
               .t1 = Get start time of interval: nucleus_tier, .i
               if (result)
                  .nrofvalid += 1
                  @get_values_for_nucleus: .i
                  .before_pause = Get value: pj, j_before_pause
                  .speaker_id = vspeakerid
                  .pv_f0mean = Get value: pj, j_f0_mean
                  if (.nrofvalid = 1 or .pv_f0mean > .mean_max)
                     ; index in nucleiID of the nucleus in the stretch with max mean pitch
                     .i_mean_max = .i
                     .mean_max = .pv_f0mean
                  endif
                  @get_tier_label_for_nucleus: .i, .dest_tier
                  .unassigned = index_regex (label$, "^[_RFrfSC]")
                  ; currently last nucleus in stretch
                  .last_nucl = .i
                  ; stretch stays a candidate while same speaker, short, and fully unassigned
                  .ok = (.speaker_id == .speaker_id_prev) and (.nrofvalid <= 5) and .unassigned
               endif
               @debug_msg: "polytonia_island: i='.i' ('.t1:3') contour_label='label$' unassigned='.unassigned' ok='.ok'"
            until ((not .ok) or .before_pause or .i >= .i_last)
            ; a level is inferred only when the stretch was scanned up to its final pause
            if (.ok and .before_pause)
               @infer_pitchlevel: .i_mean_max, .dest_tier
            endif
         endif
      endif
      .i += 1
   endwhile
   @debug_msg: "polytonia_island: exit"
endproc
529
+
530
+
531
procedure polytonia_extrapolate: .i_first, .i_last, .dest_tier, .backward, .ctxt_maxdur
# Given a syllable with pitch level assigned (the seed), assign a pitch level to a
# neighbouring syllable without pitch level, from the pitch interval between their starts.
#   .i_first ... .i_last   indices within nucleus_tier
#   .dest_tier             tier to be used for contour info
#   .backward              nonzero: scan from .i_last towards .i_first and extrapolate to
#                          preceding syllables; zero: the opposite direction
#   .ctxt_maxdur           max time gap between seed and neighbour
   ; Forward scan by default; reversed when .backward is set
   .i = .i_first
   .stop_interval = .i_last
   .step = 1
   if (.backward)
      .i = .i_last
      .stop_interval = .i_first
      .step = -1
   endif
   ; The product is positive as long as .i has not reached .stop_interval in scan direction
   while (.step * (.stop_interval - .i) > 0)
      @is_nucleus: .i
      if (result)
         @get_values_for_nucleus: .i
         ; hesitations are never used as seed
         if (not vhesit)
            @get_tier_label_for_nucleus: .i, .dest_tier
            .seed$ = label$
            ; seed needs a pitch level H, M, L or B (either case)
            if (index_regex (.seed$, "^[HhMmLlBb]"))
               .t = Get start time of interval: nucleus_tier, .i
               .t2 = Get end time of interval: nucleus_tier, .i
               speaker_interval = Get interval at time: speaker_tier, .t
               .pv_start = vf0start
               @get_values_for_nucleus: .i
               @load_speaker_range: vspeakerid
               ; .success: 1 when an unassigned syllable received a level
               .success = 0
               ; .found_assigned: 1 when a syllable with pitch level was met first
               .found_assigned = 0
               ; .k: distance (in number of intervals) between seed and neighbour
               .k = 0
               selectObject: nucleiID
               repeat
                  .k += 1
                  .other_nucl = .i + .step * .k
                  ; neighbour must not lie beyond .stop_interval
                  if (.step * (.stop_interval - .other_nucl) >= 0)
                     label$ = Get label of interval: nucleus_tier, .other_nucl
                     .t1o = Get start time of interval: nucleus_tier, .other_nucl
                     .t2o = Get end time of interval: nucleus_tier, .other_nucl
                     ; time gap between seed and neighbour
                     if (.backward)
                        .dt = .t - .t2o
                     else
                        .dt = .t1o - .t2
                     endif
                     @is_nucleus: .other_nucl
                     if (result)
                        speaker_interval_j = Get interval at time: speaker_tier, .t1o
                        ; close enough in time and same speaker
                        if (.dt < .ctxt_maxdur and speaker_interval == speaker_interval_j)
                           @get_tier_label_for_nucleus: .other_nucl, .dest_tier
                           .contour_index = result1
                           if (index_regex (label$, "^[_RrFfS]"))
                              ; neighbour has no pitch level: try to derive one
                              @get_values_for_nucleus: .other_nucl
                              ; positive when the seed starts higher than the neighbour
                              .distST = 12 * log2 (.pv_start/vf0start)
                              ; new pitch level; stays empty when no rule applies
                              .lev$ = ""
                              selectObject: nucleiID
                              if (index_regex (.seed$, "^[Hh]"))
                                 if (.distST >= large_)
                                    .lev$ = "l"
                                 elsif (abs(.distST) <= flat_)
                                    .lev$ = "h"
                                 endif
                              elsif (index_regex (.seed$, "^[Mm]"))
                                 if (.distST >= medium_)
                                    .lev$ = "l"
                                 elsif (.distST <= -medium_)
                                    .lev$ = "h"
                                 elsif (abs(.distST) <= flat_)
                                    .lev$ = "m"
                                 endif
                              elsif (index_regex (.seed$, "^[Ll]"))
                                 if (.distST <= -large_)
                                    .lev$ = "h"
                                 elsif (.distST <= -medium_)
                                    .lev$ = "m"
                                 elsif (abs(.distST) <= flat_)
                                    .lev$ = "l"
                                 endif
                              elsif (index_regex (.seed$, "^[Bb]"))
                                 ; down to bottom: only when extrapolating backward
                                 if (.distST <= -medium_ and .distST > -large_ and .backward)
                                    .lev$ = "l"
                                 endif
                              endif
                              ; every rule yields a non-empty level
                              .success = (.lev$ <> "")
                              Set interval text: .dest_tier, .contour_index, .lev$ + label$
                           else
                              .found_assigned = 1
                           endif
                        endif
                     endif
                  endif
               until (.other_nucl = .stop_interval or .dt >= .ctxt_maxdur or .success or .found_assigned)
            endif
         endif
      endif
      .i += .step
   endwhile
endproc
625
+
626
+
627
procedure polytonia_plateau: .i_first, .i_last, .dest_tier
# Given a series of at least 3 syllables without pitch change, attribute pitch level L or M
# to the first syllable of the series, from the position of its start pitch in the range.
#   .i_first ... .i_last   indices within nucleus_tier
#   .dest_tier             tier to be used for contour info
# Change with respect to the previous version: the branch that assigns "M" now sets the
# local flag .end; it formerly assigned a global variable named end, so the scan of the
# plateau was not terminated after the label had been written.
   .i = .i_first
   while (.i < .i_last)
      @is_nucleus: .i
      if (result)
         ; NOTE(review): s$ is not used further in this procedure; kept as in the original
         s$ = Get label of interval: pointer_tier, .i
         @get_values_for_nucleus: .i
         if (not vhesit)
            @get_tier_label_for_nucleus: .i, .dest_tier
            .contour_index = result1
            .seed$ = label$
            ; first syllable of sequence, pitch level unassigned
            if (index_regex (.seed$, "^[_RrFfS]"))
               .nsyll = 1
               .t1 = Get start time of interval: nucleus_tier, .i
               .t2 = Get end time of interval: nucleus_tier, .i
               .speaker_interval = Get interval at time: speaker_tier, .t1
               @get_values_for_nucleus: .i
               .pv_start = vf0start
               @load_speaker_range: vspeakerid
               .success = 1
               ; .end: 1 when an end condition was encountered
               .end = 0
               .other_nucl = .i
               repeat
                  ; check following nuclei which are unassigned
                  selectObject: nucleiID
                  .other_nucl += 1
                  if (.other_nucl <= .i_last)
                     .t1o = Get start time of interval: nucleus_tier, .other_nucl
                     .t2o = Get end time of interval: nucleus_tier, .other_nucl
                     @is_nucleus: .other_nucl
                     if (result)
                        .speaker_interval_j = Get interval at time: speaker_tier, .t1o
                        ; gap to the previous plateau syllable at most 0.2 s, and same speaker
                        if (.t1o - .t2 <= 0.2 and .speaker_interval == .speaker_interval_j)
                           @get_tier_label_for_nucleus: .other_nucl, .dest_tier
                           ; no pitch level assigned AND level
                           if (index_regex (label$, "^_$"))
                              @get_values_for_nucleus: .other_nucl
                              ; pitch interval between start of first syllable and this one
                              .distST = 12 * log2 (.pv_start/vf0start)
                              if (abs(.distST) > flat_)
                                 .success = 0
                              else
                                 .nsyll += 1
                              endif
                              if (.success and .nsyll >= 3)
                                 selectObject: nucleiID
                                 ; position of the plateau relative to the median
                                 .distST = 12 * log2 (.pv_start/median)
                                 if (.pv_start < median and (12 * log2 (.pv_start/bottom) >= 1))
                                    Set interval text: .dest_tier, .contour_index, "L" + .seed$
                                    .end = 1
                                    ; skip syllables of plateau
                                    .i = .other_nucl
                                 elsif (.distST >= 1 and .distST <= upper_range_/2)
                                    Set interval text: .dest_tier, .contour_index, "M" + .seed$
                                    .end = 1
                                    ; skip syllables of plateau
                                    .i = .other_nucl
                                 endif
                              endif
                           else
                              ; pitch level assigned for this syllable, or not a plain level
                              .end = 1
                              .i = .other_nucl
                           endif
                           .t2 = .t2o
                        else
                           ; gap too long or other speaker
                           .end = 1
                           .i = .other_nucl
                        endif
                     endif
                  endif
               until (.other_nucl = .i_last or (not .success) or .end)
            endif
         endif
      endif
      .i += 1
   endwhile
endproc
702
+
703
+
704
procedure polytonia_postproc: .tier, .start_time, .end_time
# Post-processing of a Polytonia tier: clean up the label of every interval of .tier
# inside the time range .start_time ... .end_time.
   @intervals_from_time_range: nucleiID, .tier, .start_time, .end_time, "j_first", "j_last"
   for .j from j_first to j_last
      s$ = Get label of interval: .tier, .j
      ; Remove debugging comments, then convert inferred (lowercase) levels to uppercase
      s$ = replace_regex$(replace_regex$(s$, "[{4o]", "", 0), "([lmhb])", "\U\1", 0)
      ; Remove single trailing "_" after an optional creak mark and pitch level,
      ; then simplify double fall
      s$ = replace_regex$(replace_regex$(s$, "^(C?[LlMmHhBbT]?)_$", "\1", 1), "FF", "F", 1)
      Set interval text: .tier, .j, s$
   endfor
endproc
716
+
717
+
718
procedure find_previous_nucleus: .current_nucleus, .stop_interval, .ctxt_maxdur
# Find the nearest preceding nucleus in the same interval of speaker_tier whose end lies
# less than .ctxt_maxdur before the start of nucleus .current_nucleus.
#   .stop_interval   lowest index of nucleus_tier that may be examined
#   result           out: index of previous nucleus, or 0 if not found
   selectObject: nucleiID
   .t = Get start time of interval: nucleus_tier, .current_nucleus
   .speaker_interval = Get interval at time: speaker_tier, .t
   .found = 0
   .prev = .current_nucleus
   repeat
      .prev -= 1
      ; Default gap ends the search when .prev is below .stop_interval
      .dt = .ctxt_maxdur
      if (.prev >= .stop_interval)
         selectObject: nucleiID
         .t1 = Get start time of interval: nucleus_tier, .prev
         .t2 = Get end time of interval: nucleus_tier, .prev
         ; gap between end of candidate and start of current nucleus
         .dt = .t-.t2
         @is_nucleus: .prev
         if (result)
            .speaker_interval_j = Get interval at time: speaker_tier, .t1
            ; accept when close enough in time and same speaker
            if (.speaker_interval == .speaker_interval_j and .dt < .ctxt_maxdur)
               .found = 1
            endif
         endif
      endif
   until (.prev <= .stop_interval or .dt >= .ctxt_maxdur or .found)
   ; result was overwritten by is_nucleus inside the loop, so it is set last
   result = 0
   if (.found)
      result = .prev
   endif
endproc
750
+
751
+
752
procedure get_creak_for_nucleus: .i_nucl
# Check whether nucleus interval .i_nucl overlaps an interval of creak_tier whose label
# starts with "C" or "c".
#   result   out: 1 if nucleus is creaky, 0 otherwise
   result = 0
   selectObject: nucleiID
   .x = Get start time of interval: nucleus_tier, .i_nucl
   .x2 = Get end time of interval: nucleus_tier, .i_nucl
   .n = Get number of intervals: creak_tier
   ; Walk through the creak intervals from the one at the nucleus start up to the nucleus end
   .i = Get interval at time: creak_tier, .x
   repeat
      .mark$ = Get label of interval: creak_tier, .i
      if (index_regex (.mark$, "^[Cc]") == 0)
         ; not creaky: move on to the next creak interval
         .x = Get end time of interval: creak_tier, .i
         .i += 1
         if (.i > .n)
            ; no interval left: force the end of the loop
            .x = .x2
         endif
      else
         result = 1
      endif
   until (result or .x >= .x2)
endproc
773
+