cldr-segments-full 47.0.0 → 48.0.0-ALPHA3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bower.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "cldr-segments-full",
3
- "version": "47.0.0",
3
+ "version": "48.0.0-ALPHA3",
4
4
  "dependencies": {
5
- "cldr-core": "47.0.0"
5
+ "cldr-core": "48.0.0-ALPHA3"
6
6
  },
7
7
  "main": "segments/**/*.json",
8
8
  "ignore": [
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "cldr-segments-full",
3
- "version": "47.0.0",
3
+ "version": "48.0.0-ALPHA3",
4
4
  "peerDependencies": {
5
- "cldr-core": "47.0.0"
5
+ "cldr-core": "48.0.0-ALPHA3"
6
6
  },
7
7
  "description": "Text Segmentation data",
8
8
  "homepage": "https://cldr.unicode.org",
@@ -24,6 +24,6 @@
24
24
  },
25
25
  "license": "Unicode-3.0",
26
26
  "bugs": "https://cldr.unicode.org/index/bug-reports#TOC-Filing-a-Ticket",
27
- "cldrVersion": "47",
27
+ "cldrVersion": "48",
28
28
  "unicodeVersion": "16.0.0"
29
29
  }
@@ -45,9 +45,6 @@
45
45
  {
46
46
  "$LVT": "\\p{Grapheme_Cluster_Break=LVT}"
47
47
  },
48
- {
49
- "$ConjunctLinkingScripts": "[\\p{Gujr}\\p{sc=Telu}\\p{sc=Mlym}\\p{sc=Orya}\\p{sc=Beng}\\p{sc=Deva}]"
50
- },
51
48
  {
52
49
  "$ConjunctLinker": "\\p{Indic_Conjunct_Break=Linker}"
53
50
  },
@@ -55,10 +52,13 @@
55
52
  "$LinkingConsonant": "\\p{Indic_Conjunct_Break=Consonant}"
56
53
  },
57
54
  {
58
- "$ExtPict": "\\p{Extended_Pictographic}"
55
+ "$ExtPict": "\\p{Extended_Pictographic=True}"
56
+ },
57
+ {
58
+ "$ConjunctExtender": "[\\p{Indic_Conjunct_Break=Linker}\\p{Indic_Conjunct_Break=Extend}]"
59
59
  },
60
60
  {
61
- "$ExtCccZwj": "[\\p{Indic_Conjunct_Break=Linker}\\p{Indic_Conjunct_Break=Extend}]"
61
+ "$XX": "\\p{Grapheme_Cluster_Break=Other}"
62
62
  }
63
63
  ],
64
64
  "segmentRules": {
@@ -71,7 +71,7 @@
71
71
  "9": " × ($Extend | $ZWJ) ",
72
72
  "9.1": " × $SpacingMark ",
73
73
  "9.2": " $Prepend × ",
74
- "9.3": " $LinkingConsonant $ExtCccZwj* $ConjunctLinker $ExtCccZwj* × $LinkingConsonant ",
74
+ "9.3": " $LinkingConsonant $ConjunctExtender* $ConjunctLinker $ConjunctExtender* × $LinkingConsonant ",
75
75
  "11": " $ExtPict $Extend* $ZWJ × $ExtPict ",
76
76
  "12": " ^ ($RI $RI)* $RI × $RI ",
77
77
  "13": " [^$RI] ($RI $RI)* $RI × $RI "
@@ -86,7 +86,7 @@
86
86
  "$AK": "\\p{Line_Break=Aksara}"
87
87
  },
88
88
  {
89
- "$AL": "\\p{Line_Break=Alphabetic}"
89
+ "$ALorig": "\\p{Line_Break=Alphabetic}"
90
90
  },
91
91
  {
92
92
  "$AP": "\\p{Line_Break=Aksara_Prebase}"
@@ -116,7 +116,7 @@
116
116
  "$CP": "\\p{Line_Break=CP}"
117
117
  },
118
118
  {
119
- "$CM1": "\\p{Line_Break=Combining_Mark}"
119
+ "$CMorig": "\\p{Line_Break=Combining_Mark}"
120
120
  },
121
121
  {
122
122
  "$CR": "\\p{Line_Break=Carriage_Return}"
@@ -133,6 +133,9 @@
133
133
  {
134
134
  "$H3": "\\p{Line_Break=H3}"
135
135
  },
136
+ {
137
+ "$HH": "\\p{Line_Break=Unambiguous_Hyphen}"
138
+ },
136
139
  {
137
140
  "$HL": "\\p{Line_Break=HL}"
138
141
  },
@@ -164,7 +167,7 @@
164
167
  "$NL": "\\p{Line_Break=Next_Line}"
165
168
  },
166
169
  {
167
- "$NS": "\\p{Line_Break=Nonstarter}"
170
+ "$NSorig": "\\p{Line_Break=Nonstarter}"
168
171
  },
169
172
  {
170
173
  "$NU": "\\p{Line_Break=Numeric}"
@@ -182,7 +185,13 @@
182
185
  "$QU": "\\p{Line_Break=Quotation}"
183
186
  },
184
187
  {
185
- "$SA": "\\p{Line_Break=Complex_Context}"
188
+ "$SA_Mn": "[\\p{Line_Break=Complex_Context}&\\p{gc=Mn}]"
189
+ },
190
+ {
191
+ "$SA_Mc": "[\\p{Line_Break=Complex_Context}&\\p{gc=Mc}]"
192
+ },
193
+ {
194
+ "$SAmMnmMc": "[\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]"
186
195
  },
187
196
  {
188
197
  "$SG": "\\p{Line_Break=Surrogate}"
@@ -220,44 +229,41 @@
220
229
  {
221
230
  "$EM": "\\p{Line_Break=E_Modifier}"
222
231
  },
223
- {
224
- "$ZWJ_O": "\\p{Line_Break=ZWJ}"
225
- },
226
232
  {
227
233
  "$ZWJ": "\\p{Line_Break=ZWJ}"
228
234
  },
229
235
  {
230
- "$QU_Pi": "[$QU & \\p{gc=Pi}]"
236
+ "$Pi": "\\p{gc=Pi}"
231
237
  },
232
238
  {
233
- "$QU_Pf": "[$QU & \\p{gc=Pf}]"
239
+ "$Pf": "\\p{gc=Pf}"
234
240
  },
235
241
  {
236
- "$QUmPi": "[$QU - \\p{gc=Pi}]"
242
+ "$QU_Pi": "[$QU & $Pi]"
237
243
  },
238
244
  {
239
- "$QUmPf": "[$QU - \\p{gc=Pf}]"
245
+ "$QU_Pf": "[$QU & $Pf]"
240
246
  },
241
247
  {
242
- "$NotEastAsian": "[^\\p{ea=F}\\p{ea=W}\\p{ea=H}]"
248
+ "$QUmPi": "[$QU - $Pi]"
243
249
  },
244
250
  {
245
- "$NonEastAsianBA": "[$BA & $NotEastAsian]"
251
+ "$QUmPf": "[$QU - $Pf]"
246
252
  },
247
253
  {
248
- "$DottedCircle": "◌"
254
+ "$EastAsian": "[\\p{ea=F}\\p{ea=W}\\p{ea=H}]"
249
255
  },
250
256
  {
251
- "$Hyphen": "[\\u2010]"
257
+ "$DottedCircle": "[◌]"
252
258
  },
253
259
  {
254
- "$CP30": "[$CP-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]"
260
+ "$CPmEastAsian": "[$CP-$EastAsian]"
255
261
  },
256
262
  {
257
- "$OP30": "[$OP-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]"
263
+ "$OPmEastAsian": "[$OP-$EastAsian]"
258
264
  },
259
265
  {
260
- "$ExtPictUnassigned": "[\\p{Extended_Pictographic}&\\p{gc=Cn}]"
266
+ "$ExtPictUnassigned": "[\\p{Extended_Pictographic=True}&\\p{gc=Cn}]"
261
267
  },
262
268
  {
263
269
  "$sot": "^"
@@ -266,190 +272,13 @@
266
272
  "$eot": "(?!.)"
267
273
  },
268
274
  {
269
- "$CM": "[$CM1 $ZWJ]"
270
- },
271
- {
272
- "$AL": "[$AI $AL $SG $XX $SA]"
273
- },
274
- {
275
- "$NS": "[$NS $CJ]"
276
- },
277
- {
278
- "$X": "$CM*"
279
- },
280
- {
281
- "$Spec1_": "[$SP $BK $CR $LF $NL $ZW]"
282
- },
283
- {
284
- "$Spec2_": "[^ $SP $BK $CR $LF $NL $ZW]"
285
- },
286
- {
287
- "$Spec3a_": "[^ $SP $BA $HY $CM]"
288
- },
289
- {
290
- "$Spec3b_": "[^ $BA $HY $CM]"
291
- },
292
- {
293
- "$Spec4_": "[^ $NU $CM]"
294
- },
295
- {
296
- "$AI": "($AI $X)"
297
- },
298
- {
299
- "$AK": "($AK $X)"
300
- },
301
- {
302
- "$AL": "($AL $X)"
303
- },
304
- {
305
- "$AP": "($AP $X)"
306
- },
307
- {
308
- "$AS": "($AS $X)"
309
- },
310
- {
311
- "$B2": "($B2 $X)"
312
- },
313
- {
314
- "$BA": "($BA $X)"
315
- },
316
- {
317
- "$BB": "($BB $X)"
318
- },
319
- {
320
- "$CB": "($CB $X)"
321
- },
322
- {
323
- "$CL": "($CL $X)"
324
- },
325
- {
326
- "$CP": "($CP $X)"
327
- },
328
- {
329
- "$CM": "($CM $X)"
330
- },
331
- {
332
- "$EX": "($EX $X)"
333
- },
334
- {
335
- "$GL": "($GL $X)"
336
- },
337
- {
338
- "$H2": "($H2 $X)"
339
- },
340
- {
341
- "$H3": "($H3 $X)"
342
- },
343
- {
344
- "$HL": "($HL $X)"
345
- },
346
- {
347
- "$HY": "($HY $X)"
348
- },
349
- {
350
- "$ID": "($ID $X)"
351
- },
352
- {
353
- "$IN": "($IN $X)"
354
- },
355
- {
356
- "$IS": "($IS $X)"
357
- },
358
- {
359
- "$JL": "($JL $X)"
360
- },
361
- {
362
- "$JT": "($JT $X)"
363
- },
364
- {
365
- "$JV": "($JV $X)"
366
- },
367
- {
368
- "$NS": "($NS $X)"
369
- },
370
- {
371
- "$NU": "($NU $X)"
372
- },
373
- {
374
- "$OP": "($OP $X)"
375
- },
376
- {
377
- "$PO": "($PO $X)"
378
- },
379
- {
380
- "$PR": "($PR $X)"
381
- },
382
- {
383
- "$QU": "($QU $X)"
384
- },
385
- {
386
- "$SA": "($SA $X)"
275
+ "$AL": "[$AI $ALorig $SG $XX $SAmMnmMc]"
387
276
  },
388
277
  {
389
- "$SG": "($SG $X)"
278
+ "$CM": "[$CMorig $SA_Mn $SA_Mc]"
390
279
  },
391
280
  {
392
- "$SY": "($SY $X)"
393
- },
394
- {
395
- "$VF": "($VF $X)"
396
- },
397
- {
398
- "$VI": "($VI $X)"
399
- },
400
- {
401
- "$WJ": "($WJ $X)"
402
- },
403
- {
404
- "$XX": "($XX $X)"
405
- },
406
- {
407
- "$RI": "($RI $X)"
408
- },
409
- {
410
- "$EB": "($EB $X)"
411
- },
412
- {
413
- "$EM": "($EM $X)"
414
- },
415
- {
416
- "$ZWJ": "($ZWJ $X)"
417
- },
418
- {
419
- "$QU_Pi": "($QU_Pi $X)"
420
- },
421
- {
422
- "$QU_Pf": "($QU_Pf $X)"
423
- },
424
- {
425
- "$QUmPi": "($QUmPi $X)"
426
- },
427
- {
428
- "$QUmPf": "($QUmPf $X)"
429
- },
430
- {
431
- "$NotEastAsian": "( $NotEastAsian | [$NotEastAsian - $Spec1_] $X)"
432
- },
433
- {
434
- "$NonEastAsianBA": "(NonEastAsianBA $X)"
435
- },
436
- {
437
- "$DottedCircle": "($DottedCircle $X)"
438
- },
439
- {
440
- "$Hyphen": "($Hyphen $X)"
441
- },
442
- {
443
- "$CP30": "($CP30 $X)"
444
- },
445
- {
446
- "$OP30": "($OP30 $X)"
447
- },
448
- {
449
- "$AL": "($AL | ^ $CM | (?<=$Spec1_) $CM)"
450
- },
451
- {
452
- "$NotEastAsian": "( $NotEastAsian | ^ $CM | (?<=$Spec1_) $CM )"
281
+ "$NS": "[$NSorig $CJ]"
453
282
  }
454
283
  ],
455
284
  "segmentRules": {
@@ -462,20 +291,19 @@
462
291
  "7.01": " × $SP ",
463
292
  "7.02": " × $ZW ",
464
293
  "8": " $ZW $SP* ÷ ",
465
- "8.1": " $ZWJ_O × ",
466
- "9": " $Spec2_ × $CM ",
294
+ "8.1": " $ZWJ × ",
295
+ "9": " (?<X>[^$BK $CR $LF $NL $SP $ZW]) ( $CM | $ZWJ )* → ${X} ",
296
+ "10": " ( $CM | $ZWJ ) → A ",
467
297
  "11.01": " × $WJ ",
468
298
  "11.02": " $WJ × ",
469
299
  "12": " $GL × ",
470
- "12.1": " $Spec3a_ × $GL ",
471
- "12.2": " $Spec3b_ $CM+ × $GL ",
472
- "12.3": " ^ $CM+ × $GL ",
300
+ "12.1": " [^ $SP $BA $HY $HH] × $GL ",
473
301
  "13.01": " × $EX ",
474
302
  "13.02": " × $CL ",
475
303
  "13.03": " × $CP ",
476
304
  "13.04": " × $SY ",
477
305
  "14": " $OP $SP* × ",
478
- "15.11": " ( $sot | $BK | $CR | $LF | $NL | $OP | $QU | $GL | $SP | $ZW ) $QU_Pi $SP* × ",
306
+ "15.11": " ( $BK | $CR | $LF | $NL | $OP | $QU | $GL | $SP | $ZW | $sot ) $QU_Pi $SP* × ",
479
307
  "15.21": " × $QU_Pf ( $SP | $GL | $WJ | $CL | $QU | $CP | $EX | $IS | $SY | $BK | $CR | $LF | $NL | $ZW | $eot ) ",
480
308
  "15.3": " $SP ÷ $IS $NU ",
481
309
  "15.4": " × $IS ",
@@ -484,18 +312,19 @@
484
312
  "18": " $SP ÷ ",
485
313
  "19.01": " × $QUmPi ",
486
314
  "19.02": " $QUmPf × ",
487
- "19.1": " $NotEastAsian × $QU ",
488
- "19.11": " × $QU ( $NotEastAsian | $eot ) ",
489
- "19.12": " $QU × $NotEastAsian ",
490
- "19.13": " ( $sot | $NotEastAsian ) $QU × ",
315
+ "19.1": " [^$EastAsian] × $QU ",
316
+ "19.11": " × $QU ( [^$EastAsian] | $eot ) ",
317
+ "19.12": " $QU × [^$EastAsian] ",
318
+ "19.13": " ( [^$EastAsian] | $sot ) $QU × ",
491
319
  "20.01": " ÷ $CB ",
492
320
  "20.02": " $CB ÷ ",
493
- "20.1": " ( $sot | $BK | $CR | $LF | $NL | $SP | $ZW | $CB | $GL ) ( $HY | $Hyphen ) × $AL ",
321
+ "20.1": " ( $BK | $CR | $LF | $NL | $SP | $ZW | $CB | $GL | $sot ) ( $HY | $HH ) × ( $AL | $HL ) ",
494
322
  "21.01": " × $BA ",
495
- "21.02": " × $HY ",
496
- "21.03": " × $NS ",
497
- "21.04": " $BB × ",
498
- "21.1": " $HL ($HY | $NonEastAsianBA) × [^$HL] ",
323
+ "21.02": " × $HH ",
324
+ "21.03": " × $HY ",
325
+ "21.04": " × $NS ",
326
+ "21.05": " $BB × ",
327
+ "21.1": " $HL ($HY | $HH) × [^$HL] ",
499
328
  "21.2": " $SY × $HL ",
500
329
  "22": " × $IN ",
501
330
  "23.02": " ($AL | $HL) × $NU ",
@@ -530,8 +359,8 @@
530
359
  "28.13": " ($AK | $DottedCircle | $AS) $VI × ($AK | $DottedCircle) ",
531
360
  "28.14": " ($AK | $DottedCircle | $AS) × ($AK | $DottedCircle | $AS) $VF ",
532
361
  "29": " $IS × ($AL | $HL) ",
533
- "30.01": " ($AL | $HL | $NU) × $OP30 ",
534
- "30.02": " $CP30 × ($AL | $HL | $NU) ",
362
+ "30.01": " ($AL | $HL | $NU) × $OPmEastAsian ",
363
+ "30.02": " $CPmEastAsian × ($AL | $HL | $NU) ",
535
364
  "30.11": " $sot ($RI $RI)* $RI × $RI ",
536
365
  "30.12": " [^$RI] ($RI $RI)* $RI × $RI ",
537
366
  "30.13": " $RI ÷ $RI ",
@@ -584,55 +413,25 @@
584
413
  "$SContinue": "\\p{Sentence_Break=SContinue}"
585
414
  },
586
415
  {
587
- "$Any": "."
588
- },
589
- {
590
- "$FE": "[$Format $Extend]"
591
- },
592
- {
593
- "$NotPreLower_": "[^ $OLetter $Upper $Lower $Sep $CR $LF $STerm $ATerm]"
416
+ "$XX": "\\p{Sentence_Break=Other}"
594
417
  },
595
418
  {
596
- "$Sp": "($Sp $FE*)"
597
- },
598
- {
599
- "$Lower": "($Lower $FE*)"
600
- },
601
- {
602
- "$Upper": "($Upper $FE*)"
603
- },
604
- {
605
- "$OLetter": "($OLetter $FE*)"
606
- },
607
- {
608
- "$Numeric": "($Numeric $FE*)"
609
- },
610
- {
611
- "$ATerm": "($ATerm $FE*)"
612
- },
613
- {
614
- "$STerm": "($STerm $FE*)"
615
- },
616
- {
617
- "$Close": "($Close $FE*)"
618
- },
619
- {
620
- "$SContinue": "($SContinue $FE*)"
419
+ "$Any": "."
621
420
  },
622
421
  {
623
- "$ParaSep": "($Sep | $CR | $LF)"
422
+ "$ParaSep": "[$Sep $CR $LF]"
624
423
  },
625
424
  {
626
- "$SATerm": "($STerm | $ATerm)"
425
+ "$SATerm": "[$STerm $ATerm]"
627
426
  }
628
427
  ],
629
428
  "segmentRules": {
630
429
  "3": " $CR × $LF ",
631
430
  "4": " $ParaSep ÷ ",
632
- "5": " × [$Format $Extend] ",
431
+ "5": " (?<X>[^$ParaSep]) ( $Extend | $Format )* → ${X} ",
633
432
  "6": " $ATerm × $Numeric ",
634
433
  "7": " ($Upper | $Lower) $ATerm × $Upper ",
635
- "8": " $ATerm $Close* $Sp* × $NotPreLower_* $Lower ",
434
+ "8": " $ATerm $Close* $Sp* × [^ $OLetter $Upper $Lower $ParaSep $SATerm]* $Lower ",
636
435
  "8.1": " $SATerm $Close* $Sp* × ($SContinue | $SATerm) ",
637
436
  "9": " $SATerm $Close* × ( $Close | $Sp | $ParaSep ) ",
638
437
  "10": " $SATerm $Close* $Sp* × ( $Sp | $ParaSep ) ",
@@ -694,61 +493,19 @@
694
493
  "$ZWJ": "\\p{Word_Break=ZWJ}"
695
494
  },
696
495
  {
697
- "$ExtPict": "\\p{Extended_Pictographic}"
496
+ "$ExtPict": "\\p{Extended_Pictographic=True}"
698
497
  },
699
498
  {
700
499
  "$WSegSpace": "\\p{Word_Break=WSegSpace}"
701
500
  },
702
501
  {
703
- "$AHLetter": "($ALetter | $Hebrew_Letter)"
704
- },
705
- {
706
- "$MidNumLetQ": "($MidNumLet | $Single_Quote)"
707
- },
708
- {
709
- "$FE": "[$Format $Extend $ZWJ]"
710
- },
711
- {
712
- "$NotBreak_": "[^ $Newline $CR $LF ]"
713
- },
714
- {
715
- "$Katakana": "($Katakana $FE*)"
716
- },
717
- {
718
- "$ALetter": "($ALetter $FE*)"
719
- },
720
- {
721
- "$MidLetter": "($MidLetter $FE*)"
722
- },
723
- {
724
- "$MidNum": "($MidNum $FE*)"
725
- },
726
- {
727
- "$MidNumLet": "($MidNumLet $FE*)"
728
- },
729
- {
730
- "$Numeric": "($Numeric $FE*)"
731
- },
732
- {
733
- "$ExtendNumLet": "($ExtendNumLet $FE*)"
734
- },
735
- {
736
- "$RI": "($RI $FE*)"
737
- },
738
- {
739
- "$Hebrew_Letter": "($Hebrew_Letter $FE*)"
740
- },
741
- {
742
- "$Double_Quote": "($Double_Quote $FE*)"
743
- },
744
- {
745
- "$Single_Quote": "($Single_Quote $FE*)"
502
+ "$XX": "\\p{Word_Break=Other}"
746
503
  },
747
504
  {
748
- "$AHLetter": "($AHLetter $FE*)"
505
+ "$AHLetter": "[$ALetter $Hebrew_Letter]"
749
506
  },
750
507
  {
751
- "$MidNumLetQ": "($MidNumLetQ $FE*)"
508
+ "$MidNumLetQ": "[$MidNumLet $Single_Quote]"
752
509
  }
753
510
  ],
754
511
  "segmentRules": {
@@ -757,7 +514,7 @@
757
514
  "3.2": " ÷ ($Newline | $CR | $LF) ",
758
515
  "3.3": " $ZWJ × $ExtPict ",
759
516
  "3.4": " $WSegSpace × $WSegSpace ",
760
- "4": " $NotBreak_ × [$Format $Extend $ZWJ] ",
517
+ "4": " (?<X>[^$CR $LF $Newline]) ($Extend | $Format | $ZWJ)* → ${X} ",
761
518
  "5": " $AHLetter × $AHLetter ",
762
519
  "6": " $AHLetter × ($MidLetter | $MidNumLetQ) $AHLetter ",
763
520
  "7": " $AHLetter ($MidLetter | $MidNumLetQ) × $AHLetter ",