regexp_parser 2.10.0 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -1
- data/LICENSE +1 -1
- data/Rakefile +5 -3
- data/lib/regexp_parser/error.rb +2 -0
- data/lib/regexp_parser/expression/base.rb +2 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +2 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +2 -0
- data/lib/regexp_parser/expression/classes/backreference.rb +2 -0
- data/lib/regexp_parser/expression/classes/character_set/intersection.rb +2 -0
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -0
- data/lib/regexp_parser/expression/classes/character_set.rb +2 -0
- data/lib/regexp_parser/expression/classes/character_type.rb +2 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -0
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +2 -0
- data/lib/regexp_parser/expression/classes/group.rb +2 -0
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +2 -0
- data/lib/regexp_parser/expression/classes/posix_class.rb +2 -0
- data/lib/regexp_parser/expression/classes/root.rb +2 -0
- data/lib/regexp_parser/expression/classes/unicode_property.rb +2 -0
- data/lib/regexp_parser/expression/methods/construct.rb +2 -0
- data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +2 -0
- data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +8 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +2 -0
- data/lib/regexp_parser/expression/methods/match.rb +2 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -0
- data/lib/regexp_parser/expression/methods/negative.rb +2 -0
- data/lib/regexp_parser/expression/methods/options.rb +2 -0
- data/lib/regexp_parser/expression/methods/parts.rb +2 -0
- data/lib/regexp_parser/expression/methods/printing.rb +2 -0
- data/lib/regexp_parser/expression/methods/referenced_expressions.rb +2 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +2 -0
- data/lib/regexp_parser/expression/methods/tests.rb +2 -0
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -0
- data/lib/regexp_parser/expression/quantifier.rb +3 -1
- data/lib/regexp_parser/expression/sequence.rb +2 -0
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -0
- data/lib/regexp_parser/expression/shared.rb +6 -3
- data/lib/regexp_parser/expression/subexpression.rb +2 -0
- data/lib/regexp_parser/expression.rb +2 -0
- data/lib/regexp_parser/lexer.rb +2 -0
- data/lib/regexp_parser/parser.rb +3 -0
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +2 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +2 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +2 -0
- data/lib/regexp_parser/scanner/properties/long.csv +32 -0
- data/lib/regexp_parser/scanner/properties/short.csv +12 -0
- data/lib/regexp_parser/scanner/scanner.rl +44 -8
- data/lib/regexp_parser/scanner.rb +786 -656
- data/lib/regexp_parser/syntax/any.rb +2 -0
- data/lib/regexp_parser/syntax/base.rb +2 -0
- data/lib/regexp_parser/syntax/token/anchor.rb +5 -3
- data/lib/regexp_parser/syntax/token/assertion.rb +4 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +8 -6
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -1
- data/lib/regexp_parser/syntax/token/character_type.rb +6 -4
- data/lib/regexp_parser/syntax/token/conditional.rb +5 -3
- data/lib/regexp_parser/syntax/token/escape.rb +9 -7
- data/lib/regexp_parser/syntax/token/group.rb +8 -6
- data/lib/regexp_parser/syntax/token/keep.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +4 -2
- data/lib/regexp_parser/syntax/token/posix_class.rb +4 -2
- data/lib/regexp_parser/syntax/token/quantifier.rb +8 -6
- data/lib/regexp_parser/syntax/token/unicode_property.rb +134 -74
- data/lib/regexp_parser/syntax/token/virtual.rb +5 -3
- data/lib/regexp_parser/syntax/token.rb +5 -3
- data/lib/regexp_parser/syntax/version_lookup.rb +4 -2
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -0
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +2 -0
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +2 -0
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/3.5.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/4.0.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +2 -0
- data/lib/regexp_parser/syntax.rb +2 -0
- data/lib/regexp_parser/token.rb +2 -0
- data/lib/regexp_parser/version.rb +3 -1
- data/lib/regexp_parser.rb +2 -0
- data/regexp_parser.gemspec +2 -0
- metadata +5 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: aa5734a20a0705226c021d9a0efef48a6ca24b5b18f15f93b34c12ffe5021d1e
|
|
4
|
+
data.tar.gz: fa77ac98b3bd17d6bdba43d387ebbbd84a6a0a5fa86123c7eee68646e90ce37c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4bb62063aaa64e3828c5abf551fbf5ce83f726b5ebeb56af3e9210fd7e8c110c7f8f9ba1fcafd0dd0f66b8e8f36f04c1d72e5a739c4ace8fa19978dfc1a794b1
|
|
7
|
+
data.tar.gz: a82384912534ec3ca98d7665434c655468b15e4d75d6c1f13405f0b777d818746c6686d36049a158f0df9b829c129026487e5c3e6cf8a8f7c7ca65155a937db3
|
data/Gemfile
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
source 'https://rubygems.org'
|
|
2
4
|
|
|
3
5
|
gemspec
|
|
@@ -10,6 +12,6 @@ group :development, :test do
|
|
|
10
12
|
if RUBY_VERSION.to_f >= 2.7
|
|
11
13
|
gem 'benchmark-ips', '~> 2.1'
|
|
12
14
|
gem 'gouteur', '~> 1.1'
|
|
13
|
-
gem 'rubocop', '
|
|
15
|
+
gem 'rubocop', '>= 1.80.2'
|
|
14
16
|
end
|
|
15
17
|
end
|
data/LICENSE
CHANGED
data/Rakefile
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'bundler'
|
|
2
4
|
require 'rubygems'
|
|
3
5
|
require 'rubygems/package_task'
|
|
@@ -14,10 +16,10 @@ RSpec::Core::RakeTask.new(:spec)
|
|
|
14
16
|
task :default => [:'test:full']
|
|
15
17
|
|
|
16
18
|
namespace :test do
|
|
17
|
-
task full: [:
|
|
19
|
+
task full: [:ragel, :spec]
|
|
18
20
|
end
|
|
19
21
|
|
|
20
22
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
|
21
23
|
# latest scanner code is generated and included in the build.
|
|
22
|
-
desc "Runs ragel
|
|
23
|
-
task :
|
|
24
|
+
desc "Runs ragel before building the gem"
|
|
25
|
+
task build: :ragel
|
data/lib/regexp_parser/error.rb
CHANGED
|
data/lib/regexp_parser/expression/classes/escape_sequence.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
module EscapeSequence
|
|
3
5
|
Base = Class.new(Regexp::Expression::Base)
|
|
@@ -18,6 +20,7 @@ module Regexp::Expression
|
|
|
18
20
|
Codepoint = Class.new(Base) # e.g. \u000A
|
|
19
21
|
|
|
20
22
|
CodepointList = Class.new(Base) # e.g. \u{A B}
|
|
23
|
+
UTF8Hex = Class.new(Base) # e.g. \xE2\x82\xAC
|
|
21
24
|
|
|
22
25
|
AbstractMetaControlSequence = Class.new(Base)
|
|
23
26
|
Control = Class.new(AbstractMetaControlSequence) # e.g. \cB
|
|
data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression::EscapeSequence
|
|
2
4
|
AsciiEscape.class_eval { def codepoint; 0x1B end }
|
|
3
5
|
Backspace.class_eval { def codepoint; 0x8 end }
|
|
@@ -15,6 +17,12 @@ module Regexp::Expression::EscapeSequence
|
|
|
15
17
|
Hex.class_eval { def codepoint; text[/\h+/].hex end }
|
|
16
18
|
Codepoint.class_eval { def codepoint; text[/\h+/].hex end }
|
|
17
19
|
|
|
20
|
+
UTF8Hex.class_eval do
|
|
21
|
+
def codepoint
|
|
22
|
+
text.scan(/\h+/).map(&:hex).pack('C*').force_encoding('utf-8').ord
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
18
26
|
CodepointList.class_eval do
|
|
19
27
|
# Maybe this should be a unique top-level expression class?
|
|
20
28
|
def char
|
|
data/lib/regexp_parser/expression/quantifier.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
# TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
|
|
3
5
|
# call super in #initialize, but raise in #quantifier= and #quantify,
|
|
@@ -6,7 +8,7 @@ module Regexp::Expression
|
|
|
6
8
|
class Quantifier
|
|
7
9
|
include Regexp::Expression::Shared
|
|
8
10
|
|
|
9
|
-
MODES = %i[greedy possessive reluctant]
|
|
11
|
+
MODES = %i[greedy possessive reluctant].freeze
|
|
10
12
|
|
|
11
13
|
def initialize(*args)
|
|
12
14
|
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
|
data/lib/regexp_parser/expression/shared.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
module Shared
|
|
3
5
|
module ClassMethods; end # filled in ./methods/*.rb
|
|
@@ -70,11 +72,12 @@ module Regexp::Expression
|
|
|
70
72
|
# lit.to_s(:original) # => 'a +' # with quantifier AND intermittent decorations
|
|
71
73
|
#
|
|
72
74
|
def to_s(format = :full)
|
|
73
|
-
base =
|
|
75
|
+
base = ''.dup
|
|
76
|
+
parts.each do |part|
|
|
74
77
|
if part.instance_of?(String)
|
|
75
|
-
|
|
78
|
+
base << part
|
|
76
79
|
elsif !part.custom_to_s_handling
|
|
77
|
-
|
|
80
|
+
base << part.to_s(:original)
|
|
78
81
|
end
|
|
79
82
|
end
|
|
80
83
|
"#{base}#{pre_quantifier_decoration(format)}#{quantifier_affix(format)}"
|
data/lib/regexp_parser/lexer.rb
CHANGED
data/lib/regexp_parser/parser.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require_relative 'error'
|
|
2
4
|
require_relative 'expression'
|
|
3
5
|
|
|
@@ -319,6 +321,7 @@ class Regexp::Parser
|
|
|
319
321
|
when :codepoint_list; node << EscapeSequence::CodepointList.new(token, active_opts)
|
|
320
322
|
when :hex; node << EscapeSequence::Hex.new(token, active_opts)
|
|
321
323
|
when :octal; node << EscapeSequence::Octal.new(token, active_opts)
|
|
324
|
+
when :utf8_hex; node << EscapeSequence::UTF8Hex.new(token, active_opts)
|
|
322
325
|
|
|
323
326
|
when :control
|
|
324
327
|
if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
|
|
data/lib/regexp_parser/scanner/properties/long.csv
CHANGED
|
@@ -9,6 +9,8 @@ age=13.0,age=13.0
|
|
|
9
9
|
age=14.0,age=14.0
|
|
10
10
|
age=15.0,age=15.0
|
|
11
11
|
age=15.1,age=15.1
|
|
12
|
+
age=16.0,age=16.0
|
|
13
|
+
age=17.0,age=17.0
|
|
12
14
|
age=2.0,age=2.0
|
|
13
15
|
age=2.1,age=2.1
|
|
14
16
|
age=3.0,age=3.0
|
|
@@ -43,6 +45,7 @@ bamum,bamum
|
|
|
43
45
|
bassavah,bassa_vah
|
|
44
46
|
batak,batak
|
|
45
47
|
bengali,bengali
|
|
48
|
+
beriaerfe,beria_erfe
|
|
46
49
|
bhaiksuki,bhaiksuki
|
|
47
50
|
bidicontrol,bidi_control
|
|
48
51
|
blank,blank
|
|
@@ -103,6 +106,7 @@ extendedpictographic,extended_pictographic
|
|
|
103
106
|
extender,extender
|
|
104
107
|
finalpunctuation,final_punctuation
|
|
105
108
|
format,format
|
|
109
|
+
garay,garay
|
|
106
110
|
georgian,georgian
|
|
107
111
|
glagolitic,glagolitic
|
|
108
112
|
gothic,gothic
|
|
@@ -128,6 +132,7 @@ greek,greek
|
|
|
128
132
|
gujarati,gujarati
|
|
129
133
|
gunjalagondi,gunjala_gondi
|
|
130
134
|
gurmukhi,gurmukhi
|
|
135
|
+
gurungkhema,gurung_khema
|
|
131
136
|
han,han
|
|
132
137
|
hangul,hangul
|
|
133
138
|
hanifirohingya,hanifi_rohingya
|
|
@@ -173,6 +178,7 @@ inbasiclatin,in_basic_latin
|
|
|
173
178
|
inbassavah,in_bassa_vah
|
|
174
179
|
inbatak,in_batak
|
|
175
180
|
inbengali,in_bengali
|
|
181
|
+
inberiaerfe,in_beria_erfe
|
|
176
182
|
inbhaiksuki,in_bhaiksuki
|
|
177
183
|
inblockelements,in_block_elements
|
|
178
184
|
inbopomofo,in_bopomofo
|
|
@@ -208,6 +214,7 @@ incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f
|
|
|
208
214
|
incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g
|
|
209
215
|
incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h
|
|
210
216
|
incjkunifiedideographsextensioni,in_cjk_unified_ideographs_extension_i
|
|
217
|
+
incjkunifiedideographsextensionj,in_cjk_unified_ideographs_extension_j
|
|
211
218
|
incombiningdiacriticalmarks,in_combining_diacritical_marks
|
|
212
219
|
incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended
|
|
213
220
|
incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols
|
|
@@ -241,6 +248,7 @@ induployan,in_duployan
|
|
|
241
248
|
inearlydynasticcuneiform,in_early_dynastic_cuneiform
|
|
242
249
|
inegyptianhieroglyphformatcontrols,in_egyptian_hieroglyph_format_controls
|
|
243
250
|
inegyptianhieroglyphs,in_egyptian_hieroglyphs
|
|
251
|
+
inegyptianhieroglyphsextendeda,in_egyptian_hieroglyphs_extended_a
|
|
244
252
|
inelbasan,in_elbasan
|
|
245
253
|
inelymaic,in_elymaic
|
|
246
254
|
inemoticons,in_emoticons
|
|
@@ -253,6 +261,7 @@ inethiopicextended,in_ethiopic_extended
|
|
|
253
261
|
inethiopicextendeda,in_ethiopic_extended_a
|
|
254
262
|
inethiopicextendedb,in_ethiopic_extended_b
|
|
255
263
|
inethiopicsupplement,in_ethiopic_supplement
|
|
264
|
+
ingaray,in_garay
|
|
256
265
|
ingeneralpunctuation,in_general_punctuation
|
|
257
266
|
ingeometricshapes,in_geometric_shapes
|
|
258
267
|
ingeometricshapesextended,in_geometric_shapes_extended
|
|
@@ -268,6 +277,7 @@ ingreekextended,in_greek_extended
|
|
|
268
277
|
ingujarati,in_gujarati
|
|
269
278
|
ingunjalagondi,in_gunjala_gondi
|
|
270
279
|
ingurmukhi,in_gurmukhi
|
|
280
|
+
ingurungkhema,in_gurung_khema
|
|
271
281
|
inhalfwidthandfullwidthforms,in_halfwidth_and_fullwidth_forms
|
|
272
282
|
inhangulcompatibilityjamo,in_hangul_compatibility_jamo
|
|
273
283
|
inhanguljamo,in_hangul_jamo
|
|
@@ -309,6 +319,7 @@ inkhmer,in_khmer
|
|
|
309
319
|
inkhmersymbols,in_khmer_symbols
|
|
310
320
|
inkhojki,in_khojki
|
|
311
321
|
inkhudawadi,in_khudawadi
|
|
322
|
+
inkiratrai,in_kirat_rai
|
|
312
323
|
inlao,in_lao
|
|
313
324
|
inlatin1supplement,in_latin_1_supplement
|
|
314
325
|
inlatinextendeda,in_latin_extended_a
|
|
@@ -353,6 +364,7 @@ inmiscellaneousmathematicalsymbolsb,in_miscellaneous_mathematical_symbols_b
|
|
|
353
364
|
inmiscellaneoussymbols,in_miscellaneous_symbols
|
|
354
365
|
inmiscellaneoussymbolsandarrows,in_miscellaneous_symbols_and_arrows
|
|
355
366
|
inmiscellaneoussymbolsandpictographs,in_miscellaneous_symbols_and_pictographs
|
|
367
|
+
inmiscellaneoussymbolssupplement,in_miscellaneous_symbols_supplement
|
|
356
368
|
inmiscellaneoustechnical,in_miscellaneous_technical
|
|
357
369
|
inmodi,in_modi
|
|
358
370
|
inmodifiertoneletters,in_modifier_tone_letters
|
|
@@ -364,6 +376,7 @@ inmusicalsymbols,in_musical_symbols
|
|
|
364
376
|
inmyanmar,in_myanmar
|
|
365
377
|
inmyanmarextendeda,in_myanmar_extended_a
|
|
366
378
|
inmyanmarextendedb,in_myanmar_extended_b
|
|
379
|
+
inmyanmarextendedc,in_myanmar_extended_c
|
|
367
380
|
innabataean,in_nabataean
|
|
368
381
|
innagmundari,in_nag_mundari
|
|
369
382
|
innandinagari,in_nandinagari
|
|
@@ -385,6 +398,7 @@ inoldsogdian,in_old_sogdian
|
|
|
385
398
|
inoldsoutharabian,in_old_south_arabian
|
|
386
399
|
inoldturkic,in_old_turkic
|
|
387
400
|
inolduyghur,in_old_uyghur
|
|
401
|
+
inolonal,in_ol_onal
|
|
388
402
|
inopticalcharacterrecognition,in_optical_character_recognition
|
|
389
403
|
inoriya,in_oriya
|
|
390
404
|
inornamentaldingbats,in_ornamental_dingbats
|
|
@@ -410,9 +424,11 @@ insaurashtra,in_saurashtra
|
|
|
410
424
|
inscriptionalpahlavi,inscriptional_pahlavi
|
|
411
425
|
inscriptionalparthian,inscriptional_parthian
|
|
412
426
|
insharada,in_sharada
|
|
427
|
+
insharadasupplement,in_sharada_supplement
|
|
413
428
|
inshavian,in_shavian
|
|
414
429
|
inshorthandformatcontrols,in_shorthand_format_controls
|
|
415
430
|
insiddham,in_siddham
|
|
431
|
+
insidetic,in_sidetic
|
|
416
432
|
insinhala,in_sinhala
|
|
417
433
|
insinhalaarchaicnumbers,in_sinhala_archaic_numbers
|
|
418
434
|
insmallformvariants,in_small_form_variants
|
|
@@ -424,6 +440,7 @@ inspacingmodifierletters,in_spacing_modifier_letters
|
|
|
424
440
|
inspecials,in_specials
|
|
425
441
|
insundanese,in_sundanese
|
|
426
442
|
insundanesesupplement,in_sundanese_supplement
|
|
443
|
+
insunuwar,in_sunuwar
|
|
427
444
|
insuperscriptsandsubscripts,in_superscripts_and_subscripts
|
|
428
445
|
insupplementalarrowsa,in_supplemental_arrows_a
|
|
429
446
|
insupplementalarrowsb,in_supplemental_arrows_b
|
|
@@ -437,6 +454,7 @@ insuttonsignwriting,in_sutton_signwriting
|
|
|
437
454
|
insylotinagri,in_syloti_nagri
|
|
438
455
|
insymbolsandpictographsextendeda,in_symbols_and_pictographs_extended_a
|
|
439
456
|
insymbolsforlegacycomputing,in_symbols_for_legacy_computing
|
|
457
|
+
insymbolsforlegacycomputingsupplement,in_symbols_for_legacy_computing_supplement
|
|
440
458
|
insyriac,in_syriac
|
|
441
459
|
insyriacsupplement,in_syriac_supplement
|
|
442
460
|
intagalog,in_tagalog
|
|
@@ -446,12 +464,14 @@ intaile,in_tai_le
|
|
|
446
464
|
intaitham,in_tai_tham
|
|
447
465
|
intaiviet,in_tai_viet
|
|
448
466
|
intaixuanjingsymbols,in_tai_xuan_jing_symbols
|
|
467
|
+
intaiyo,in_tai_yo
|
|
449
468
|
intakri,in_takri
|
|
450
469
|
intamil,in_tamil
|
|
451
470
|
intamilsupplement,in_tamil_supplement
|
|
452
471
|
intangsa,in_tangsa
|
|
453
472
|
intangut,in_tangut
|
|
454
473
|
intangutcomponents,in_tangut_components
|
|
474
|
+
intangutcomponentssupplement,in_tangut_components_supplement
|
|
455
475
|
intangutsupplement,in_tangut_supplement
|
|
456
476
|
intelugu,in_telugu
|
|
457
477
|
inthaana,in_thaana
|
|
@@ -459,8 +479,11 @@ inthai,in_thai
|
|
|
459
479
|
intibetan,in_tibetan
|
|
460
480
|
intifinagh,in_tifinagh
|
|
461
481
|
intirhuta,in_tirhuta
|
|
482
|
+
intodhri,in_todhri
|
|
483
|
+
intolongsiki,in_tolong_siki
|
|
462
484
|
intoto,in_toto
|
|
463
485
|
intransportandmapsymbols,in_transport_and_map_symbols
|
|
486
|
+
intulutigalari,in_tulu_tigalari
|
|
464
487
|
inugaritic,in_ugaritic
|
|
465
488
|
inunifiedcanadianaboriginalsyllabics,in_unified_canadian_aboriginal_syllabics
|
|
466
489
|
inunifiedcanadianaboriginalsyllabicsextended,in_unified_canadian_aboriginal_syllabics_extended
|
|
@@ -491,6 +514,7 @@ khitansmallscript,khitan_small_script
|
|
|
491
514
|
khmer,khmer
|
|
492
515
|
khojki,khojki
|
|
493
516
|
khudawadi,khudawadi
|
|
517
|
+
kiratrai,kirat_rai
|
|
494
518
|
lao,lao
|
|
495
519
|
latin,latin
|
|
496
520
|
lepcha,lepcha
|
|
@@ -524,6 +548,7 @@ meroiticcursive,meroitic_cursive
|
|
|
524
548
|
meroitichieroglyphs,meroitic_hieroglyphs
|
|
525
549
|
miao,miao
|
|
526
550
|
modi,modi
|
|
551
|
+
modifiercombiningmark,modifier_combining_mark
|
|
527
552
|
modifierletter,modifier_letter
|
|
528
553
|
modifiersymbol,modifier_symbol
|
|
529
554
|
mongolian,mongolian
|
|
@@ -553,6 +578,7 @@ oldsogdian,old_sogdian
|
|
|
553
578
|
oldsoutharabian,old_south_arabian
|
|
554
579
|
oldturkic,old_turkic
|
|
555
580
|
olduyghur,old_uyghur
|
|
581
|
+
olonal,ol_onal
|
|
556
582
|
openpunctuation,open_punctuation
|
|
557
583
|
oriya,oriya
|
|
558
584
|
osage,osage
|
|
@@ -596,6 +622,7 @@ separator,separator
|
|
|
596
622
|
sharada,sharada
|
|
597
623
|
shavian,shavian
|
|
598
624
|
siddham,siddham
|
|
625
|
+
sidetic,sidetic
|
|
599
626
|
signwriting,signwriting
|
|
600
627
|
sinhala,sinhala
|
|
601
628
|
softdotted,soft_dotted
|
|
@@ -606,6 +633,7 @@ space,space
|
|
|
606
633
|
spaceseparator,space_separator
|
|
607
634
|
spacingmark,spacing_mark
|
|
608
635
|
sundanese,sundanese
|
|
636
|
+
sunuwar,sunuwar
|
|
609
637
|
surrogate,surrogate
|
|
610
638
|
sylotinagri,syloti_nagri
|
|
611
639
|
symbol,symbol
|
|
@@ -615,6 +643,7 @@ tagbanwa,tagbanwa
|
|
|
615
643
|
taile,tai_le
|
|
616
644
|
taitham,tai_tham
|
|
617
645
|
taiviet,tai_viet
|
|
646
|
+
taiyo,tai_yo
|
|
618
647
|
takri,takri
|
|
619
648
|
tamil,tamil
|
|
620
649
|
tangsa,tangsa
|
|
@@ -627,7 +656,10 @@ tibetan,tibetan
|
|
|
627
656
|
tifinagh,tifinagh
|
|
628
657
|
tirhuta,tirhuta
|
|
629
658
|
titlecaseletter,titlecase_letter
|
|
659
|
+
todhri,todhri
|
|
660
|
+
tolongsiki,tolong_siki
|
|
630
661
|
toto,toto
|
|
662
|
+
tulutigalari,tulu_tigalari
|
|
631
663
|
ugaritic,ugaritic
|
|
632
664
|
unassigned,unassigned
|
|
633
665
|
unifiedideograph,unified_ideograph
|