regexp_parser 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ccb0d0c3dacb9f2285a8c2458dd9e8a207a4c836
4
- data.tar.gz: 27f379340236393b559aa622bd2763d3d27ec3a1
3
+ metadata.gz: 682c13ce2716430eea274098ce40b3e842b47eba
4
+ data.tar.gz: 15faf04abe034eb801292781bb2d1503f0b1df0d
5
5
  SHA512:
6
- metadata.gz: 29fe46ace91ba3b6b106423fa212707e98ccc7f1559094f14bc9a017f4ec28e2ab9deae1e000a71c8329d65791445e73f004eda77cbd7293865ae518df1b894d
7
- data.tar.gz: 0f6f3530c2de9e60782ed914deb594137ce37735b21a50f305f1548df06896d6618b95cd69e25de9dcc0a58b03d8a08306355e67c47478e4ef09cb278caf815c
6
+ metadata.gz: 5991a90cc872e72b4361692deb7d5370c70f5baafeed5afe3e90db4a95947bdaf7e072f77c6e2ca07eb839c79bcebd9022312bcbe3f8ccfc59b84b445d889c1c
7
+ data.tar.gz: 46d4cebfd6c904002da41db9cfc7747d42775c6e55d9e2853a4fca1a18652ef368e381057dd8a83fa59908c68f2f3c45e6bee19b0a3b8fa282036cfcdef90443
data/ChangeLog CHANGED
@@ -1,3 +1,15 @@
1
+ Sun Aug 6 2015 Ammar Ali <ammarabuali@gmail.com>
2
+
3
+ * Added UnicodeBlocks support to the parser.
4
+
5
+ Mon Aug 3 2015 Garen Torikian <gjtorikian@gmail.com>
6
+
7
+ * Added UnicodeBlocks support to the scanner.
8
+
9
+ Sat Apr 18 14:38:12 2015 Ammar Ali <ammarabuali@gmail.com>
10
+
11
+ * Updated ruby versions for latest releases.
12
+
1
13
  Wed Dec 3 05:21:27 2014 Ammar Ali <ammarabuali@gmail.com>
2
14
 
3
15
  * Added expand_members method to CharacterSet, returns traditional
data/README.md CHANGED
@@ -355,12 +355,7 @@ _Note that not all of these are available in all versions of Ruby_
355
355
  | &emsp;&nbsp;_**General Categories**_ | `\p{Lu}`, `\P{Cs}` | &#x2713; |
356
356
  | &emsp;&nbsp;_**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}` | &#x2713; |
357
357
  | &emsp;&nbsp;_**Simple**_ | `\p{Dash}`, `\p{Extender}` | &#x2713; |
358
-
359
-
360
- <br/>
361
- ##### Missing Features
362
-
363
- - Unicode blocks, e.g. \p{InArrows}, \p{InArmenian}. _(h/t @gjtorikian for pointing it out)_
358
+ | &emsp;&nbsp;_**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}` | &#x2713; |
364
359
 
365
360
  ##### Inapplicable Features
366
361
 
@@ -103,6 +103,7 @@ module Regexp::Expression
103
103
  class Age < UnicodeProperty::Base; end
104
104
  class Derived < UnicodeProperty::Base; end
105
105
  class Script < UnicodeProperty::Base; end
106
+ class Block < UnicodeProperty::Base; end
106
107
  end
107
108
 
108
109
  end # module Regexp::Expression
@@ -270,6 +270,9 @@ module Regexp::Parser
270
270
  when *Regexp::Syntax::Token::UnicodeProperty::Script
271
271
  @node << Script.new(token)
272
272
 
273
+ when *Regexp::Syntax::Token::UnicodeProperty::UnicodeBlock
274
+ @node << Block.new(token)
275
+
273
276
  else
274
277
  raise UnknownTokenError.new('UnicodeProperty', token)
275
278
  end
@@ -346,9 +346,9 @@ self._re_scanner_indicies = [
346
346
  40, 40, 40, 40, 40, 40, 40, 40,
347
347
  40, 40, 40, 40, 40, 40, 40, 40,
348
348
  41, 40, 40, 40, 40, 40, 40, 40,
349
- 40, 40, 40, 40, 40, 40, 40, 40,
350
- 40, 40, 40, 40, 40, 40, 40, 40,
351
- 40, 40, 40, 40, 40, 40, 40, 40,
349
+ 40, 40, 40, 40, 40, 41, 40, 40,
350
+ 41, 41, 41, 41, 41, 41, 41, 41,
351
+ 41, 41, 40, 40, 40, 40, 40, 40,
352
352
  40, 42, 41, 41, 41, 41, 41, 41,
353
353
  41, 41, 41, 41, 41, 41, 41, 41,
354
354
  41, 41, 41, 41, 41, 41, 41, 41,
@@ -361,8 +361,8 @@ self._re_scanner_indicies = [
361
361
  40, 40, 40, 40, 40, 40, 40, 40,
362
362
  40, 40, 40, 41, 40, 40, 40, 40,
363
363
  40, 40, 40, 40, 40, 40, 40, 40,
364
- 40, 40, 40, 40, 40, 40, 40, 40,
365
- 40, 40, 40, 40, 40, 40, 40, 40,
364
+ 41, 40, 40, 41, 41, 41, 41, 41,
365
+ 41, 41, 41, 41, 41, 40, 40, 40,
366
366
  40, 40, 40, 40, 41, 41, 41, 41,
367
367
  41, 41, 41, 41, 41, 41, 41, 41,
368
368
  41, 41, 41, 41, 41, 41, 41, 41,
@@ -375,9 +375,9 @@ self._re_scanner_indicies = [
375
375
  40, 40, 40, 40, 40, 40, 40, 40,
376
376
  40, 40, 40, 40, 40, 40, 40, 40,
377
377
  40, 41, 40, 40, 40, 40, 40, 40,
378
- 40, 40, 40, 40, 40, 40, 40, 40,
379
- 40, 40, 40, 40, 40, 40, 40, 40,
380
- 40, 40, 40, 40, 40, 40, 40, 40,
378
+ 40, 40, 40, 40, 40, 40, 41, 40,
379
+ 40, 41, 41, 41, 41, 41, 41, 41,
380
+ 41, 41, 41, 40, 40, 40, 40, 40,
381
381
  40, 40, 41, 41, 41, 41, 41, 41,
382
382
  45, 41, 41, 41, 41, 41, 41, 41,
383
383
  41, 41, 41, 41, 41, 41, 41, 41,
@@ -390,9 +390,9 @@ self._re_scanner_indicies = [
390
390
  40, 40, 40, 40, 40, 40, 40, 40,
391
391
  40, 40, 40, 40, 40, 40, 40, 41,
392
392
  40, 40, 40, 40, 40, 40, 40, 40,
393
- 40, 40, 40, 40, 40, 40, 40, 40,
394
- 40, 40, 40, 40, 40, 40, 40, 40,
395
- 40, 40, 40, 40, 40, 40, 40, 40,
393
+ 40, 40, 40, 40, 41, 40, 40, 41,
394
+ 41, 41, 41, 41, 41, 41, 41, 41,
395
+ 41, 40, 40, 40, 40, 40, 40, 40,
396
396
  41, 41, 41, 41, 46, 41, 41, 41,
397
397
  41, 41, 41, 41, 41, 41, 41, 41,
398
398
  41, 41, 41, 41, 41, 41, 41, 41,
@@ -405,8 +405,8 @@ self._re_scanner_indicies = [
405
405
  40, 40, 40, 40, 40, 40, 40, 40,
406
406
  40, 40, 40, 40, 40, 41, 40, 40,
407
407
  40, 40, 40, 40, 40, 40, 40, 40,
408
- 40, 40, 40, 40, 40, 40, 40, 40,
409
- 40, 40, 40, 40, 40, 40, 40, 40,
408
+ 40, 40, 41, 40, 40, 41, 41, 41,
409
+ 41, 41, 41, 41, 41, 41, 41, 40,
410
410
  40, 40, 47, 40, 40, 40, 41, 41,
411
411
  41, 41, 41, 41, 41, 41, 41, 41,
412
412
  41, 41, 41, 41, 41, 41, 41, 41,
@@ -424,9 +424,9 @@ self._re_scanner_indicies = [
424
424
  40, 40, 40, 40, 40, 40, 40, 40,
425
425
  40, 40, 40, 40, 40, 40, 40, 41,
426
426
  40, 40, 40, 40, 40, 40, 40, 40,
427
- 40, 40, 40, 40, 40, 40, 40, 40,
428
- 40, 40, 40, 40, 40, 40, 40, 40,
429
- 40, 40, 40, 40, 40, 40, 40, 40,
427
+ 40, 40, 40, 40, 41, 40, 40, 41,
428
+ 41, 41, 41, 41, 41, 41, 41, 41,
429
+ 41, 40, 40, 40, 40, 40, 40, 40,
430
430
  42, 41, 41, 41, 41, 41, 41, 41,
431
431
  41, 41, 41, 41, 41, 41, 41, 41,
432
432
  41, 41, 41, 41, 41, 41, 41, 41,
@@ -2205,7 +2205,7 @@ te = p+1
2205
2205
  self.emit(type, :script_tagalog, text, ts-1, te)
2206
2206
  when 'thaa', 'thaana'
2207
2207
  self.emit(type, :script_thaana, text, ts-1, te)
2208
- when 'thai'
2208
+ when 'thai'
2209
2209
  self.emit(type, :script_thai, text, ts-1, te)
2210
2210
  when 'tibt', 'tibetan'
2211
2211
  self.emit(type, :script_tibetan, text, ts-1, te)
@@ -2230,6 +2230,220 @@ te = p+1
2230
2230
  when 'zzzz', 'unknown'
2231
2231
  self.emit(type, :script_unknown, text, ts-1, te)
2232
2232
 
2233
+ # Unicode blocks
2234
+ when 'inalphabeticpresentationforms'
2235
+ self.emit(type, :block_inalphabetic_presentation_forms, text, ts-1, te)
2236
+ when 'inalphabeticpresentationforms'
2237
+ self.emit(type, :block_inalphabetic_presentation_forms, text, ts-1, te)
2238
+ when 'inarabicpresentationforms-a'
2239
+ self.emit(type, :block_inarabic_presentation_forms_a, text, ts-1, te)
2240
+ when 'inarabicpresentationforms-b'
2241
+ self.emit(type, :block_inarabic_presentation_forms_b, text, ts-1, te)
2242
+ when 'inarabic'
2243
+ self.emit(type, :block_inarabic, text, ts-1, te)
2244
+ when 'inarmenian'
2245
+ self.emit(type, :block_inarmenian, text, ts-1, te)
2246
+ when 'inarrows'
2247
+ self.emit(type, :block_inarrows, text, ts-1, te)
2248
+ when 'inbasiclatin'
2249
+ self.emit(type, :block_inbasic_latin, text, ts-1, te)
2250
+ when 'inbengali'
2251
+ self.emit(type, :block_inbengali, text, ts-1, te)
2252
+ when 'inblockelements'
2253
+ self.emit(type, :block_inblock_elements, text, ts-1, te)
2254
+ when 'inbopomofoextended'
2255
+ self.emit(type, :block_inbopomofo_extended, text, ts-1, te)
2256
+ when 'inbopomofo'
2257
+ self.emit(type, :block_inbopomofo, text, ts-1, te)
2258
+ when 'inboxdrawing'
2259
+ self.emit(type, :block_inbox_drawing, text, ts-1, te)
2260
+ when 'inbraillepatterns'
2261
+ self.emit(type, :block_inbraille_patterns, text, ts-1, te)
2262
+ when 'inbuhid'
2263
+ self.emit(type, :block_inbuhid, text, ts-1, te)
2264
+ when 'incjkcompatibilityforms'
2265
+ self.emit(type, :block_incjk_compatibility_forms, text, ts-1, te)
2266
+ when 'incjkcompatibilityideographs'
2267
+ self.emit(type, :block_incjk_compatibility_ideographs, text, ts-1, te)
2268
+ when 'incjkcompatibility'
2269
+ self.emit(type, :block_incjk_compatibility, text, ts-1, te)
2270
+ when 'incjkradicalssupplement'
2271
+ self.emit(type, :block_incjk_radicals_supplement, text, ts-1, te)
2272
+ when 'incjksymbolsandpunctuation'
2273
+ self.emit(type, :block_incjk_symbols_and_punctuation, text, ts-1, te)
2274
+ when 'incjkunifiedideographsextensiona'
2275
+ self.emit(type, :block_incjk_unified_ideographs_extension_a, text, ts-1, te)
2276
+ when 'incjkunifiedideographs'
2277
+ self.emit(type, :block_incjk_unified_ideographs, text, ts-1, te)
2278
+ when 'incherokee'
2279
+ self.emit(type, :block_incherokee, text, ts-1, te)
2280
+ when 'incombiningdiacriticalmarksforsymbols'
2281
+ self.emit(type, :block_incombining_diacritical_marks_for_symbols, text, ts-1, te)
2282
+ when 'incombiningdiacriticalmarks'
2283
+ self.emit(type, :block_incombining_diacritical_marks, text, ts-1, te)
2284
+ when 'incombininghalfmarks'
2285
+ self.emit(type, :block_incombining_half_marks, text, ts-1, te)
2286
+ when 'incontrolpictures'
2287
+ self.emit(type, :block_incontrol_pictures, text, ts-1, te)
2288
+ when 'incurrencysymbols'
2289
+ self.emit(type, :block_incurrency_symbols, text, ts-1, te)
2290
+ when 'incyrillicsupplementary'
2291
+ self.emit(type, :block_incyrillic_supplementary, text, ts-1, te)
2292
+ when 'incyrillic'
2293
+ self.emit(type, :block_incyrillic, text, ts-1, te)
2294
+ when 'indevanagari'
2295
+ self.emit(type, :block_indevanagari, text, ts-1, te)
2296
+ when 'indingbats'
2297
+ self.emit(type, :block_indingbats, text, ts-1, te)
2298
+ when 'inenclosedalphanumerics'
2299
+ self.emit(type, :block_inenclosed_alphanumerics, text, ts-1, te)
2300
+ when 'inenclosedcjklettersandmonths'
2301
+ self.emit(type, :block_inenclosed_cjk_letters_and_months, text, ts-1, te)
2302
+ when 'inethiopic'
2303
+ self.emit(type, :block_inethiopic, text, ts-1, te)
2304
+ when 'ingeneralpunctuation'
2305
+ self.emit(type, :block_ingeneral_punctuation, text, ts-1, te)
2306
+ when 'ingeometricshapes'
2307
+ self.emit(type, :block_ingeometric_shapes, text, ts-1, te)
2308
+ when 'ingeorgian'
2309
+ self.emit(type, :block_ingeorgian, text, ts-1, te)
2310
+ when 'ingreekextended'
2311
+ self.emit(type, :block_ingreek_extended, text, ts-1, te)
2312
+ when 'ingreekandcoptic'
2313
+ self.emit(type, :block_ingreek_and_coptic, text, ts-1, te)
2314
+ when 'ingujarati'
2315
+ self.emit(type, :block_ingujarati, text, ts-1, te)
2316
+ when 'ingurmukhi'
2317
+ self.emit(type, :block_ingurmukhi, text, ts-1, te)
2318
+ when 'inhalfwidthandfullwidthforms'
2319
+ self.emit(type, :block_inhalfwidth_and_fullwidth_forms, text, ts-1, te)
2320
+ when 'inhangulcompatibilityjamo'
2321
+ self.emit(type, :block_inhangul_compatibility_jamo, text, ts-1, te)
2322
+ when 'inhanguljamo'
2323
+ self.emit(type, :block_inhangul_jamo, text, ts-1, te)
2324
+ when 'inhangulsyllables'
2325
+ self.emit(type, :block_inhangul_syllables, text, ts-1, te)
2326
+ when 'inhanunoo'
2327
+ self.emit(type, :block_inhanunoo, text, ts-1, te)
2328
+ when 'inhebrew'
2329
+ self.emit(type, :block_inhebrew, text, ts-1, te)
2330
+ when 'inhighprivateusesurrogates'
2331
+ self.emit(type, :block_inhigh_private_use_surrogates, text, ts-1, te)
2332
+ when 'inhighsurrogates'
2333
+ self.emit(type, :block_inhigh_surrogates, text, ts-1, te)
2334
+ when 'inhiragana'
2335
+ self.emit(type, :block_inhiragana, text, ts-1, te)
2336
+ when 'inipaextensions'
2337
+ self.emit(type, :block_inipa_extensions, text, ts-1, te)
2338
+ when 'inideographicdescriptioncharacters'
2339
+ self.emit(type, :block_inideographic_description_characters, text, ts-1, te)
2340
+ when 'inkanbun'
2341
+ self.emit(type, :block_inkanbun, text, ts-1, te)
2342
+ when 'inkangxiradicals'
2343
+ self.emit(type, :block_inkangxi_radicals, text, ts-1, te)
2344
+ when 'inkannada'
2345
+ self.emit(type, :block_inkannada, text, ts-1, te)
2346
+ when 'inkatakanaphoneticextensions'
2347
+ self.emit(type, :block_inkatakana_phonetic_extensions, text, ts-1, te)
2348
+ when 'inkatakana'
2349
+ self.emit(type, :block_inkatakana, text, ts-1, te)
2350
+ when 'inkhmersymbols'
2351
+ self.emit(type, :block_inkhmer_symbols, text, ts-1, te)
2352
+ when 'inkhmer'
2353
+ self.emit(type, :block_inkhmer, text, ts-1, te)
2354
+ when 'inlao'
2355
+ self.emit(type, :block_inlao, text, ts-1, te)
2356
+ when 'inlatin-1supplement'
2357
+ self.emit(type, :block_inlatin_1_supplement, text, ts-1, te)
2358
+ when 'inlatinextended-a'
2359
+ self.emit(type, :block_inlatin_extended_a, text, ts-1, te)
2360
+ when 'inlatinextended-b'
2361
+ self.emit(type, :block_inlatin_extended_b, text, ts-1, te)
2362
+ when 'inlatinextendedadditional'
2363
+ self.emit(type, :block_inlatin_extended_additional, text, ts-1, te)
2364
+ when 'inletterlikesymbols'
2365
+ self.emit(type, :block_inletterlike_symbols, text, ts-1, te)
2366
+ when 'inlimbu'
2367
+ self.emit(type, :block_inlimbu, text, ts-1, te)
2368
+ when 'inlowsurrogates'
2369
+ self.emit(type, :block_inlow_surrogates, text, ts-1, te)
2370
+ when 'inmalayalam'
2371
+ self.emit(type, :block_inmalayalam, text, ts-1, te)
2372
+ when 'inmathematicaloperators'
2373
+ self.emit(type, :block_inmathematical_operators, text, ts-1, te)
2374
+ when 'inmiscellaneousmathematicalsymbols-a'
2375
+ self.emit(type, :block_inmiscellaneous_mathematical_symbols_a, text, ts-1, te)
2376
+ when 'inmiscellaneousmathematicalsymbols-b'
2377
+ self.emit(type, :block_inmiscellaneous_mathematical_symbols_b, text, ts-1, te)
2378
+ when 'inmiscellaneoussymbolsandarrows'
2379
+ self.emit(type, :block_inmiscellaneous_symbols_and_arrows, text, ts-1, te)
2380
+ when 'inmiscellaneoussymbols'
2381
+ self.emit(type, :block_inmiscellaneous_symbols, text, ts-1, te)
2382
+ when 'inmiscellaneoustechnical'
2383
+ self.emit(type, :block_inmiscellaneous_technical, text, ts-1, te)
2384
+ when 'inmongolian'
2385
+ self.emit(type, :block_inmongolian, text, ts-1, te)
2386
+ when 'inmyanmar'
2387
+ self.emit(type, :block_inmyanmar, text, ts-1, te)
2388
+ when 'innumberforms'
2389
+ self.emit(type, :block_innumber_forms, text, ts-1, te)
2390
+ when 'inogham'
2391
+ self.emit(type, :block_inogham, text, ts-1, te)
2392
+ when 'inopticalcharacterrecognition'
2393
+ self.emit(type, :block_inoptical_character_recognition, text, ts-1, te)
2394
+ when 'inoriya'
2395
+ self.emit(type, :block_inoriya, text, ts-1, te)
2396
+ when 'inphoneticextensions'
2397
+ self.emit(type, :block_inphonetic_extensions, text, ts-1, te)
2398
+ when 'inprivateusearea'
2399
+ self.emit(type, :block_inprivate_use_area, text, ts-1, te)
2400
+ when 'inrunic'
2401
+ self.emit(type, :block_inrunic, text, ts-1, te)
2402
+ when 'insinhala'
2403
+ self.emit(type, :block_insinhala, text, ts-1, te)
2404
+ when 'insmallformvariants'
2405
+ self.emit(type, :block_insmall_form_variants, text, ts-1, te)
2406
+ when 'inspacingmodifierletters'
2407
+ self.emit(type, :block_inspacing_modifier_letters, text, ts-1, te)
2408
+ when 'inspecials'
2409
+ self.emit(type, :block_inspecials, text, ts-1, te)
2410
+ when 'insuperscriptsandsubscripts'
2411
+ self.emit(type, :block_insuperscripts_and_subscripts, text, ts-1, te)
2412
+ when 'insupplementalarrows-a'
2413
+ self.emit(type, :block_insupplemental_arrows_a, text, ts-1, te)
2414
+ when 'insupplementalarrows-b'
2415
+ self.emit(type, :block_insupplemental_arrows_b, text, ts-1, te)
2416
+ when 'insupplementalmathematicaloperators'
2417
+ self.emit(type, :block_insupplemental_mathematical_operators, text, ts-1, te)
2418
+ when 'insyriac'
2419
+ self.emit(type, :block_insyriac, text, ts-1, te)
2420
+ when 'intagalog'
2421
+ self.emit(type, :block_intagalog, text, ts-1, te)
2422
+ when 'intagbanwa'
2423
+ self.emit(type, :block_intagbanwa, text, ts-1, te)
2424
+ when 'intaile'
2425
+ self.emit(type, :block_intai_le, text, ts-1, te)
2426
+ when 'intamil'
2427
+ self.emit(type, :block_intamil, text, ts-1, te)
2428
+ when 'intelugu'
2429
+ self.emit(type, :block_intelugu, text, ts-1, te)
2430
+ when 'inthaana'
2431
+ self.emit(type, :block_inthaana, text, ts-1, te)
2432
+ when 'inthai'
2433
+ self.emit(type, :block_inthai, text, ts-1, te)
2434
+ when 'intibetan'
2435
+ self.emit(type, :block_intibetan, text, ts-1, te)
2436
+ when 'inunifiedcanadianaboriginalsyllabics'
2437
+ self.emit(type, :block_inunified_canadian_aboriginal_syllabics, text, ts-1, te)
2438
+ when 'invariationselectors'
2439
+ self.emit(type, :block_invariation_selectors, text, ts-1, te)
2440
+ when 'inyiradicals'
2441
+ self.emit(type, :block_inyi_radicals, text, ts-1, te)
2442
+ when 'inyisyllables'
2443
+ self.emit(type, :block_inyi_syllables, text, ts-1, te)
2444
+ when 'inyijinghexagramsymbols'
2445
+ self.emit(type, :block_inyijing_hexagram_symbols, text, ts-1, te)
2446
+
2233
2447
  else
2234
2448
  # Should this really be an error? Or would emitting
2235
2449
  # an :unknown for the property be better?
@@ -3791,7 +4005,7 @@ te = p+1
3791
4005
  # line 764 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner/scanner.rl"
3792
4006
  begin
3793
4007
  act = 59; end
3794
- # line 3795 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner.rb"
4008
+ # line 4009 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner.rb"
3795
4009
  end
3796
4010
  end
3797
4011
  end
@@ -3809,7 +4023,7 @@ ts = nil; end
3809
4023
  begin
3810
4024
  act = 0
3811
4025
  end
3812
- # line 3813 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner.rb"
4026
+ # line 4027 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner.rb"
3813
4027
  end
3814
4028
 
3815
4029
  if cs == 0
@@ -3843,7 +4057,7 @@ act = 0
3843
4057
  text = ts ? copy(data, ts-1..-1) : data.pack('c*')
3844
4058
  raise PrematureEndError.new( text )
3845
4059
  end
3846
- # line 3847 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner.rb"
4060
+ # line 4061 "/Users/ammar/src/ruby/projects/regexp_parser/lib/regexp_parser/scanner.rb"
3847
4061
  end
3848
4062
  end
3849
4063
 
@@ -33,7 +33,7 @@
33
33
  'id_start'i | 'id_continue'i |
34
34
  'xid_start'i | 'xid_continue'i |
35
35
  'grapheme_base'i | 'grapheme_extend'i |
36
- 'default_ignorable_code_point'i;
36
+ 'default_ignorable_code_point'i;
37
37
 
38
38
  property_age = 'age=1.1'i | 'age=2.0'i | 'age=2.1'i |
39
39
  'age=3.0'i | 'age=3.1'i | 'age=3.2'i |
@@ -42,7 +42,7 @@
42
42
  'age=6.1'i | 'age=6.2'i | 'age=6.3'i |
43
43
  'age=7.0'i;
44
44
 
45
- property_script = (alpha | space | '_')+; # everything else
45
+ property_script = (alnum | space | '_' | '-')+; # everything else
46
46
 
47
47
  property_sequence = property_char . '{' . '^'? (
48
48
  property_name | general_category |
@@ -553,7 +553,7 @@
553
553
  self.emit(type, :script_tagalog, text, ts-1, te)
554
554
  when 'thaa', 'thaana'
555
555
  self.emit(type, :script_thaana, text, ts-1, te)
556
- when 'thai'
556
+ when 'thai'
557
557
  self.emit(type, :script_thai, text, ts-1, te)
558
558
  when 'tibt', 'tibetan'
559
559
  self.emit(type, :script_tibetan, text, ts-1, te)
@@ -578,6 +578,220 @@
578
578
  when 'zzzz', 'unknown'
579
579
  self.emit(type, :script_unknown, text, ts-1, te)
580
580
 
581
+ # Unicode blocks
582
+ when 'inalphabeticpresentationforms'
583
+ self.emit(type, :block_inalphabetic_presentation_forms, text, ts-1, te)
584
+ when 'inalphabeticpresentationforms'
585
+ self.emit(type, :block_inalphabetic_presentation_forms, text, ts-1, te)
586
+ when 'inarabicpresentationforms-a'
587
+ self.emit(type, :block_inarabic_presentation_forms_a, text, ts-1, te)
588
+ when 'inarabicpresentationforms-b'
589
+ self.emit(type, :block_inarabic_presentation_forms_b, text, ts-1, te)
590
+ when 'inarabic'
591
+ self.emit(type, :block_inarabic, text, ts-1, te)
592
+ when 'inarmenian'
593
+ self.emit(type, :block_inarmenian, text, ts-1, te)
594
+ when 'inarrows'
595
+ self.emit(type, :block_inarrows, text, ts-1, te)
596
+ when 'inbasiclatin'
597
+ self.emit(type, :block_inbasic_latin, text, ts-1, te)
598
+ when 'inbengali'
599
+ self.emit(type, :block_inbengali, text, ts-1, te)
600
+ when 'inblockelements'
601
+ self.emit(type, :block_inblock_elements, text, ts-1, te)
602
+ when 'inbopomofoextended'
603
+ self.emit(type, :block_inbopomofo_extended, text, ts-1, te)
604
+ when 'inbopomofo'
605
+ self.emit(type, :block_inbopomofo, text, ts-1, te)
606
+ when 'inboxdrawing'
607
+ self.emit(type, :block_inbox_drawing, text, ts-1, te)
608
+ when 'inbraillepatterns'
609
+ self.emit(type, :block_inbraille_patterns, text, ts-1, te)
610
+ when 'inbuhid'
611
+ self.emit(type, :block_inbuhid, text, ts-1, te)
612
+ when 'incjkcompatibilityforms'
613
+ self.emit(type, :block_incjk_compatibility_forms, text, ts-1, te)
614
+ when 'incjkcompatibilityideographs'
615
+ self.emit(type, :block_incjk_compatibility_ideographs, text, ts-1, te)
616
+ when 'incjkcompatibility'
617
+ self.emit(type, :block_incjk_compatibility, text, ts-1, te)
618
+ when 'incjkradicalssupplement'
619
+ self.emit(type, :block_incjk_radicals_supplement, text, ts-1, te)
620
+ when 'incjksymbolsandpunctuation'
621
+ self.emit(type, :block_incjk_symbols_and_punctuation, text, ts-1, te)
622
+ when 'incjkunifiedideographsextensiona'
623
+ self.emit(type, :block_incjk_unified_ideographs_extension_a, text, ts-1, te)
624
+ when 'incjkunifiedideographs'
625
+ self.emit(type, :block_incjk_unified_ideographs, text, ts-1, te)
626
+ when 'incherokee'
627
+ self.emit(type, :block_incherokee, text, ts-1, te)
628
+ when 'incombiningdiacriticalmarksforsymbols'
629
+ self.emit(type, :block_incombining_diacritical_marks_for_symbols, text, ts-1, te)
630
+ when 'incombiningdiacriticalmarks'
631
+ self.emit(type, :block_incombining_diacritical_marks, text, ts-1, te)
632
+ when 'incombininghalfmarks'
633
+ self.emit(type, :block_incombining_half_marks, text, ts-1, te)
634
+ when 'incontrolpictures'
635
+ self.emit(type, :block_incontrol_pictures, text, ts-1, te)
636
+ when 'incurrencysymbols'
637
+ self.emit(type, :block_incurrency_symbols, text, ts-1, te)
638
+ when 'incyrillicsupplementary'
639
+ self.emit(type, :block_incyrillic_supplementary, text, ts-1, te)
640
+ when 'incyrillic'
641
+ self.emit(type, :block_incyrillic, text, ts-1, te)
642
+ when 'indevanagari'
643
+ self.emit(type, :block_indevanagari, text, ts-1, te)
644
+ when 'indingbats'
645
+ self.emit(type, :block_indingbats, text, ts-1, te)
646
+ when 'inenclosedalphanumerics'
647
+ self.emit(type, :block_inenclosed_alphanumerics, text, ts-1, te)
648
+ when 'inenclosedcjklettersandmonths'
649
+ self.emit(type, :block_inenclosed_cjk_letters_and_months, text, ts-1, te)
650
+ when 'inethiopic'
651
+ self.emit(type, :block_inethiopic, text, ts-1, te)
652
+ when 'ingeneralpunctuation'
653
+ self.emit(type, :block_ingeneral_punctuation, text, ts-1, te)
654
+ when 'ingeometricshapes'
655
+ self.emit(type, :block_ingeometric_shapes, text, ts-1, te)
656
+ when 'ingeorgian'
657
+ self.emit(type, :block_ingeorgian, text, ts-1, te)
658
+ when 'ingreekextended'
659
+ self.emit(type, :block_ingreek_extended, text, ts-1, te)
660
+ when 'ingreekandcoptic'
661
+ self.emit(type, :block_ingreek_and_coptic, text, ts-1, te)
662
+ when 'ingujarati'
663
+ self.emit(type, :block_ingujarati, text, ts-1, te)
664
+ when 'ingurmukhi'
665
+ self.emit(type, :block_ingurmukhi, text, ts-1, te)
666
+ when 'inhalfwidthandfullwidthforms'
667
+ self.emit(type, :block_inhalfwidth_and_fullwidth_forms, text, ts-1, te)
668
+ when 'inhangulcompatibilityjamo'
669
+ self.emit(type, :block_inhangul_compatibility_jamo, text, ts-1, te)
670
+ when 'inhanguljamo'
671
+ self.emit(type, :block_inhangul_jamo, text, ts-1, te)
672
+ when 'inhangulsyllables'
673
+ self.emit(type, :block_inhangul_syllables, text, ts-1, te)
674
+ when 'inhanunoo'
675
+ self.emit(type, :block_inhanunoo, text, ts-1, te)
676
+ when 'inhebrew'
677
+ self.emit(type, :block_inhebrew, text, ts-1, te)
678
+ when 'inhighprivateusesurrogates'
679
+ self.emit(type, :block_inhigh_private_use_surrogates, text, ts-1, te)
680
+ when 'inhighsurrogates'
681
+ self.emit(type, :block_inhigh_surrogates, text, ts-1, te)
682
+ when 'inhiragana'
683
+ self.emit(type, :block_inhiragana, text, ts-1, te)
684
+ when 'inipaextensions'
685
+ self.emit(type, :block_inipa_extensions, text, ts-1, te)
686
+ when 'inideographicdescriptioncharacters'
687
+ self.emit(type, :block_inideographic_description_characters, text, ts-1, te)
688
+ when 'inkanbun'
689
+ self.emit(type, :block_inkanbun, text, ts-1, te)
690
+ when 'inkangxiradicals'
691
+ self.emit(type, :block_inkangxi_radicals, text, ts-1, te)
692
+ when 'inkannada'
693
+ self.emit(type, :block_inkannada, text, ts-1, te)
694
+ when 'inkatakanaphoneticextensions'
695
+ self.emit(type, :block_inkatakana_phonetic_extensions, text, ts-1, te)
696
+ when 'inkatakana'
697
+ self.emit(type, :block_inkatakana, text, ts-1, te)
698
+ when 'inkhmersymbols'
699
+ self.emit(type, :block_inkhmer_symbols, text, ts-1, te)
700
+ when 'inkhmer'
701
+ self.emit(type, :block_inkhmer, text, ts-1, te)
702
+ when 'inlao'
703
+ self.emit(type, :block_inlao, text, ts-1, te)
704
+ when 'inlatin-1supplement'
705
+ self.emit(type, :block_inlatin_1_supplement, text, ts-1, te)
706
+ when 'inlatinextended-a'
707
+ self.emit(type, :block_inlatin_extended_a, text, ts-1, te)
708
+ when 'inlatinextended-b'
709
+ self.emit(type, :block_inlatin_extended_b, text, ts-1, te)
710
+ when 'inlatinextendedadditional'
711
+ self.emit(type, :block_inlatin_extended_additional, text, ts-1, te)
712
+ when 'inletterlikesymbols'
713
+ self.emit(type, :block_inletterlike_symbols, text, ts-1, te)
714
+ when 'inlimbu'
715
+ self.emit(type, :block_inlimbu, text, ts-1, te)
716
+ when 'inlowsurrogates'
717
+ self.emit(type, :block_inlow_surrogates, text, ts-1, te)
718
+ when 'inmalayalam'
719
+ self.emit(type, :block_inmalayalam, text, ts-1, te)
720
+ when 'inmathematicaloperators'
721
+ self.emit(type, :block_inmathematical_operators, text, ts-1, te)
722
+ when 'inmiscellaneousmathematicalsymbols-a'
723
+ self.emit(type, :block_inmiscellaneous_mathematical_symbols_a, text, ts-1, te)
724
+ when 'inmiscellaneousmathematicalsymbols-b'
725
+ self.emit(type, :block_inmiscellaneous_mathematical_symbols_b, text, ts-1, te)
726
+ when 'inmiscellaneoussymbolsandarrows'
727
+ self.emit(type, :block_inmiscellaneous_symbols_and_arrows, text, ts-1, te)
728
+ when 'inmiscellaneoussymbols'
729
+ self.emit(type, :block_inmiscellaneous_symbols, text, ts-1, te)
730
+ when 'inmiscellaneoustechnical'
731
+ self.emit(type, :block_inmiscellaneous_technical, text, ts-1, te)
732
+ when 'inmongolian'
733
+ self.emit(type, :block_inmongolian, text, ts-1, te)
734
+ when 'inmyanmar'
735
+ self.emit(type, :block_inmyanmar, text, ts-1, te)
736
+ when 'innumberforms'
737
+ self.emit(type, :block_innumber_forms, text, ts-1, te)
738
+ when 'inogham'
739
+ self.emit(type, :block_inogham, text, ts-1, te)
740
+ when 'inopticalcharacterrecognition'
741
+ self.emit(type, :block_inoptical_character_recognition, text, ts-1, te)
742
+ when 'inoriya'
743
+ self.emit(type, :block_inoriya, text, ts-1, te)
744
+ when 'inphoneticextensions'
745
+ self.emit(type, :block_inphonetic_extensions, text, ts-1, te)
746
+ when 'inprivateusearea'
747
+ self.emit(type, :block_inprivate_use_area, text, ts-1, te)
748
+ when 'inrunic'
749
+ self.emit(type, :block_inrunic, text, ts-1, te)
750
+ when 'insinhala'
751
+ self.emit(type, :block_insinhala, text, ts-1, te)
752
+ when 'insmallformvariants'
753
+ self.emit(type, :block_insmall_form_variants, text, ts-1, te)
754
+ when 'inspacingmodifierletters'
755
+ self.emit(type, :block_inspacing_modifier_letters, text, ts-1, te)
756
+ when 'inspecials'
757
+ self.emit(type, :block_inspecials, text, ts-1, te)
758
+ when 'insuperscriptsandsubscripts'
759
+ self.emit(type, :block_insuperscripts_and_subscripts, text, ts-1, te)
760
+ when 'insupplementalarrows-a'
761
+ self.emit(type, :block_insupplemental_arrows_a, text, ts-1, te)
762
+ when 'insupplementalarrows-b'
763
+ self.emit(type, :block_insupplemental_arrows_b, text, ts-1, te)
764
+ when 'insupplementalmathematicaloperators'
765
+ self.emit(type, :block_insupplemental_mathematical_operators, text, ts-1, te)
766
+ when 'insyriac'
767
+ self.emit(type, :block_insyriac, text, ts-1, te)
768
+ when 'intagalog'
769
+ self.emit(type, :block_intagalog, text, ts-1, te)
770
+ when 'intagbanwa'
771
+ self.emit(type, :block_intagbanwa, text, ts-1, te)
772
+ when 'intaile'
773
+ self.emit(type, :block_intai_le, text, ts-1, te)
774
+ when 'intamil'
775
+ self.emit(type, :block_intamil, text, ts-1, te)
776
+ when 'intelugu'
777
+ self.emit(type, :block_intelugu, text, ts-1, te)
778
+ when 'inthaana'
779
+ self.emit(type, :block_inthaana, text, ts-1, te)
780
+ when 'inthai'
781
+ self.emit(type, :block_inthai, text, ts-1, te)
782
+ when 'intibetan'
783
+ self.emit(type, :block_intibetan, text, ts-1, te)
784
+ when 'inunifiedcanadianaboriginalsyllabics'
785
+ self.emit(type, :block_inunified_canadian_aboriginal_syllabics, text, ts-1, te)
786
+ when 'invariationselectors'
787
+ self.emit(type, :block_invariation_selectors, text, ts-1, te)
788
+ when 'inyiradicals'
789
+ self.emit(type, :block_inyi_radicals, text, ts-1, te)
790
+ when 'inyisyllables'
791
+ self.emit(type, :block_inyi_syllables, text, ts-1, te)
792
+ when 'inyijinghexagramsymbols'
793
+ self.emit(type, :block_inyijing_hexagram_symbols, text, ts-1, te)
794
+
581
795
  else
582
796
  # Should this really be an error? Or would emitting
583
797
  # an :unknown for the property be better?
@@ -225,7 +225,115 @@ module Regexp::Syntax
225
225
  :script_warang_citi
226
226
  ]
227
227
 
228
- V190 = CharType + POSIX + Category::All + Derived + Script
228
+ UnicodeBlock = [
229
+ :block_inalphabetic_presentation_forms,
230
+ :block_inarabic_presentation_forms_a,
231
+ :block_inarabic_presentation_forms_b,
232
+ :block_inarabic,
233
+ :block_inarmenian,
234
+ :block_inarrows,
235
+ :block_inbasic_latin,
236
+ :block_inbengali,
237
+ :block_inblock_elements,
238
+ :block_inbopomofo_extended,
239
+ :block_inbopomofo,
240
+ :block_inbox_drawing,
241
+ :block_inbraille_patterns,
242
+ :block_inbuhid,
243
+ :block_incjk_compatibility_forms,
244
+ :block_incjk_compatibility_ideographs,
245
+ :block_incjk_compatibility,
246
+ :block_incjk_radicals_supplement,
247
+ :block_incjk_symbols_and_punctuation,
248
+ :block_incjk_unified_ideographs_extension_a,
249
+ :block_incjk_unified_ideographs,
250
+ :block_incherokee,
251
+ :block_incombining_diacritical_marks_for_symbols,
252
+ :block_incombining_diacritical_marks,
253
+ :block_incombining_half_marks,
254
+ :block_incontrol_pictures,
255
+ :block_incurrency_symbols,
256
+ :block_incyrillic_supplementary,
257
+ :block_incyrillic,
258
+ :block_indevanagari,
259
+ :block_indingbats,
260
+ :block_inenclosed_alphanumerics,
261
+ :block_inenclosed_cjk_letters_and_months,
262
+ :block_inethiopic,
263
+ :block_ingeneral_punctuation,
264
+ :block_ingeometric_shapes,
265
+ :block_ingeorgian,
266
+ :block_ingreek_extended,
267
+ :block_ingreek_and_coptic,
268
+ :block_ingujarati,
269
+ :block_ingurmukhi,
270
+ :block_inhalfwidth_and_fullwidth_forms,
271
+ :block_inhangul_compatibility_jamo,
272
+ :block_inhangul_jamo,
273
+ :block_inhangul_syllables,
274
+ :block_inhanunoo,
275
+ :block_inhebrew,
276
+ :block_inhigh_private_use_surrogates,
277
+ :block_inhigh_surrogates,
278
+ :block_inhiragana,
279
+ :block_inipa_extensions,
280
+ :block_inideographic_description_characters,
281
+ :block_inkanbun,
282
+ :block_inkangxi_radicals,
283
+ :block_inkannada,
284
+ :block_inkatakana_phonetic_extensions,
285
+ :block_inkatakana,
286
+ :block_inkhmer_symbols,
287
+ :block_inkhmer,
288
+ :block_inlao,
289
+ :block_inlatin_1_supplement,
290
+ :block_inlatin_extended_a,
291
+ :block_inlatin_extended_b,
292
+ :block_inlatin_extended_additional,
293
+ :block_inletterlike_symbols,
294
+ :block_inlimbu,
295
+ :block_inlow_surrogates,
296
+ :block_inmalayalam,
297
+ :block_inmathematical_operators,
298
+ :block_inmiscellaneous_mathematical_symbols_a,
299
+ :block_inmiscellaneous_mathematical_symbols_b,
300
+ :block_inmiscellaneous_symbols_and_arrows,
301
+ :block_inmiscellaneous_symbols,
302
+ :block_inmiscellaneous_technical,
303
+ :block_inmongolian,
304
+ :block_inmyanmar,
305
+ :block_innumber_forms,
306
+ :block_inogham,
307
+ :block_inoptical_character_recognition,
308
+ :block_inoriya,
309
+ :block_inphonetic_extensions,
310
+ :block_inprivate_use_area,
311
+ :block_inrunic,
312
+ :block_insinhala,
313
+ :block_insmall_form_variants,
314
+ :block_inspacing_modifier_letters,
315
+ :block_inspecials,
316
+ :block_insuperscripts_and_subscripts,
317
+ :block_insupplemental_arrows_a,
318
+ :block_insupplemental_arrows_b,
319
+ :block_insupplemental_mathematical_operators,
320
+ :block_insyriac,
321
+ :block_intagalog,
322
+ :block_intagbanwa,
323
+ :block_intai_le,
324
+ :block_intamil,
325
+ :block_intelugu,
326
+ :block_inthaana,
327
+ :block_inthai,
328
+ :block_intibetan,
329
+ :block_inunified_canadian_aboriginal_syllabics,
330
+ :block_invariation_selectors,
331
+ :block_inyi_radicals,
332
+ :block_inyi_syllables,
333
+ :block_inyijing_hexagram_symbols,
334
+ ]
335
+
336
+ V190 = CharType + POSIX + Category::All + Derived + Script + UnicodeBlock
229
337
  V193 = Age_V193 + Script_6_0
230
338
 
231
339
  V200 = Age_V200
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  module Parser
3
- VERSION = '0.2.1'
3
+ VERSION = '0.3.0'
4
4
  end
5
5
  end
@@ -337,6 +337,13 @@ class ParserProperties < Test::Unit::TestCase
337
337
  "Expected Script property, but got #{t.expressions[1].class.name}")
338
338
  end
339
339
 
340
+ def test_parse_property_block
341
+ t = RP.parse 'ab\p{InArmenian}cd', 'ruby/1.9'
342
+
343
+ assert( t.expressions[1].is_a?(UnicodeProperty::Block),
344
+ "Expected Block property, but got #{t.expressions[1].class.name}")
345
+ end
346
+
340
347
  def test_parse_property_following_literal
341
348
  t = RP.parse 'ab\p{Lu}cd', 'ruby/1.9'
342
349
 
@@ -2,7 +2,7 @@ require File.expand_path("../../helpers", __FILE__)
2
2
 
3
3
  %w{
4
4
  anchors errors escapes free_space groups literals
5
- meta properties quantifiers scripts sets types
5
+ meta properties quantifiers scripts sets types unicode_blocks
6
6
  }.each do|tc|
7
7
  require File.expand_path("../test_#{tc}", __FILE__)
8
8
  end
@@ -0,0 +1,130 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class ScannerUnicodeBlocks < Test::Unit::TestCase
4
+
5
+ tests = {
6
+ 'InAlphabetic_Presentation_Forms' => :block_inalphabetic_presentation_forms,
7
+ 'InArabic_Presentation_Forms-A' => :block_inarabic_presentation_forms_a,
8
+ 'InArabic_Presentation_Forms-B' => :block_inarabic_presentation_forms_b,
9
+ 'InArabic' => :block_inarabic,
10
+ 'InArmenian' => :block_inarmenian,
11
+ 'InArrows' => :block_inarrows,
12
+ 'InBasic_Latin' => :block_inbasic_latin,
13
+ 'InBengali' => :block_inbengali,
14
+ 'InBlock_Elements' => :block_inblock_elements,
15
+ 'InBopomofo_Extended' => :block_inbopomofo_extended,
16
+ 'InBopomofo' => :block_inbopomofo,
17
+ 'InBox_Drawing' => :block_inbox_drawing,
18
+ 'InBraille_Patterns' => :block_inbraille_patterns,
19
+ 'InBuhid' => :block_inbuhid,
20
+ 'InCJK_Compatibility_Forms' => :block_incjk_compatibility_forms,
21
+ 'InCJK_Compatibility_Ideographs' => :block_incjk_compatibility_ideographs,
22
+ 'InCJK_Compatibility' => :block_incjk_compatibility,
23
+ 'InCJK_Radicals_Supplement' => :block_incjk_radicals_supplement,
24
+ 'InCJK_Symbols_and_Punctuation' => :block_incjk_symbols_and_punctuation,
25
+ 'InCJK_Unified_Ideographs_Extension_A' => :block_incjk_unified_ideographs_extension_a,
26
+ 'InCJK_Unified_Ideographs' => :block_incjk_unified_ideographs,
27
+ 'InCherokee' => :block_incherokee,
28
+ 'InCombining_Diacritical_Marks_for_Symbols' => :block_incombining_diacritical_marks_for_symbols,
29
+ 'InCombining_Diacritical_Marks' => :block_incombining_diacritical_marks,
30
+ 'InCombining_Half_Marks' => :block_incombining_half_marks,
31
+ 'InControl_Pictures' => :block_incontrol_pictures,
32
+ 'InCurrency_Symbols' => :block_incurrency_symbols,
33
+ 'InCyrillic_Supplementary' => :block_incyrillic_supplementary,
34
+ 'InCyrillic' => :block_incyrillic,
35
+ 'InDevanagari' => :block_indevanagari,
36
+ 'InDingbats' => :block_indingbats,
37
+ 'InEnclosed_Alphanumerics' => :block_inenclosed_alphanumerics,
38
+ 'InEnclosed_CJK_Letters_and_Months' => :block_inenclosed_cjk_letters_and_months,
39
+ 'InEthiopic' => :block_inethiopic,
40
+ 'InGeneral_Punctuation' => :block_ingeneral_punctuation,
41
+ 'InGeometric_Shapes' => :block_ingeometric_shapes,
42
+ 'InGeorgian' => :block_ingeorgian,
43
+ 'InGreek_Extended' => :block_ingreek_extended,
44
+ 'InGreek_and_Coptic' => :block_ingreek_and_coptic,
45
+ 'InGujarati' => :block_ingujarati,
46
+ 'InGurmukhi' => :block_ingurmukhi,
47
+ 'InHalfwidth_and_Fullwidth_Forms' => :block_inhalfwidth_and_fullwidth_forms,
48
+ 'InHangul_Compatibility_Jamo' => :block_inhangul_compatibility_jamo,
49
+ 'InHangul_Jamo' => :block_inhangul_jamo,
50
+ 'InHangul_Syllables' => :block_inhangul_syllables,
51
+ 'InHanunoo' => :block_inhanunoo,
52
+ 'InHebrew' => :block_inhebrew,
53
+ 'InHigh_Private_Use_Surrogates' => :block_inhigh_private_use_surrogates,
54
+ 'InHigh_Surrogates' => :block_inhigh_surrogates,
55
+ 'InHiragana' => :block_inhiragana,
56
+ 'InIPA_Extensions' => :block_inipa_extensions,
57
+ 'InIdeographic_Description_Characters' => :block_inideographic_description_characters,
58
+ 'InKanbun' => :block_inkanbun,
59
+ 'InKangxi_Radicals' => :block_inkangxi_radicals,
60
+ 'InKannada' => :block_inkannada,
61
+ 'InKatakana_Phonetic_Extensions' => :block_inkatakana_phonetic_extensions,
62
+ 'InKatakana' => :block_inkatakana,
63
+ 'InKhmer_Symbols' => :block_inkhmer_symbols,
64
+ 'InKhmer' => :block_inkhmer,
65
+ 'InLao' => :block_inlao,
66
+ 'InLatin-1_Supplement' => :block_inlatin_1_supplement,
67
+ 'InLatin_Extended-A' => :block_inlatin_extended_a,
68
+ 'InLatin_Extended-B' => :block_inlatin_extended_b,
69
+ 'InLatin_Extended_Additional' => :block_inlatin_extended_additional,
70
+ 'InLetterlike_Symbols' => :block_inletterlike_symbols,
71
+ 'InLimbu' => :block_inlimbu,
72
+ 'InLow_Surrogates' => :block_inlow_surrogates,
73
+ 'InMalayalam' => :block_inmalayalam,
74
+ 'InMathematical_Operators' => :block_inmathematical_operators,
75
+ 'InMiscellaneous_Mathematical_Symbols-A' => :block_inmiscellaneous_mathematical_symbols_a,
76
+ 'InMiscellaneous_Mathematical_Symbols-B' => :block_inmiscellaneous_mathematical_symbols_b,
77
+ 'InMiscellaneous_Symbols_and_Arrows' => :block_inmiscellaneous_symbols_and_arrows,
78
+ 'InMiscellaneous_Symbols' => :block_inmiscellaneous_symbols,
79
+ 'InMiscellaneous_Technical' => :block_inmiscellaneous_technical,
80
+ 'InMongolian' => :block_inmongolian,
81
+ 'InMyanmar' => :block_inmyanmar,
82
+ 'InNumber_Forms' => :block_innumber_forms,
83
+ 'InOgham' => :block_inogham,
84
+ 'InOptical_Character_Recognition' => :block_inoptical_character_recognition,
85
+ 'InOriya' => :block_inoriya,
86
+ 'InPhonetic_Extensions' => :block_inphonetic_extensions,
87
+ 'InPrivate_Use_Area' => :block_inprivate_use_area,
88
+ 'InRunic' => :block_inrunic,
89
+ 'InSinhala' => :block_insinhala,
90
+ 'InSmall_Form_Variants' => :block_insmall_form_variants,
91
+ 'InSpacing_Modifier_Letters' => :block_inspacing_modifier_letters,
92
+ 'InSpecials' => :block_inspecials,
93
+ 'InSuperscripts_and_Subscripts' => :block_insuperscripts_and_subscripts,
94
+ 'InSupplemental_Arrows-A' => :block_insupplemental_arrows_a,
95
+ 'InSupplemental_Arrows-B' => :block_insupplemental_arrows_b,
96
+ 'InSupplemental_Mathematical_Operators' => :block_insupplemental_mathematical_operators,
97
+ 'InSyriac' => :block_insyriac,
98
+ 'InTagalog' => :block_intagalog,
99
+ 'InTagbanwa' => :block_intagbanwa,
100
+ 'InTai_Le' => :block_intai_le,
101
+ 'InTamil' => :block_intamil,
102
+ 'InTelugu' => :block_intelugu,
103
+ 'InThaana' => :block_inthaana,
104
+ 'InThai' => :block_inthai,
105
+ 'InTibetan' => :block_intibetan,
106
+ 'InUnified_Canadian_Aboriginal_Syllabics' => :block_inunified_canadian_aboriginal_syllabics,
107
+ 'InVariation_Selectors' => :block_invariation_selectors,
108
+ 'InYi_Radicals' => :block_inyi_radicals,
109
+ 'InYi_Syllables' => :block_inyi_syllables,
110
+ 'InYijing_Hexagram_Symbols' => :block_inyijing_hexagram_symbols
111
+ }
112
+
113
+ count = 0
114
+ tests.each do |property, test|
115
+ define_method "test_scan_property_#{test}_#{count+=1}" do
116
+ token = RS.scan("a\\p{#{property}}c")[1]
117
+
118
+ assert_equal( :property, token[0] )
119
+ assert_equal( test, token[1] )
120
+ end
121
+
122
+ define_method "test_scan_nonproperty_#{test}_#{count+=1}" do
123
+ token = RS.scan("a\\P{#{property}}c")[1]
124
+
125
+ assert_equal( :nonproperty, token[0] )
126
+ assert_equal( test, token[1] )
127
+ end
128
+ end
129
+
130
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ammar Ali
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-18 00:00:00.000000000 Z
11
+ date: 2015-08-09 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
14
14
  email:
@@ -133,6 +133,7 @@ files:
133
133
  - test/scanner/test_scripts.rb
134
134
  - test/scanner/test_sets.rb
135
135
  - test/scanner/test_types.rb
136
+ - test/scanner/test_unicode_blocks.rb
136
137
  - test/syntax/ruby/test_1.8.rb
137
138
  - test/syntax/ruby/test_1.9.1.rb
138
139
  - test/syntax/ruby/test_1.9.3.rb
@@ -220,6 +221,7 @@ test_files:
220
221
  - test/scanner/test_scripts.rb
221
222
  - test/scanner/test_sets.rb
222
223
  - test/scanner/test_types.rb
224
+ - test/scanner/test_unicode_blocks.rb
223
225
  - test/syntax/ruby/test_1.8.rb
224
226
  - test/syntax/ruby/test_1.9.1.rb
225
227
  - test/syntax/ruby/test_1.9.3.rb