tinycus 1.0.9 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/tinycus.rb +181 -183
  3. metadata +4 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f72c3f525833a5d89fb848a6420279f4844dc415ceca2cfc7175386f3e2dd937
4
- data.tar.gz: aecc63197fc8b456d72d5fad485abb9aa196dcfae4cc12b518dcaaf73c6ab180
3
+ metadata.gz: 6c1eec898c95abe9a16e91e709d5727d320c7a797764e34687092225eb3ca19a
4
+ data.tar.gz: ded9cefb4a86639b89fdf0c39ab0751892b6722b9f4a2920450571a209eb5cbb
5
5
  SHA512:
6
- metadata.gz: b144d74a8fae7aa860d5eb5f8edaf70fa54af93a08c6a1859c137a16095db0c7d906edc7c9fd121617d1cf312fa948fd4f42c1730b7014e8cb603ec774477411
7
- data.tar.gz: 440fda68bf56748e308f59bf660366ec2e2439b6f09540a8c4aa772b982083442ba2fe42c6b73ba2d71968903b17320ea62dcaeb18471aeb47c1d63f72dab6a8
6
+ metadata.gz: 44a433108259fd71617245eb974043df5f58753fd565373108b2e5cc23ae8b3ebade1853ab1fc4ce021fe05ee0b3f94e514b949bc5f04cfe587c2c804b83119d
7
+ data.tar.gz: 67edff394d6cea2d25e098d6b86a42cfe9b794342605873c71abb7c61696d7f71a693cd5044e131045a57568201aac7544961002203efaa6996c0aa0216922f3
data/tinycus.rb CHANGED
@@ -26,7 +26,7 @@ The four "alpha_" functions work on Greek and English, also most Latin character
26
26
  end
27
27
 
28
28
  def Tinycus.alpha_collation() # :nodoc:
29
- return Tinycus::Tr.get_greek_collation_tr
29
+ return Tinycus.get_tr('greek_to_collation_form')
30
30
  end
31
31
 
32
32
  # synonym of Tinycus.contains_vowel, for readability when using it on a single character
@@ -212,25 +212,11 @@ The four "alpha_" functions work on Greek and English, also most Latin character
212
212
  forms = forms+forms.map { |x| x.capitalize }
213
213
  return forms.uniq
214
214
  end
215
+
215
216
  end
216
217
 
217
218
  class Tinycus::Tr
218
219
 
219
- @@prep_remove_acute_and_grave_from_greek = nil
220
- @@prep_remove_circumflex_from_greek = nil
221
- @@prep_add_circumflex_to_greek = nil
222
- @@prep_remove_grave_from_greek = nil
223
- @@prep_remove_acute_from_greek = nil
224
- @@prep_add_grave_to_greek = nil
225
- @@prep_add_acute_to_greek = nil
226
- @@prep_remove_diar_from_greek = nil
227
- @@prep_remove_breathing_from_greek = nil
228
- @@prep_add_diar_to_greek = nil
229
- @@prep_remove_accents_from_greek = nil
230
- @@greek_grave_to_acute = nil
231
- @@greek_acute_to_grave = nil
232
- @@prep_greek_to_collation_form = nil
233
-
234
220
  # Initialize a data structure that represents an action equivalent to String#tr(a,b), but faster.
235
221
  # Including redundant characters or unchanged characters is harmless and is fixed in this constructor; it does not cause
236
222
  # any performance hit when the object is actually used. The initializer takes linear time and memory in the size of
@@ -289,160 +275,27 @@ The four "alpha_" functions work on Greek and English, also most Latin character
289
275
  end
290
276
  end
291
277
 
292
- def Tr.greek_grave_to_acute(s,n:false)
293
- if @@greek_grave_to_acute.nil? then
294
- @@greek_grave_to_acute = Tinycus::Tr.new(
295
- "ÀÈÌÒÙàèìòùἂἃἊἒἓἢἣἫἲἳὂὃὊὋὓὢὣὫὰὲὴὶὸὺὼῒῢῸῂ","ÁÉÍÓÚáéíóúἄἅἌἔἕἤἥἭἴἵὄὅὌὍὕὤὥὭάέήίόύώΐΰΌῄ"
296
- )
297
- end
298
- if n then s=s.unicode_normalize(:nfc) end
299
- return @@greek_grave_to_acute.apply(s)
300
- end
301
-
302
- def Tr.greek_acute_to_grave(s,n:false)
303
- if @@greek_acute_to_grave.nil? then
304
- @@greek_acute_to_grave = Tinycus::Tr.new(
305
- "ÁÉÍÓÚáéíóúἄἅἌἔἕἤἥἭἴἵὄὅὌὍὕὤὥὭάέήίόύώΐΰΌ","ÀÈÌÒÙàèìòùἂἃἊἒἓἢἣἫἲἳὂὃὊὋὓὢὣὫὰὲὴὶὸὺὼῒῢῸ"
306
- )
307
- end
308
- if n then s=s.unicode_normalize(:nfc) end
309
- return @@greek_acute_to_grave.apply(s)
310
- end
311
-
312
- def Tr.remove_tonal_accents_from_greek(s,n:false)
313
- # to do: make this more efficient by creating a single-pass tr
314
- return Tr.remove_circumflex_from_greek(Tr.remove_acute_and_grave_from_greek(s,n:n))
315
- end
316
-
317
- def Tr.remove_acute_and_grave_from_greek(s,n:false)
318
- if @@prep_remove_acute_and_grave_from_greek.nil? then
319
- @@prep_remove_acute_and_grave_from_greek =
320
- Tinycus::Tr.new(
321
- "ÀÁàáÈÉèéÌÍìíÒÓòóÙÚùúÝýΆάἂἃἄἅἊἌἍὰᾴΈέἒἓἔἕἜἝὲήἢἣἤἥἫἬἭὴᾓᾔᾕῂῄΊΐίἲἳἴἵἼἽὶῒΌόὂὃὄὅὊὋὌὍὸῸΰύὓὔὕὝὺῢώὢὣὤὥὫὬὭὼᾤῴᾍ",
322
- "AAaaEEeeIIiiOOooUUuuYyΑαἀἁἀἁἈἈἉαᾳΕεἐἑἐἑἘἙεηἠἡἠἡἩἨἩηᾑᾐᾑῃῃΙϊιἰἱἰἱἸἹιϊΟοὀὁὀὁὈὉὈὉοΟϋυὑὐὑὙυϋωὠὡὠὡὩὨὩωᾠῳᾉ"
323
- )
324
- end
325
- if n then s=s.unicode_normalize(:nfc) end
326
- return @@prep_remove_acute_and_grave_from_greek.apply(s)
327
- end
328
-
329
- def Tr.remove_circumflex_from_greek(s,n:false)
330
- if @@prep_remove_circumflex_from_greek.nil? then
331
- @@prep_remove_circumflex_from_greek =
332
- Tinycus::Tr.new(
333
- "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåæçèéêëìíîïñòóôõöøùúûüýÿΆΈΊΌΐάέήίΰϊϋόύώỏἀἁἂἃἄἅἆἈἉἊἌἍἎἐἑἒἓἔἕἘἙἜἝἠἡἢἣἤἥἦἧἨἩἫἬἭἮἯἰἱἲἳἴἵἶἷἸἹἼἽἾὀὁὂὃὄὅὈὉὊὋὌὍὐὑὓὔὕὖὗὙὝὠὡὢὣὤὥὦὧὨὩὫὬὭὮὯὰὲὴὶὸὺὼᾐᾑᾓᾔᾕᾖᾗᾠᾤᾦᾧᾰᾱᾳᾴᾶᾷᾸᾹῂῃῄῆῇῐῑῒῖῗῘῙῠῡῢῥῦῨῩῬῳῴῶῷῸ",
334
- "ÀÁAAÄÅÆÇÈÉEËÌÍIÏNÒÓOOÖØÙÚUÜÝàáaaäåæçèéeëìíiïnòóooöøùúuüýÿΆΈΊΌΐάέήίΰϊϋόύώỏἀἁἂἃἄἅἀἈἉἊἌἍἈἐἑἒἓἔἕἘἙἜἝἠἡἢἣἤἥἠἡἨἩἫἬἭἨἩἰἱἲἳἴἵἰἱἸἹἼἽἸὀὁὂὃὄὅὈὉὊὋὌὍὐὑὓὔὕὐὑὙὝὠὡὢὣὤὥὠὡὨὩὫὬὭὨὩὰὲὴὶὸὺὼᾐᾑᾓᾔᾕᾐᾑᾠᾤᾠᾡᾰᾱᾳᾴαᾳᾸᾹῂῃῄηῃῐῑῒιϊῘῙῠῡῢῥυῨῩῬῳῴωῳῸ"
335
- )
336
- end
337
- if n then s=s.unicode_normalize(:nfc) end
338
- return @@prep_remove_circumflex_from_greek.apply(s)
339
- end
340
-
341
- def Tr.add_circumflex_to_greek(s,n:false)
342
- if @@prep_add_circumflex_to_greek.nil? then
343
- @@prep_add_circumflex_to_greek =
344
- Tinycus::Tr.new(
345
- "AAEINOOUaaeinoouἀἈἠἡἨἩἰἱἸὐὑὠὡὨὩᾐᾑᾠᾡαᾳηῃιϊυωῳ",
346
- "ÂÃÊÎÑÔÕÛâãêîñôõûἆἎἦἧἮἯἶἷἾὖὗὦὧὮὯᾖᾗᾦᾧᾶᾷῆῇῖῗῦῶῷ"
347
- )
348
- end
349
- if n then s=s.unicode_normalize(:nfc) end
350
- return @@prep_add_circumflex_to_greek.apply(s)
351
- end
278
+ def Tr.add_acute_to_greek(s,n:false) return Tinycus.get_tr('add_acute_to_greek').apply(s,n:n) end
279
+ def Tr.add_circumflex_to_greek(s,n:false) return Tinycus.get_tr('add_circumflex_to_greek').apply(s,n:n) end
280
+ def Tr.add_grave_to_greek(s,n:false) return Tinycus.get_tr('add_grave_to_greek').apply(s,n:n) end
352
281
 
353
- def Tr.remove_acute_from_greek(s,n:false)
354
- if @@prep_remove_acute_from_greek.nil? then
355
- @@prep_remove_acute_from_greek =
356
- Tinycus::Tr.new(
357
- "ÁÉÍÓÚáéíóúἄἅἌἔἕἤἥἭἴἵὄὅὌὍὕὤὥὭάέήίόύώῄῴΐΰΌ",
358
- "AEIOUaeiouἀἁἈἐἑἠἡἩἰἱὀὁὈὉὑὠὡὩαεηιουωῃῳϊϋΟ"
359
- )
360
- end
361
- if n then s=s.unicode_normalize(:nfc) end
362
- return @@prep_remove_acute_from_greek.apply(s)
363
- end
282
+ def Tr.remove_acute_from_greek(s,n:false) return Tinycus.get_tr('remove_acute_from_greek').apply(s,n:n) end
283
+ def Tr.remove_grave_from_greek(s,n:false) return Tinycus.get_tr('remove_grave_from_greek').apply(s,n:n) end
284
+ def Tr.remove_circumflex_from_greek(s,n:false) return Tinycus.get_tr('remove_circumflex_from_greek').apply(s,n:n) end
285
+ def Tr.remove_tonal_accents_from_greek(s,n:false) return Tinycus.get_tr('remove_tonal_accents_from_greek').apply(s,n:n) end
286
+ def Tr.remove_acute_and_grave_from_greek(s,n:false) return Tinycus.get_tr('remove_acute_and_grave_from_greek').apply(s,n:n) end
364
287
 
365
- def Tr.remove_grave_from_greek(s,n:false)
366
- if @@prep_remove_grave_from_greek.nil? then
367
- @@prep_remove_grave_from_greek =
368
- Tinycus::Tr.new(
369
- "ÀÈÌÒÙàèìòùἂἃἊἒἓἢἣἫἲἳὂὃὊὋὓὢὣὫὰὲὴὶὸὺὼῂῒῢῸ",
370
- "AEIOUaeiouἀἁἈἐἑἠἡἩἰἱὀὁὈὉὑὠὡὩαεηιουωῃϊϋΟ"
371
- )
372
- end
373
- if n then s=s.unicode_normalize(:nfc) end
374
- return @@prep_remove_grave_from_greek.apply(s)
375
- end
288
+ def Tr.greek_grave_to_acute(s,n:false) return Tinycus.get_tr('greek_grave_to_acute').apply(s,n:n) end
289
+ def Tr.greek_acute_to_grave(s,n:false) return Tinycus.get_tr('greek_acute_to_grave').apply(s,n:n) end
376
290
 
377
- def Tr.add_grave_to_greek(s,n:false)
378
- if @@prep_add_grave_to_greek.nil? then
379
- @@prep_add_grave_to_greek =
380
- Tinycus::Tr.new(
381
- "AEIOUaeiouἀἁἈἐἑἠἡἩἰἱὀὁὈὉὑὠὡὩαεηιουωῃϊϋΟ",
382
- "ÀÈÌÒÙàèìòùἂἃἊἒἓἢἣἫἲἳὂὃὊὋὓὢὣὫὰὲὴὶὸὺὼῂῒῢῸ"
383
- )
384
- end
385
- if n then s=s.unicode_normalize(:nfc) end
386
- return @@prep_add_grave_to_greek.apply(s)
387
- end
291
+ def Tr.add_diar_to_greek(s,n:false) return Tinycus.get_tr('add_diar_to_greek').apply(s,n:n) end
292
+ def Tr.remove_diar_from_greek(s,n:false) return Tinycus.get_tr('remove_diar_from_greek').apply(s,n:n) end
388
293
 
389
- def Tr.add_acute_to_greek(s,n:false)
390
- if @@prep_add_acute_to_greek.nil? then
391
- @@prep_add_acute_to_greek =
392
- Tinycus::Tr.new(
393
- "AEIOUaeiouἀἁἈἐἑἠἡἩἰἱὀὁὈὉὑὠὡὩαεηιουωῃῳϊϋΟ",
394
- "ÁÉÍÓÚáéíóúἄἅἌἔἕἤἥἭἴἵὄὅὌὍὕὤὥὭάέήίόύώῄῴΐΰΌ"
395
- )
396
- end
397
- if n then s=s.unicode_normalize(:nfc) end
398
- return @@prep_add_acute_to_greek.apply(s)
399
- end
294
+ def Tr.remove_breathing_from_greek(s,n:false) return Tinycus.get_tr('remove_breathing_from_greek').apply(s,n:n) end
400
295
 
401
- def Tr.remove_diar_from_greek(s,n:false)
402
- # Can't do caps with diaresis, since they only exist as combining characters.
403
- if @@prep_remove_diar_from_greek.nil? then
404
- @@prep_remove_diar_from_greek =
405
- Tinycus::Tr.new(
406
- "ϊΐῒῗϋΰῢ",
407
- "ιίὶῖυύὺ"
408
- )
409
- end
410
- if n then s=s.unicode_normalize(:nfc) end
411
- return @@prep_remove_diar_from_greek.apply(s)
412
- end
296
+ def Tr.remove_accents_from_greek(s,n:false) return Tinycus.get_tr('remove_accents_from_greek').apply(s,n:n) end
297
+ def Tr.greek_to_collation_form(s,n:false) return Tinycus.get_tr('greek_to_collation_form').apply(s,n:n) end
413
298
 
414
- def Tr.add_diar_to_greek(s,n:false)
415
- # Can't do caps with diaresis, since they only exist as combining characters.
416
- if @@prep_add_diar_to_greek.nil? then
417
- @@prep_add_diar_to_greek =
418
- Tinycus::Tr.new(
419
- "ιίὶῖυύὺ",
420
- "ϊΐῒῗϋΰῢ"
421
- )
422
- end
423
- if n then s=s.unicode_normalize(:nfc) end
424
- return @@prep_add_diar_to_greek.apply(s)
425
- end
426
-
427
- def Tr.remove_breathing_from_greek(s,n:false)
428
- if @@prep_remove_breathing_from_greek.nil? then
429
- @@prep_remove_breathing_from_greek =
430
- Tinycus::Tr.new(
431
- "ἄἌἈἈἀἈἁἉἂἊἅἍἆἉἉἃἋᾇἔἜἑἙἐἘἕἝἘἘἙἙἓἛἒἚἣἫἡἩἠἨἦἥἭἢἪἤἬᾔἬΙἧᾗἨἨᾕἭΙἩἩᾐἨΙᾖᾑἩΙἰἸἱἹἴἼἶἳἻἵἽἷἸἸἲἺἹἹὃὋὄὌὀὈὈὈὅὍὁὉὉὉῥῬῬῬῤὐὗὕὝὑὙὖὔὓὛὙὙὥὭὣὫὤὬᾤὬΙὠὨὦᾧὡὩὧᾠὨΙὨὨὢὪᾦὩὩ",
432
- "άΆΑΑαΑαΑὰᾺάΆᾶΑΑὰᾺᾷέΈεΕεΕέΈΕΕΕΕὲῈὲῈὴῊηΗηΗῆήΉὴῊήΉῄΉΙῆῇΗΗῄΉΙΗΗῃΗΙῇῃΗΙιΙιΙίΊῖὶῚίΊῖΙΙὶῚΙΙὸῸόΌοΟΟΟόΌοΟΟΟρΡΡΡρυῦύΎυΥῦύὺῪΥΥώΏὼῺώΏῴΏΙωΩῶῷωΩῶῳΩΙΩΩὼῺῷΩΩ"
433
- )
434
- end
435
- if n then s=s.unicode_normalize(:nfc) end
436
- return @@prep_remove_breathing_from_greek.apply(s)
437
- end
438
-
439
- def Tr.remove_accents_from_greek(s,n:false)
440
- if @@prep_remove_accents_from_greek.nil? then
441
- @@prep_remove_accents_from_greek = Tr.remove_accents('el')
442
- end
443
- if n then s=s.unicode_normalize(:nfc) end
444
- return @@prep_remove_accents_from_greek.apply(s)
445
- end
446
299
 
447
300
  # Slow.
448
301
  def Tr.remove_macrons_and_breves(s)
@@ -544,21 +397,6 @@ print a.join('|'),"--\n"
544
397
  =end
545
398
  end
546
399
 
547
- def Tr.get_greek_collation_tr # :nodoc:
548
- if @@prep_greek_to_collation_form.nil? then
549
- @@prep_greek_to_collation_form = Tr.collation_form('el')
550
- end
551
- return @@prep_greek_to_collation_form
552
- end
553
-
554
- def Tr.greek_to_collation_form(s,n:false)
555
- if @@prep_greek_to_collation_form.nil? then
556
- @@prep_greek_to_collation_form = Tr.collation_form('el')
557
- end
558
- if n then s=s.unicode_normalize(:nfc) end
559
- return @@prep_greek_to_collation_form.apply(s)
560
- end
561
-
562
400
  # Returns a Tinycus::Tr object which can then be used to act on strings using the apply() method.
563
401
  # The 'el' locale is a standard thing that software like ICU uses for polytonic Greek. The object constructed with this
564
402
  # locale will also remove most accents and macrons from Latin characters, but will miss some cases like Czech, and will not handle Cyrillic.
@@ -884,6 +722,8 @@ def Util.canonicalize_greek_word(w,n:false)
884
722
  # Works on a single word, not an entire string.
885
723
  # Is designed so that calling it on a Latin word is fast and harmless.
886
724
  # The n argument has the same definition as in Tinycus::Tr.remove_accents_from_greek().
725
+ # To do this on an IfMows object, use IfMows#canonicalize. Converting from string to IfMows, canonicalizing, and then going back to string
726
+ # is about 10 times slower than this function.
887
727
  if !looks_greek(w) then return w end # is fast on Latin script
888
728
  if n then w = w.unicode_normalize(:nfc) end
889
729
  w = to_single_accent(w)
@@ -1073,6 +913,71 @@ JSON
1073
913
  return b
1074
914
  end
1075
915
 
916
+ #------------------------------------------------------------------------------------------------
917
+ # Build tr table the first time the relevant function is called.
918
+ #------------------------------------------------------------------------------------------------
919
+ @@greek_tr_tables = {}
920
+ def Tinycus.get_tr(function_name) # :nodoc:
921
+ if @@greek_tr_tables.has_key?(function_name) then return @@greek_tr_tables[function_name] end
922
+ operations = {
923
+ 'add_acute_to_greek'=> {'add_tonal'=>'acute'},
924
+ 'add_circumflex_to_greek'=> {'add_tonal'=>'circumflex'},
925
+ 'add_grave_to_greek'=> {'add_tonal'=>'grave'},
926
+
927
+ 'remove_acute_from_greek'=> {'remove_tonal'=>['acute']},
928
+ 'remove_circumflex_from_greek'=> {'remove_tonal'=>['circumflex']},
929
+ 'remove_grave_from_greek'=> {'remove_tonal'=>['grave']},
930
+ 'remove_tonal_accents_from_greek'=> {'remove_tonal'=>['acute','circumflex','grave']},
931
+ 'remove_acute_and_grave_from_greek'=>{'remove_tonal'=>['acute','grave']},
932
+
933
+ 'greek_acute_to_grave'=> {'change_tonal'=>['acute','grave']},
934
+ 'greek_grave_to_acute'=> {'change_tonal'=>['grave','acute']},
935
+
936
+ # Can't do caps with diaresis, since they only exist as combining characters.
937
+ 'add_diar_to_greek'=> {'add_boolean'=>['diar'],'lc_only'=>true},
938
+ 'remove_diar_from_greek'=> {'remove_boolean'=>['diar'],'lc_only'=>true},
939
+
940
+ 'remove_breathing_from_greek'=> {'set_string'=>['breathing','none']},
941
+
942
+ 'remove_accents_from_greek'=> {'set_string'=>['breathing','none'],
943
+ 'remove_boolean'=>['diar','iota_subscript'],
944
+ 'remove_tonal'=>['acute','circumflex','grave']},
945
+ 'greek_to_collation_form'=> {'set_string'=>['breathing','none'],
946
+ 'remove_boolean'=>['diar','iota_subscript','uppercase'],
947
+ 'remove_tonal'=>['acute','circumflex','grave']},
948
+ }[function_name]
949
+ if operations.nil? then raise "undefined tr function #{function_name}" end
950
+ a = ''
951
+ b = ''
952
+ Tinycus.all_greek_characters.chars.each { |c|
953
+ next if operations['lc_only'] && c!=c.downcase
954
+ plain,d = Tinycus.disassemble_greek_char(c)
955
+ if operations.has_key?('add_tonal') then d['tonal']=operations['add_tonal'] end
956
+ if operations.has_key?('remove_tonal') then
957
+ operations['remove_tonal'].each { |acc| if d['tonal']==acc then d['tonal']='none' end }
958
+ end
959
+ if operations.has_key?('change_tonal') then
960
+ from,to = operations['change_tonal']
961
+ if d['tonal']==from then d['tonal']=to end
962
+ end
963
+ if operations.has_key?('add_boolean') then
964
+ operations['add_boolean'].each { |x|
965
+ unless c!=c.downcase && x=='diar' then d[x]=true end # Can't do caps with diaresis, since they only exist as combining characters.
966
+ }
967
+ end
968
+ if operations.has_key?('remove_boolean') then operations['remove_boolean'].each { |x| d[x]=false } end
969
+ if operations.has_key?('set_string') then x,y=operations['set_string']; d[x]=y end
970
+ c2 = Tinycus.assemble_greek_char(plain,d)
971
+ next if c2.nil? # happens for stuff like uppercase alpha with smooth breathing and acute
972
+ # if c2.nil? then raise "unable to assemble #{plain} #{d}, should have had key present as #{plain}#{Tinycus.assemble_greek_char_d_to_hex(plain,d)}" end
973
+ next if c2==c
974
+ a += c
975
+ b += c2
976
+ }
977
+ @@greek_tr_tables[function_name] = Tinycus::Tr.new(a,b)
978
+ return @@greek_tr_tables[function_name]
979
+ end
980
+
1076
981
  # Returns [plain,d], where plain is a lowercase, unaccented Greek letter (α-ω, plus ς), and d is
1077
982
  # a hash with the following keys:
1078
983
  # uppercase, diar, iota_subscript - boolean values
@@ -1100,8 +1005,7 @@ JSON
1100
1005
  return [plain,d]
1101
1006
  end
1102
1007
 
1103
- # The inverse of Tinycus.disassemble_greek_char.
1104
- def Tinycus.assemble_greek_char(plain,d)
1008
+ def Tinycus.assemble_greek_char_d_to_hex(plain,d)
1105
1009
  b = 0
1106
1010
  b |= 0b1 if d['uppercase']
1107
1011
  b |= 0b10 if d['diar']
@@ -1111,7 +1015,12 @@ JSON
1111
1015
  b |= 0b11000 if d['tonal']=='circumflex'
1112
1016
  b |= 0b0100000 if d['breathing']=='smooth'
1113
1017
  b |= 0b1000000 if d['breathing']=='rough'
1114
- x = plain+("%.2x" % b)
1018
+ return ("%.2x" % b)
1019
+ end
1020
+
1021
+ # The inverse of Tinycus.disassemble_greek_char.
1022
+ def Tinycus.assemble_greek_char(plain,d)
1023
+ x = plain+Tinycus.assemble_greek_char_d_to_hex(plain,d)
1115
1024
  return Tinycus.assemble_greek_char_hex(x)
1116
1025
  end
1117
1026
 
@@ -1136,17 +1045,35 @@ JSON
1136
1045
  return @@assemble_greek_char_hash[x]
1137
1046
  end
1138
1047
 
1048
+ @@all_greek_characters = nil
1049
+
1050
+ def Tinycus.all_greek_characters
1051
+ # Returns a string containing all the characters of the Greek alphabet, including all accentuations, in the standardized forms
1052
+ # that are allowed by Tinycus.
1053
+ if @@all_greek_characters.nil? then
1054
+ @@all_greek_characters = @@disassemble_greek_char_hash.keys.join
1055
+ end
1056
+ return @@all_greek_characters
1057
+ end
1058
+
1059
+
1139
1060
  # The following are output by generating/assemble_disassemble.rb .
1140
1061
  @@disassemble_greek_char_hash = {
1141
1062
  'α'=>['α',0b0],
1142
1063
  'Α'=>['α',0b1],
1143
1064
  'ᾳ'=>['α',0b100],
1144
1065
  'ά'=>['α',0b1000],
1066
+ 'Ά'=>['α',0b1001],
1067
+ 'ᾴ'=>['α',0b1100],
1145
1068
  'ὰ'=>['α',0b10000],
1069
+ 'Ὰ'=>['α',0b10001],
1070
+ 'ᾲ'=>['α',0b10100],
1146
1071
  'ᾶ'=>['α',0b11000],
1147
1072
  'ᾷ'=>['α',0b11100],
1148
1073
  'ἀ'=>['α',0b100000],
1149
1074
  'Ἀ'=>['α',0b100001],
1075
+ 'ᾀ'=>['α',0b100100],
1076
+ 'ᾈ'=>['α',0b100101],
1150
1077
  'ἄ'=>['α',0b101000],
1151
1078
  'Ἄ'=>['α',0b101001],
1152
1079
  'ᾄ'=>['α',0b101100],
@@ -1156,10 +1083,14 @@ JSON
1156
1083
  'Ἆ'=>['α',0b111001],
1157
1084
  'ἁ'=>['α',0b1000000],
1158
1085
  'Ἁ'=>['α',0b1000001],
1086
+ 'ᾁ'=>['α',0b1000100],
1087
+ 'ᾉ'=>['α',0b1000101],
1159
1088
  'ἅ'=>['α',0b1001000],
1160
1089
  'Ἅ'=>['α',0b1001001],
1161
1090
  'ᾅ'=>['α',0b1001100],
1091
+ 'ᾍ'=>['α',0b1001101],
1162
1092
  'ἃ'=>['α',0b1010000],
1093
+ 'Ἃ'=>['α',0b1010001],
1163
1094
  'ᾇ'=>['α',0b1011100],
1164
1095
  'β'=>['β',0b0],
1165
1096
  'Β'=>['β',0b1],
@@ -1170,25 +1101,31 @@ JSON
1170
1101
  'ε'=>['ε',0b0],
1171
1102
  'Ε'=>['ε',0b1],
1172
1103
  'έ'=>['ε',0b1000],
1104
+ 'Έ'=>['ε',0b1001],
1173
1105
  'ὲ'=>['ε',0b10000],
1106
+ 'Ὲ'=>['ε',0b10001],
1174
1107
  'ἐ'=>['ε',0b100000],
1175
1108
  'Ἐ'=>['ε',0b100001],
1176
1109
  'ἔ'=>['ε',0b101000],
1177
1110
  'Ἔ'=>['ε',0b101001],
1178
1111
  'ἒ'=>['ε',0b110000],
1112
+ 'Ἒ'=>['ε',0b110001],
1179
1113
  'ἑ'=>['ε',0b1000000],
1180
1114
  'Ἑ'=>['ε',0b1000001],
1181
1115
  'ἕ'=>['ε',0b1001000],
1182
1116
  'Ἕ'=>['ε',0b1001001],
1183
1117
  'ἓ'=>['ε',0b1010000],
1118
+ 'Ἓ'=>['ε',0b1010001],
1184
1119
  'ζ'=>['ζ',0b0],
1185
1120
  'Ζ'=>['ζ',0b1],
1186
1121
  'η'=>['η',0b0],
1187
1122
  'Η'=>['η',0b1],
1188
1123
  'ῃ'=>['η',0b100],
1189
1124
  'ή'=>['η',0b1000],
1125
+ 'Ή'=>['η',0b1001],
1190
1126
  'ῄ'=>['η',0b1100],
1191
1127
  'ὴ'=>['η',0b10000],
1128
+ 'Ὴ'=>['η',0b10001],
1192
1129
  'ῂ'=>['η',0b10100],
1193
1130
  'ῆ'=>['η',0b11000],
1194
1131
  'ῇ'=>['η',0b11100],
@@ -1199,6 +1136,7 @@ JSON
1199
1136
  'Ἤ'=>['η',0b101001],
1200
1137
  'ᾔ'=>['η',0b101100],
1201
1138
  'ἢ'=>['η',0b110000],
1139
+ 'Ἢ'=>['η',0b110001],
1202
1140
  'ἦ'=>['η',0b111000],
1203
1141
  'Ἦ'=>['η',0b111001],
1204
1142
  'ᾖ'=>['η',0b111100],
@@ -1210,6 +1148,7 @@ JSON
1210
1148
  'ᾕ'=>['η',0b1001100],
1211
1149
  'ἣ'=>['η',0b1010000],
1212
1150
  'Ἣ'=>['η',0b1010001],
1151
+ 'ᾓ'=>['η',0b1010100],
1213
1152
  'ἧ'=>['η',0b1011000],
1214
1153
  'Ἧ'=>['η',0b1011001],
1215
1154
  'ᾗ'=>['η',0b1011100],
@@ -1218,9 +1157,12 @@ JSON
1218
1157
  'ι'=>['ι',0b0],
1219
1158
  'Ι'=>['ι',0b1],
1220
1159
  'ϊ'=>['ι',0b10],
1160
+ 'Ϊ'=>['ι',0b11],
1221
1161
  'ί'=>['ι',0b1000],
1162
+ 'Ί'=>['ι',0b1001],
1222
1163
  'ΐ'=>['ι',0b1010],
1223
1164
  'ὶ'=>['ι',0b10000],
1165
+ 'Ὶ'=>['ι',0b10001],
1224
1166
  'ῒ'=>['ι',0b10010],
1225
1167
  'ῖ'=>['ι',0b11000],
1226
1168
  'ῗ'=>['ι',0b11010],
@@ -1229,6 +1171,7 @@ JSON
1229
1171
  'ἴ'=>['ι',0b101000],
1230
1172
  'Ἴ'=>['ι',0b101001],
1231
1173
  'ἲ'=>['ι',0b110000],
1174
+ 'Ἲ'=>['ι',0b110001],
1232
1175
  'ἶ'=>['ι',0b111000],
1233
1176
  'Ἶ'=>['ι',0b111001],
1234
1177
  'ἱ'=>['ι',0b1000000],
@@ -1236,7 +1179,9 @@ JSON
1236
1179
  'ἵ'=>['ι',0b1001000],
1237
1180
  'Ἵ'=>['ι',0b1001001],
1238
1181
  'ἳ'=>['ι',0b1010000],
1182
+ 'Ἳ'=>['ι',0b1010001],
1239
1183
  'ἷ'=>['ι',0b1011000],
1184
+ 'Ἷ'=>['ι',0b1011001],
1240
1185
  'κ'=>['κ',0b0],
1241
1186
  'Κ'=>['κ',0b1],
1242
1187
  'λ'=>['λ',0b0],
@@ -1250,11 +1195,15 @@ JSON
1250
1195
  'ο'=>['ο',0b0],
1251
1196
  'Ο'=>['ο',0b1],
1252
1197
  'ό'=>['ο',0b1000],
1198
+ 'Ό'=>['ο',0b1001],
1253
1199
  'ὸ'=>['ο',0b10000],
1200
+ 'Ὸ'=>['ο',0b10001],
1254
1201
  'ὀ'=>['ο',0b100000],
1255
1202
  'Ὀ'=>['ο',0b100001],
1256
1203
  'ὄ'=>['ο',0b101000],
1257
1204
  'Ὄ'=>['ο',0b101001],
1205
+ 'ὂ'=>['ο',0b110000],
1206
+ 'Ὂ'=>['ο',0b110001],
1258
1207
  'ὁ'=>['ο',0b1000000],
1259
1208
  'Ὁ'=>['ο',0b1000001],
1260
1209
  'ὅ'=>['ο',0b1001000],
@@ -1276,9 +1225,12 @@ JSON
1276
1225
  'υ'=>['υ',0b0],
1277
1226
  'Υ'=>['υ',0b1],
1278
1227
  'ϋ'=>['υ',0b10],
1228
+ 'Ϋ'=>['υ',0b11],
1279
1229
  'ύ'=>['υ',0b1000],
1230
+ 'Ύ'=>['υ',0b1001],
1280
1231
  'ΰ'=>['υ',0b1010],
1281
1232
  'ὺ'=>['υ',0b10000],
1233
+ 'Ὺ'=>['υ',0b10001],
1282
1234
  'ῢ'=>['υ',0b10010],
1283
1235
  'ῦ'=>['υ',0b11000],
1284
1236
  'ὐ'=>['υ',0b100000],
@@ -1289,7 +1241,9 @@ JSON
1289
1241
  'ὕ'=>['υ',0b1001000],
1290
1242
  'Ὕ'=>['υ',0b1001001],
1291
1243
  'ὓ'=>['υ',0b1010000],
1244
+ 'Ὓ'=>['υ',0b1010001],
1292
1245
  'ὗ'=>['υ',0b1011000],
1246
+ 'Ὗ'=>['υ',0b1011001],
1293
1247
  'φ'=>['φ',0b0],
1294
1248
  'Φ'=>['φ',0b1],
1295
1249
  'χ'=>['χ',0b0],
@@ -1300,8 +1254,10 @@ JSON
1300
1254
  'Ω'=>['ω',0b1],
1301
1255
  'ῳ'=>['ω',0b100],
1302
1256
  'ώ'=>['ω',0b1000],
1257
+ 'Ώ'=>['ω',0b1001],
1303
1258
  'ῴ'=>['ω',0b1100],
1304
1259
  'ὼ'=>['ω',0b10000],
1260
+ 'Ὼ'=>['ω',0b10001],
1305
1261
  'ῶ'=>['ω',0b11000],
1306
1262
  'ῷ'=>['ω',0b11100],
1307
1263
  'ὠ'=>['ω',0b100000],
@@ -1311,15 +1267,19 @@ JSON
1311
1267
  'Ὤ'=>['ω',0b101001],
1312
1268
  'ᾤ'=>['ω',0b101100],
1313
1269
  'ὢ'=>['ω',0b110000],
1270
+ 'Ὢ'=>['ω',0b110001],
1314
1271
  'ὦ'=>['ω',0b111000],
1315
1272
  'Ὦ'=>['ω',0b111001],
1316
1273
  'ᾦ'=>['ω',0b111100],
1317
1274
  'ὡ'=>['ω',0b1000000],
1318
1275
  'Ὡ'=>['ω',0b1000001],
1276
+ 'ᾡ'=>['ω',0b1000100],
1319
1277
  'ὥ'=>['ω',0b1001000],
1320
1278
  'Ὥ'=>['ω',0b1001001],
1279
+ 'ᾥ'=>['ω',0b1001100],
1321
1280
  'ὣ'=>['ω',0b1010000],
1322
1281
  'Ὣ'=>['ω',0b1010001],
1282
+ 'ᾣ'=>['ω',0b1010100],
1323
1283
  'ὧ'=>['ω',0b1011000],
1324
1284
  'Ὧ'=>['ω',0b1011001],
1325
1285
  'ᾧ'=>['ω',0b1011100],
@@ -1327,16 +1287,21 @@ JSON
1327
1287
  'Ϝ'=>['ϝ',0b1]
1328
1288
  }
1329
1289
  @@assemble_greek_char_hash = {
1330
-
1331
1290
  'α00'=>'α',
1332
1291
  'α01'=>'Α',
1333
1292
  'α04'=>'ᾳ',
1334
1293
  'α08'=>'ά',
1294
+ 'α09'=>'Ά',
1295
+ 'α0c'=>'ᾴ',
1335
1296
  'α10'=>'ὰ',
1297
+ 'α11'=>'Ὰ',
1298
+ 'α14'=>'ᾲ',
1336
1299
  'α18'=>'ᾶ',
1337
1300
  'α1c'=>'ᾷ',
1338
1301
  'α20'=>'ἀ',
1339
1302
  'α21'=>'Ἀ',
1303
+ 'α24'=>'ᾀ',
1304
+ 'α25'=>'ᾈ',
1340
1305
  'α28'=>'ἄ',
1341
1306
  'α29'=>'Ἄ',
1342
1307
  'α2c'=>'ᾄ',
@@ -1346,10 +1311,14 @@ JSON
1346
1311
  'α39'=>'Ἆ',
1347
1312
  'α40'=>'ἁ',
1348
1313
  'α41'=>'Ἁ',
1314
+ 'α44'=>'ᾁ',
1315
+ 'α45'=>'ᾉ',
1349
1316
  'α48'=>'ἅ',
1350
1317
  'α49'=>'Ἅ',
1351
1318
  'α4c'=>'ᾅ',
1319
+ 'α4d'=>'ᾍ',
1352
1320
  'α50'=>'ἃ',
1321
+ 'α51'=>'Ἃ',
1353
1322
  'α5c'=>'ᾇ',
1354
1323
  'β00'=>'β',
1355
1324
  'β01'=>'Β',
@@ -1360,25 +1329,31 @@ JSON
1360
1329
  'ε00'=>'ε',
1361
1330
  'ε01'=>'Ε',
1362
1331
  'ε08'=>'έ',
1332
+ 'ε09'=>'Έ',
1363
1333
  'ε10'=>'ὲ',
1334
+ 'ε11'=>'Ὲ',
1364
1335
  'ε20'=>'ἐ',
1365
1336
  'ε21'=>'Ἐ',
1366
1337
  'ε28'=>'ἔ',
1367
1338
  'ε29'=>'Ἔ',
1368
1339
  'ε30'=>'ἒ',
1340
+ 'ε31'=>'Ἒ',
1369
1341
  'ε40'=>'ἑ',
1370
1342
  'ε41'=>'Ἑ',
1371
1343
  'ε48'=>'ἕ',
1372
1344
  'ε49'=>'Ἕ',
1373
1345
  'ε50'=>'ἓ',
1346
+ 'ε51'=>'Ἓ',
1374
1347
  'ζ00'=>'ζ',
1375
1348
  'ζ01'=>'Ζ',
1376
1349
  'η00'=>'η',
1377
1350
  'η01'=>'Η',
1378
1351
  'η04'=>'ῃ',
1379
1352
  'η08'=>'ή',
1353
+ 'η09'=>'Ή',
1380
1354
  'η0c'=>'ῄ',
1381
1355
  'η10'=>'ὴ',
1356
+ 'η11'=>'Ὴ',
1382
1357
  'η14'=>'ῂ',
1383
1358
  'η18'=>'ῆ',
1384
1359
  'η1c'=>'ῇ',
@@ -1389,6 +1364,7 @@ JSON
1389
1364
  'η29'=>'Ἤ',
1390
1365
  'η2c'=>'ᾔ',
1391
1366
  'η30'=>'ἢ',
1367
+ 'η31'=>'Ἢ',
1392
1368
  'η38'=>'ἦ',
1393
1369
  'η39'=>'Ἦ',
1394
1370
  'η3c'=>'ᾖ',
@@ -1400,6 +1376,7 @@ JSON
1400
1376
  'η4c'=>'ᾕ',
1401
1377
  'η50'=>'ἣ',
1402
1378
  'η51'=>'Ἣ',
1379
+ 'η54'=>'ᾓ',
1403
1380
  'η58'=>'ἧ',
1404
1381
  'η59'=>'Ἧ',
1405
1382
  'η5c'=>'ᾗ',
@@ -1408,9 +1385,12 @@ JSON
1408
1385
  'ι00'=>'ι',
1409
1386
  'ι01'=>'Ι',
1410
1387
  'ι02'=>'ϊ',
1388
+ 'ι03'=>'Ϊ',
1411
1389
  'ι08'=>'ί',
1390
+ 'ι09'=>'Ί',
1412
1391
  'ι0a'=>'ΐ',
1413
1392
  'ι10'=>'ὶ',
1393
+ 'ι11'=>'Ὶ',
1414
1394
  'ι12'=>'ῒ',
1415
1395
  'ι18'=>'ῖ',
1416
1396
  'ι1a'=>'ῗ',
@@ -1419,6 +1399,7 @@ JSON
1419
1399
  'ι28'=>'ἴ',
1420
1400
  'ι29'=>'Ἴ',
1421
1401
  'ι30'=>'ἲ',
1402
+ 'ι31'=>'Ἲ',
1422
1403
  'ι38'=>'ἶ',
1423
1404
  'ι39'=>'Ἶ',
1424
1405
  'ι40'=>'ἱ',
@@ -1426,7 +1407,9 @@ JSON
1426
1407
  'ι48'=>'ἵ',
1427
1408
  'ι49'=>'Ἵ',
1428
1409
  'ι50'=>'ἳ',
1410
+ 'ι51'=>'Ἳ',
1429
1411
  'ι58'=>'ἷ',
1412
+ 'ι59'=>'Ἷ',
1430
1413
  'κ00'=>'κ',
1431
1414
  'κ01'=>'Κ',
1432
1415
  'λ00'=>'λ',
@@ -1440,11 +1423,15 @@ JSON
1440
1423
  'ο00'=>'ο',
1441
1424
  'ο01'=>'Ο',
1442
1425
  'ο08'=>'ό',
1426
+ 'ο09'=>'Ό',
1443
1427
  'ο10'=>'ὸ',
1428
+ 'ο11'=>'Ὸ',
1444
1429
  'ο20'=>'ὀ',
1445
1430
  'ο21'=>'Ὀ',
1446
1431
  'ο28'=>'ὄ',
1447
1432
  'ο29'=>'Ὄ',
1433
+ 'ο30'=>'ὂ',
1434
+ 'ο31'=>'Ὂ',
1448
1435
  'ο40'=>'ὁ',
1449
1436
  'ο41'=>'Ὁ',
1450
1437
  'ο48'=>'ὅ',
@@ -1466,9 +1453,12 @@ JSON
1466
1453
  'υ00'=>'υ',
1467
1454
  'υ01'=>'Υ',
1468
1455
  'υ02'=>'ϋ',
1456
+ 'υ03'=>'Ϋ',
1469
1457
  'υ08'=>'ύ',
1458
+ 'υ09'=>'Ύ',
1470
1459
  'υ0a'=>'ΰ',
1471
1460
  'υ10'=>'ὺ',
1461
+ 'υ11'=>'Ὺ',
1472
1462
  'υ12'=>'ῢ',
1473
1463
  'υ18'=>'ῦ',
1474
1464
  'υ20'=>'ὐ',
@@ -1479,7 +1469,9 @@ JSON
1479
1469
  'υ48'=>'ὕ',
1480
1470
  'υ49'=>'Ὕ',
1481
1471
  'υ50'=>'ὓ',
1472
+ 'υ51'=>'Ὓ',
1482
1473
  'υ58'=>'ὗ',
1474
+ 'υ59'=>'Ὗ',
1483
1475
  'φ00'=>'φ',
1484
1476
  'φ01'=>'Φ',
1485
1477
  'χ00'=>'χ',
@@ -1490,8 +1482,10 @@ JSON
1490
1482
  'ω01'=>'Ω',
1491
1483
  'ω04'=>'ῳ',
1492
1484
  'ω08'=>'ώ',
1485
+ 'ω09'=>'Ώ',
1493
1486
  'ω0c'=>'ῴ',
1494
1487
  'ω10'=>'ὼ',
1488
+ 'ω11'=>'Ὼ',
1495
1489
  'ω18'=>'ῶ',
1496
1490
  'ω1c'=>'ῷ',
1497
1491
  'ω20'=>'ὠ',
@@ -1501,15 +1495,19 @@ JSON
1501
1495
  'ω29'=>'Ὤ',
1502
1496
  'ω2c'=>'ᾤ',
1503
1497
  'ω30'=>'ὢ',
1498
+ 'ω31'=>'Ὢ',
1504
1499
  'ω38'=>'ὦ',
1505
1500
  'ω39'=>'Ὦ',
1506
1501
  'ω3c'=>'ᾦ',
1507
1502
  'ω40'=>'ὡ',
1508
1503
  'ω41'=>'Ὡ',
1504
+ 'ω44'=>'ᾡ',
1509
1505
  'ω48'=>'ὥ',
1510
1506
  'ω49'=>'Ὥ',
1507
+ 'ω4c'=>'ᾥ',
1511
1508
  'ω50'=>'ὣ',
1512
1509
  'ω51'=>'Ὣ',
1510
+ 'ω54'=>'ᾣ',
1513
1511
  'ω58'=>'ὧ',
1514
1512
  'ω59'=>'Ὧ',
1515
1513
  'ω5c'=>'ᾧ',
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tinycus
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.9
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benjamin Crowell
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-15 00:00:00.000000000 Z
11
+ date: 2024-01-25 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: "This is a ruby library to do some string functions efficiently that\nwould
14
14
  otherwise be slow or require a huge footprint. For example,\nit can remove accents
@@ -16,9 +16,7 @@ description: "This is a ruby library to do some string functions efficiently tha
16
16
  email:
17
17
  executables: []
18
18
  extensions: []
19
- extra_rdoc_files:
20
- - README.md
21
- - tinycus.rb
19
+ extra_rdoc_files: []
22
20
  files:
23
21
  - LICENSE
24
22
  - README.md
@@ -30,11 +28,7 @@ metadata:
30
28
  contact_uri: http://lightandmatter.com/area4author.html
31
29
  documentation_uri: https://bitbucket.org/ben-crowell/tinycus
32
30
  post_install_message:
33
- rdoc_options:
34
- - "--exclude"
35
- - "(bad_combining|generating)"
36
- - "--main"
37
- - README.md
31
+ rdoc_options: []
38
32
  require_paths:
39
33
  - lib
40
34
  required_ruby_version: !ruby/object:Gem::Requirement