RubyGems - tinycus - Versions diffs - 1.0.5 → 1.0.7 - Mend

tinycus 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

checksums.yaml +4 -4
data/tinycus.rb +87 -80
metadata +12 -4

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 90849e6c1df95e50e8f6ba6818d78a8bf6352a82fc9048313a11acd7d1624bf2
-  data.tar.gz: 5b962ac97c0d25777cf527b3a61d0c0e7ebb12c2321b690eca724907bd692de7
+  metadata.gz: 83d0a3e5d3f764901829858fc5a1b9160577e46f5e277504291c21225c2a38a0
+  data.tar.gz: da3dece01b93b157e1e36d7e07b27ead0f55dab621101af4444e716672028323
 SHA512:
-  metadata.gz: 6af58dc8e3928c473e96416b20fee0893aba4b9abf3838f9689e5b95326eb74c80986a6fa09d47c3ca8240b00d2c17609d2b1adbdc74e86ac98055889ca9aff7
-  data.tar.gz: a5e5a4cad6fbfae7fe6588864cc3607eec471b0c0db6e8adfc3657c44826b63696cac92c410423ef62fe96f3e618e0a476e8b89d764404115a68fc03dc7a8336
+  metadata.gz: 16fb025b30abf1d6650aa2a76108117a88e30e5d5b524fc8aae83aca1531b62a2b4baab4da9167b673636cae413b8225fc236772185c9c1063bc4a31bc1321c3
+  data.tar.gz: e0cc0963f112a25e782875ecb179e035a0dabb1a6d20b2e3b59d0b2c5584bfb0bbe1d9b41c275dac631b7c520822a1e34b1c1340184a64f27bda52d403c3dd4b

data/tinycus.rb CHANGED

@@ -4,45 +4,51 @@ require 'json'
 module Tinycus
-  # The four "alpha_" functions work on Greek and English, also most Latin characters; see comments in Tr.get_greek_collation_tr.
+=begin rdoc
+The four "alpha_" functions work on Greek and English, also most Latin characters; see comments in Tr.get_greek_collation_tr.
+=end
+  # Sort a list of strings in alphabetical order.
   def Tinycus.alpha_sort(l,n:false)
     return Tinycus.sort(l,Tinycus.alpha_collation,n:false)
   end
+  # Tests two strings for equality in alphabetical order, returns a boolean.
   def Tinycus.alpha_equal(a,b,n:true)
     return (Tinycus.alpha_compare(a,b,n:n)==0)
   end
+  # An alphabetical <=> function.
   def Tinycus.alpha_compare(a,b,n:true)
     # return (Tinycus::Tr.remove_accents_from_greek(a,n:n).downcase <=> Tinycus::Tr.remove_accents_from_greek(b,n:n).downcase)
     collation_tr = Tinycus.alpha_collation
     return collation_tr.apply(a,n:n) <=> collation_tr.apply(b,n:n)
   end
-  def Tinycus.alpha_collation
+  def Tinycus.alpha_collation() # :nodoc:
     return Tinycus::Tr.get_greek_collation_tr
   end
+  # synonym of Tinycus.contains_vowel, for readability when using it on a single character
   def Tinycus.is_vowel(c)
-    # synonym of Tinycus.contains_vowel, for readability when using it on a single character
     return Tinycus.contains_vowel(c)
   end
+  # Works for Greek and Latin; considers y to be a vowel; doesn't handle stuff like Welsh w.
   def Tinycus.contains_vowel(s)
-    # works for Greek and Latin; considers y to be a vowel; doesn't handle stuff like Welsh w
     if Tinycus::Tr.remove_accents_from_greek(s).downcase.match?(/[αειουηωaeiouyæ]/) then return true else return false end
   end
   # ---
+  # Like alpha_sort, but more general, using any Tr object to define a collation order.
   def Tinycus.sort(l,collation_tr,n:false)
     p = l.clone # This does work on a list of strings: ruby -e "a=['p','q']; b=a.clone; b[1]='x'; print a"
     p.sort_by! { |x| collation_tr.apply(x,n:n) } # ruby's sort_by! only applies the block once to each element, to form an index
     return p
   end
-  def Tinycus.run_tests
+  def Tinycus.run_tests # :nodoc:
     # removing accents:
     tests = [
       ['',''],
@@ -125,23 +131,13 @@ module Tinycus
       end
     }
+    Tinycus::Tr.run_tests()
+    Tinycus::MiscGreek.run_tests()
   end
-  # fixme:
-  #   Determine byte order and make sure we convert to native (i.e., BE rather than LE if we're on a big-endian machine).
-  @@bloater = Encoding::Converter.new('UTF-8','UTF-32LE')
-  @@shrinker = Encoding::Converter.new('UTF-32LE','UTF-8')
-  def Tinycus.bloat(s) # private method
-    return @@bloater.convert(s)
-  end
-  def Tinycus.shrink(s) # private method
-    return @@shrinker.convert(s)
-  end
   class Tinycus::MiscGreek
-    def MiscGreek.run_tests
+    def MiscGreek.run_tests # :nodoc:
       print "testing MiscGreek.add_second_accent...\n"
       [['θεμείλια','θεμείλιά'],
        ['πόλεμονδε','πόλεμόνδε'],
@@ -155,9 +151,9 @@ module Tinycus
       }
     end
+    # A rough approximation, for cases where we don't need perfect precision and either don't have Ifthimos's syllabification module
+    # or don't want the performance hit.
     def MiscGreek.estimate_syll_count(x)
-      # A rough approximation, for cases where we don't need perfect precision and either don't have Ransom's greek/syllab.rb
-      # or don't want the performance hit.
       x = x.downcase
       x = x.gsub(/[ϊ]/,'e') # prevent it from being misinterpreted as a diphthong after the diaresis is stripped below
       x = Tr.remove_accents_from_greek(x)
@@ -181,10 +177,10 @@ module Tinycus
       return Tr.remove_tonal_accents_from_greek(x)!=x
     end
-    def MiscGreek.add_second_accent(w_orig)
-      # e.g., if w is θεμείλια, returns θεμείλιά
-      w = w_orig.clone # shallow copy, works on a string; I'm not clear in why this is necessary, but it is; modification to w_orig is visible in
-                       # output of make test_misc_greek
+    # Modifies a word as would be appropriate if it was followed by an enclitic. E.g., if the input is θεμείλια, returns θεμείλιά.
+    def MiscGreek.add_second_accent(w)
+      w = w.clone # shallow copy, works on a string; I'm not clear in why this is necessary, but it is; modification to w is visible in
+                  # output of make test_misc_greek
       if w=~/(.*)δε$/ then
         stem = $1
         nsyll = MiscGreek.estimate_syll_count(stem)
@@ -202,14 +198,14 @@ module Tinycus
       return w
     end
+    # For a given word, try to predict every possible form it could take in a text, including
+    # both possible capitalizations, acute/grave, and multiple accents.
+    # The word w should already have been converted into a canonical dictionary form (typically a single acute accent).
+    # This is not 100% perfect, mainly because the rules for multiple accents are complicated and Tinycus doesn't include a full
+    # syllabification algorithm.
+    # I tested this as a round-trip on all multiply accented words occurring in Homer. The following three words were the only
+    # ones where it failed: κάλλίον, σταφύλῇ, ὕπὸ.
     def MiscGreek.all_cases_and_accents(w)
-      # For a given word, try to predict every possible form it could take in a text, including
-      # both possible capitalizations, acute/grave, and multiple accents.
-      # The word w should already have been converted into a canonical dictionary form (typically a single acute accent).
-      # This is not 100% perfect, mainly because the rules for multiple accents are complicated and Tinycus doesn't include a full
-      # syllabification algorithm.
-      # I tested this as a round-trip on all multiply accented words occurring in Homer. The following three words were the only
-      # ones where it failed: κάλλίον, σταφύλῇ, ὕπὸ.
       forms = [w.downcase]
       forms = forms+forms.map { |x| Tr.greek_acute_to_grave(x) }
       forms = forms+forms.map { |x| MiscGreek.add_second_accent(x) }
@@ -235,10 +231,11 @@ module Tinycus
     @@greek_acute_to_grave = nil
     @@prep_greek_to_collation_form = nil
+    # Initialize a data structure that represents an action equivalent to String#tr(a,b), but faster.
+    # Including redundant characters or unchanged characters is harmless and is fixed in this constructor; it does not cause
+    # any performance hit when the object is actually used. The initializer takes linear time and memory in the size of
+    # the inputs.
     def initialize(a,b)
-      # Initialize a data structure that represents an action equivalent to string.tr(a,b), but faster.
-      # Including redudant characters is harmless and is fixed in this constructor; it does not cause
-      # any performance hit when the object is actually used.
       if a.length!=b.length then raise "lengths unequal, #{a.length} and #{b.length}" end
       @l = a.length
       @orig_tables = [a.clone,b.clone] # stash them away for testing purposes
@@ -246,7 +243,7 @@ module Tinycus
       @h = {}
       0.upto(@l-1) { |i|
         p,q = a[i],b[i]
-        @h[p] = q
+        if p!=q then @h[p] = q end
       }
       @h.freeze
@@ -254,9 +251,13 @@ module Tinycus
     attr_reader :l,:a,:b
+    # Takes a predefined Tr object and uses it to perform the equivalent of String#tr. Takes O(1) time in the length of
+    # the translation list.
+    # This function tends to be a bottleneck for performance in real-world applications. I tried several algorithms.
+    # See notes in comments at top of scripts/benchmark.rb.
+    # Something like Gnu gperf is theoretically superior in certain ways (e.g., avoiding the theoretical possibility of
+    # a hash collision).
     def apply(s,n:false)
-      # This function tends to be a bottleneck for performance in real-world applications. I tried several algorithms.
-      # See notes in comments at top of scripts/benchmark.rb.
       if n then s = s.unicode_normalize(:nfc) end # 30% performance hit, not necessary if input has already been normalized
@@ -271,9 +272,10 @@ module Tinycus
     end
-    def self_test(alphabet)
-      # Raises an exception if it fails. Otherwise just returns silently.
-      # If not nil, then the alphabet parameter gives a list of characters that are allowed to exist in the output.
+    # Tests a Tr object and pokes and prods it to see if it seems OK.
+    # Raises an exception if it fails. Otherwise just returns silently.
+    # If not nil, then the alphabet argument should give a list of characters that are allowed to exist in the output.
+    def self_test(alphabet) # :nodoc:
       a,b = @orig_tables
       if self.apply(a)!=b then raise "error in self_test, applying me to original a does not give original b" end
       if self.apply(b)!=b then raise "error in self_test, applying me to original b does not give original b" end
@@ -442,6 +444,7 @@ module Tinycus
       return @@prep_remove_accents_from_greek.apply(s)
     end
+    # Slow.
     def Tr.remove_macrons_and_breves(s)
       # This can't be implemented using my fast method, because most of these are composed characters.
       if !(s.kind_of?(String)) then return s end
@@ -500,12 +503,11 @@ a.each { |c|
 =end
 end
+    # Changes a macronized string to one that looks like this: ἕννυ_μι.
+    # We don't handle grave and circumflex accents, but those don't occur in dictionary headers with macrons.
+    # For an IfMows object in Ifthimos, this can be done using stringify(macronization:'underbar').
     def Tr.macronized_to_underbar_style(s)
-      # Changes a macronized string to one that looks like this: ἕννυ_μι.
       # The lists in the regexes are generated by the commented-out scripts below, and are not actually totally comprehensive.
-      # We don't handle grave and circumflex accents, but those don't occur in dictionary headers with macrons.
-      # For an IfMows object in Ifthimos, this can be done using stringify(macronization:'underbar').
-      # ---
       x = s.clone
       x = x.gsub(/(ϊ̄)/) { "#{Tinycus::Tr.remove_macrons_and_breves($1)}_" } # iota with diaresis and macron, occurs in ἀϊκή
       # First handle letters that have both a macron and a breve, treating them as if they weren't macronized at all:
@@ -542,7 +544,7 @@ print a.join('|'),"--\n"
 =end
     end
-    def Tr.get_greek_collation_tr
+    def Tr.get_greek_collation_tr # :nodoc:
       if @@prep_greek_to_collation_form.nil? then
         @@prep_greek_to_collation_form = Tr.collation_form('el')
       end
@@ -557,10 +559,10 @@ print a.join('|'),"--\n"
       return @@prep_greek_to_collation_form.apply(s)
     end
+    # Returns a Tinycus::Tr object which can then be used to act on strings using the apply() method.
+    # The 'el' locale is a standard thing that software like ICU uses for polytonic Greek. The object constructed with this
+    # locale will also remove most accents and macrons from Latin characters, but will miss some cases like Czech, and will not handle Cyrillic.
     def Tr.remove_accents(locale)
-      # Returns a Tinycus::Tr object which can then be used to act on strings using the apply() method.
-      # The 'el' locale is a standard thing that software like ICU uses for polytonic Greek. The object constructed with this
-      # locale will also remove most accents and macrons from Latin characters, but will miss some cases like Czech, and will not handle Cyrillic.
       t = {
         "el"=>[
           "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåæçèéêëìíîïñòóôõöøùúûüýÿΆΈΊΌΐάέήίΰϊϋόύώỏἀἁἂἃἄἅἆἈἉἊἌἍἎἐἑἒἓἔἕἘἙἜἝἠἡἢἣἤἥἦἧἨἩἫἬἭἮἯἰἱἲἳἴἵἶἷἸἹἼἽἾὀὁὂὃὄὅὈὉὊὋὌὍὐὑὓὔὕὖὗὙὝὠὡὢὣὤὥὦὧὨὩὫὬὭὮὯὰὲὴὶὸὺὼᾐᾑᾓᾔᾕᾖᾗᾠᾤᾦᾧᾰᾱᾳᾴᾶᾷᾸᾹῂῃῄῆῇῐῑῒῖῗῘῙῠῡῢῥῦῨῩῬῳῴῶῷῸῤᾆᾄᾂᾁᾇᾅᾃᾍᾡ",
@@ -573,11 +575,11 @@ print a.join('|'),"--\n"
       return result
     end
+    # Returns a Tinycus::Tr object which can then be used to act on strings using the apply() method. Gives a form that
+    # can be alphabetized properly.
+    # The 'el' locale is a standard thing that software like ICU uses for polytonic Greek. The object constructed with this
+    # locale will also produce correct results for most Latin-script words, will miss some cases like Czech, and will not handle Cyrillic.
     def Tr.collation_form(locale)
-      # Returns a Tinycus::Tr object which can then be used to act on strings using the apply() method. Gives a form that
-      # can be alphabetized properly.
-      # The 'el' locale is a standard thing that software like ICU uses for polytonic Greek. The object constructed with this
-      # locale will also produce correct results for most Latin-script words, will miss some cases like Czech, and will not handle Cyrillic.
       t = {
         "el"=>[
           "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåæçèéêëìíîïñòóôõöøùúûüýÿΆΈΊΌΐάέήίΰϊϋόύώỏἀἁἂἃἄἅἆἈἉἊἌἍἎἐἑἒἓἔἕἘἙἜἝἠἡἢἣἤἥἦἧἨἩἫἬἭἮἯἰἱἲἳἴἵἶἷἸἹἼἽἾὀὁὂὃὄὅὈὉὊὋὌὍὐὑὓὔὕὖὗὙὝὠὡὢὣὤὥὦὧὨὩὫὬὭὮὯὰὲὴὶὸὺὼᾐᾑᾓᾔᾕᾖᾗᾠᾤᾦᾧᾰᾱᾳᾴᾶᾷᾸᾹῂῃῄῆῇῐῑῒῖῗῘῙῠῡῢῥῦῨῩῬῳῴῶῷῸῤᾆᾄᾂᾁᾇᾅᾃΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩςᾍ",
@@ -590,15 +592,14 @@ print a.join('|'),"--\n"
       return result
     end
+    # E.g., Tr.add_breathing_to_character('α','rough') gives 'ἁ'.
     def Tr.add_breathing_to_character(c,what)
       plain,d = Tinycus.disassemble_greek_char(c)
       d['breathing'] = what
       return Tinycus.assemble_greek_char(plain,d)
     end
-    def Tr.run_tests
+    def Tr.run_tests # :nodoc:
       # to execute this, do a "make test_tr"
       ['el'].each { |locale|
         tr = Tinycus::Tr.remove_accents(locale)
@@ -613,10 +614,10 @@ print a.join('|'),"--\n"
   class Tinycus::Script
+    # Script can be 'latin', 'greek', or 'hebrew'.
+    # C can be both, lowercase, or uppercase.
+    # For scripts that don't have case, c is ignored.
     def Script.alphabet(script,c:'both')
-      # Script can be 'latin', 'greek', or 'hebrew'.
-      # C can be both, lowercase, or uppercase.
-      # For scripts that don't have case, c is ignored.
       t = {
         'latin'=>{'has_case'=>true},
         'greek'=>{'has_case'=>true},
@@ -633,7 +634,7 @@ print a.join('|'),"--\n"
       die("illegal value of c=#{c}, must be both, lowercase, or uppercase")
     end
-    def Script.alphabet_helper(script,include_lc_only_chars)
+    def Script.alphabet_helper(script,include_lc_only_chars) # :nodoc:
       if script=='latin'  then return 'abcdefghijklmnopqrstuvwxyz' end
       if script=='greek'  then
         result = 'αβγδεζηθικλμνξοπρστυφχψω'
@@ -651,8 +652,9 @@ print a.join('|'),"--\n"
   class Tinycus::Cleanup
+    # Designed for external data sources that can have all kinds of nasty crap in them. Slow, thorough, silent, and brutal.
+    # Attempts to eliminate the traces of incomplete beta code conversion that are found in some Project Perseus XML files.
     def Cleanup.clean_up_grotty_greek(s,allow_latin:false,clean_perseus:true,standardize_punctuation:true)
-      # Designed for external data sources that can have all kinds of nasty crap in them. Slow, thorough, silent, and brutal.
       a = s.split(/(\s+)/) # returns a string in which even indices are words, odd indices are whitespace
       b = []
       0.upto(a.length-1) { |i|
@@ -676,8 +678,8 @@ print a.join('|'),"--\n"
       return s
     end
+    # Like clean_up_grotty_greek, but works on a single word.
     def Cleanup.clean_up_grotty_greek_one_word(s,allow_latin:false,clean_perseus:true,standardize_punctuation:true)
-      # This works on a single word.
       s = s.unicode_normalize(:nfc)
       s = Cleanup.clean_up_greek_combining_characters(s,allow_latin:allow_latin)
       # In Perseus's Polybius, they have bracketed text sometimes. In their system, this should probably be a separate punctuation token.
@@ -705,7 +707,7 @@ print a.join('|'),"--\n"
       return s
     end
-    def Cleanup.clean_up_greek_combining_characters(s,allow_latin:false)
+    def Cleanup.clean_up_greek_combining_characters(s,allow_latin:false) # :nodoc:
       combining_comma_above = [787].pack('U')
       combining_acute_accent = [769].pack('U')
       greek_koronis = [8125].pack('U')
@@ -735,8 +737,7 @@ print a.join('|'),"--\n"
       return s
     end
-    def Cleanup.clean_up_greek_beta_code(s)
-      # This was for when I mistakenly used old beta code version of project perseus.
+    def Cleanup.clean_up_greek_beta_code(s) # :nodoc:
       # Even with perseus 2.1, some stuff seems to come through that looks like beta code, e.g., ἀργει~ος.
       # https://github.com/PerseusDL/treebank_data/issues/30
       s = s.sub(/\((.)/) { $1.tr("αειουηω","ἁἑἱὁὑἡὡ") }
@@ -751,8 +752,8 @@ print a.join('|'),"--\n"
       return s
     end
+    # Works on any string, doesn't have to be a single word. Standardize elision character and middle dot/ano teleia.
     def Cleanup.standardize_greek_punctuation(s)
-      # Works on any string, doesn't have to be a single word. Standardize elision character and middle dot/ano teleia.
       # Perseus and Monro/Allen write ρ with breathing mark instead of ρ᾽ when there's elision:
       s = s.gsub(/(?<=[[:alpha:]])[ῤῥ](?![[:alpha:]])/,'ρ᾽')
       # ... Note that we do need to reinsert the breathing mark, or else we lose the info needed to do accurate lemmatization. Cf. Spelling module.
@@ -1013,26 +1014,31 @@ end
 JSON
 @@beta_code_conversion = nil
-  def Tinycus.beta_code_conversion_table
-    if @@beta_code_conversion.nil? then @@beta_code_conversion=JSON.parse(@@beta_code_conversion_json) end
+  def Tinycus.beta_code_conversion_table # :nodoc:
+    if @@beta_code_conversion.nil? then
+      @@beta_code_conversion=JSON.parse(@@beta_code_conversion_json)
+      @@beta_code_conversion.freeze
+    end
     return @@beta_code_conversion
   end
+  # Converts a unicode character to beta code. The input must be utf8/nfc.
   def Tinycus.greek_char_unicode_to_beta_code(u)
     b = Tinycus.beta_code_conversion_table()[0][u]
     if !b.nil? then return b else return u end # most failures will just be whitespace, punctuation, etc.
   end
+  # Converts a character from beta code to unicode.
   def Tinycus.greek_char_beta_code_to_unicode(b)
     b = Tinycus.canonicalize_char_greek_beta_code(b)
     u = Tinycus.beta_code_conversion_table()[1][b]
     if !u.nil? then return u else return b end
   end
+  # Breathing normally comes after accent, but sometimes you see things in the wild where it's reversed.
+  # I can't find any documentation for any preferred or canonical order. What circumstantial evidence I could find I put into the WP article.
+  # Note that the order of |+ doesn't matter, because the same letter can't have both.
   def Tinycus.canonicalize_char_greek_beta_code(b)
-    # Breathing normally comes after accent, but sometimes you see things in the wild where it's reversed.
-    # I can't find any documentation for any preferred or canonical order. What circumstantial evidence I could find I put into the WP article.
-    # Note that the order of |+ doesn't matter, because the same letter can't have both.
     ")(/\\=|+&'".chars.each { |c|
       if b=~/(.*)#{Regexp::quote(c)}(.*)/ then b = $1+$2+c end
     }
@@ -1040,6 +1046,7 @@ JSON
     return b
   end
+  # Converts a string from utf8/nfc to beta code.
   def Tinycus.greek_unicode_to_beta_code(u)
     u = Tinycus::Cleanup.clean_up_grotty_greek(u,allow_latin:true,clean_perseus:true,standardize_punctuation:false)
     # ... the conversion below will not work on unicode that isn't done cleanly and according to modern standards
@@ -1051,6 +1058,7 @@ JSON
     return b
   end
+  # Converts a string from beta code to unicode.
   def Tinycus.greek_beta_code_to_unicode(b)
     # This implementation will be kind of slow because it does regex replacements in place.
     b = b.clone
@@ -1065,13 +1073,13 @@ JSON
     return b
   end
+  # Returns [plain,d], where plain is a lowercase, unaccented Greek letter (α-ω, plus ς), and d is
+  # a hash with the following keys:
+  #   uppercase, diar, iota_subscript - boolean values
+  #   tonal - string value: none acute grave circumflex
+  #   breathing - string value: none smooth rough
+  # Doesn't handle macrons and breves. I have a function IfMows.disassemble_char in Ifthimos that does that.
   def Tinycus.disassemble_greek_char(c)
-    # Returns [plain,d], where plain is a lowercase, unaccented Greek letter (α-ω, plus ς), and d is
-    # a hash with the following keys:
-    #   uppercase, diar, iota_subscript - boolean values
-    #   tonal - string value: none acute grave circumflex
-    #   breathing - string value: none smooth rough
-    # Doesn't handle macrons and breves. I have a function IfMows.disassemble_char in Ifthimos that does that.
     x = Tinycus.disassemble_greek_char_binary(c)
     if x.nil? then return nil end
     plain,decor = x
@@ -1092,9 +1100,8 @@ JSON
     return [plain,d]
   end
+  # The inverse of Tinycus.disassemble_greek_char.
   def Tinycus.assemble_greek_char(plain,d)
-    # The inverse of Tinycus.disassemble_greek_char.
-    # Doesn't handle macrons and breves. I have a function IfMows.assemble_char in Ifthimos that does that.
     b = 0
     b |=        0b1 if d['uppercase']
     b |=       0b10 if d['diar']
@@ -1108,7 +1115,7 @@ JSON
     return Tinycus.assemble_greek_char_hex(x)
   end
-  def Tinycus.disassemble_greek_char_binary(c)
+  def Tinycus.disassemble_greek_char_binary(c) # :nodoc:
     # Returns [plain,b], where plain is a lowercase, unaccented Greek letter (α-ω, plus ς), and b is
     # an integer containing a set of flags encoded in binary, as follows:
     #  b |=        0b1 if d['uppercase']
@@ -1122,7 +1129,7 @@ JSON
     return @@disassemble_greek_char_hash[c]
   end
-  def Tinycus.assemble_greek_char_hex(x)
+  def Tinycus.assemble_greek_char_hex(x) # :nodoc:
     # The inverse of the map in Tinycus.disassemble_greek_char_binary.
     # Accepts an input such as 'α08', where the second and third characters are the hex representation of of
     # the set of flags described in the comments in the forward map.

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: tinycus
 version: !ruby/object:Gem::Version
-  version: 1.0.5
+  version: 1.0.7
 platform: ruby
 authors:
 - Benjamin Crowell
@@ -16,7 +16,8 @@ description: "This is a ruby library to do some string functions efficiently tha
 email:
 executables: []
 extensions: []
-extra_rdoc_files: []
+extra_rdoc_files:
+- README.md
 files:
 - LICENSE
 - README.md
@@ -24,9 +25,16 @@ files:
 homepage: https://bitbucket.org/ben-crowell/tinycus
 licenses:
 - GPL-3.0-only
-metadata: {}
+metadata:
+  contact_uri: http://lightandmatter.com/area4author.html
+  homepage_uri: https://bitbucket.org/ben-crowell/tinycus
+  source_code_uri: https://bitbucket.org/ben-crowell/tinycus
 post_install_message:
-rdoc_options: []
+rdoc_options:
+- "--exclude"
+- "(bad_combining|generating)"
+- "--main"
+- README.md
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement