blackwinter-cmess 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data/COPYING +676 -0
  2. data/ChangeLog +54 -0
  3. data/README +63 -0
  4. data/Rakefile +51 -0
  5. data/bin/bconv +130 -0
  6. data/bin/cinderella +190 -0
  7. data/bin/decode_entities +106 -0
  8. data/bin/guess_encoding +223 -0
  9. data/data/chartab.yaml +26724 -0
  10. data/data/csets/iso_8859-1.yaml +195 -0
  11. data/data/csets/iso_8859-15.yaml +204 -0
  12. data/data/csets/latin1.yaml +195 -0
  13. data/data/csets/unicode/basic_latin.yaml +97 -0
  14. data/data/csets/unicode/cyrillic-supplement.yaml +17 -0
  15. data/data/csets/unicode/cyrillic.yaml +256 -0
  16. data/data/csets/unicode/greek.yaml +129 -0
  17. data/data/csets/unicode/ipa_extensions.yaml +97 -0
  18. data/data/csets/unicode/latin-extended-c.yaml +18 -0
  19. data/data/csets/unicode/latin-extended-d.yaml +3 -0
  20. data/data/csets/unicode/latin_1_supplement.yaml +128 -0
  21. data/data/csets/unicode/latin_extended_a.yaml +129 -0
  22. data/data/csets/unicode/latin_extended_additional.yaml +247 -0
  23. data/data/csets/unicode/latin_extended_b.yaml +209 -0
  24. data/data/csets/unicode/letterlike_symbols.yaml +80 -0
  25. data/data/csets/unicode/spacing_modifier_letters.yaml +81 -0
  26. data/data/csets/utf-8.yaml +1504 -0
  27. data/data/csets/utf8.yaml +1504 -0
  28. data/data/test_chars.yaml +14 -0
  29. data/example/cinderella/crop +127 -0
  30. data/example/cinderella/crop_repaired +127 -0
  31. data/example/cinderella/empty6-slash.txt +1495 -0
  32. data/example/cinderella/empty6-slash_repaired.txt +1495 -0
  33. data/example/cinderella/pot +1368 -0
  34. data/example/guess_encoding/check_results +60 -0
  35. data/example/guess_encoding/de.utf-8.txt +10030 -0
  36. data/example/guess_encoding/en.utf-8.txt +10030 -0
  37. data/example/guess_encoding/fr.utf-8.txt +10030 -0
  38. data/example/guess_encoding/it.utf-8.txt +10030 -0
  39. data/lib/cmess/bconv.rb +169 -0
  40. data/lib/cmess/cinderella.rb +66 -0
  41. data/lib/cmess/cli.rb +120 -0
  42. data/lib/cmess/decode_entities.rb +69 -0
  43. data/lib/cmess/guess_encoding/automatic.rb +343 -0
  44. data/lib/cmess/guess_encoding/encoding.rb +78 -0
  45. data/lib/cmess/guess_encoding/manual.rb +108 -0
  46. data/lib/cmess/guess_encoding.rb +61 -0
  47. data/lib/cmess/version.rb +51 -0
  48. data/lib/cmess.rb +49 -0
  49. metadata +136 -0
@@ -0,0 +1,81 @@
1
+ ---
2
+ - "ʰ" # MODIFIER LETTER SMALL H
3
+ - "ʱ" # MODIFIER LETTER SMALL H WITH HOOK
4
+ - "ʲ" # MODIFIER LETTER SMALL J
5
+ - "ʳ" # MODIFIER LETTER SMALL R
6
+ - "ʴ" # MODIFIER LETTER SMALL TURNED R
7
+ - "ʵ" # MODIFIER LETTER SMALL TURNED R WITH HOOK
8
+ - "ʶ" # MODIFIER LETTER SMALL CAPITAL INVERTED R
9
+ - "ʷ" # MODIFIER LETTER SMALL W
10
+ - "ʸ" # MODIFIER LETTER SMALL Y
11
+ - "ʹ" # MODIFIER LETTER PRIME
12
+ - "ʺ" # MODIFIER LETTER DOUBLE PRIME
13
+ - "ʻ" # MODIFIER LETTER TURNED COMMA
14
+ - "ʼ" # MODIFIER LETTER APOSTROPHE
15
+ - "ʽ" # MODIFIER LETTER REVERSED COMMA
16
+ - "ʾ" # MODIFIER LETTER RIGHT HALF RING
17
+ - "ʿ" # MODIFIER LETTER LEFT HALF RING
18
+ - "ˀ" # MODIFIER LETTER GLOTTAL STOP
19
+ - "ˁ" # MODIFIER LETTER REVERSED GLOTTAL STOP
20
+ - "˂" # MODIFIER LETTER LEFT ARROWHEAD
21
+ - "˃" # MODIFIER LETTER RIGHT ARROWHEAD
22
+ - "˄" # MODIFIER LETTER UP ARROWHEAD
23
+ - "˅" # MODIFIER LETTER DOWN ARROWHEAD
24
+ - "ˆ" # MODIFIER LETTER CIRCUMFLEX ACCENT (present in WGL4 and MacRoman)
25
+ - "ˇ" # CARON (present in WGL4 and MacRoman)
26
+ - "ˈ" # MODIFIER LETTER VERTICAL LINE
27
+ - "ˉ" # MODIFIER LETTER MACRON (present in WGL4)
28
+ - "ˊ" # MODIFIER LETTER ACUTE ACCENT
29
+ - "ˋ" # MODIFIER LETTER GRAVE ACCENT
30
+ - "ˌ" # MODIFIER LETTER LOW VERTICAL LINE
31
+ - "ˍ" # MODIFIER LETTER LOW MACRON
32
+ - "ˎ" # MODIFIER LETTER LOW GRAVE ACCENT
33
+ - "ˏ" # MODIFIER LETTER LOW ACUTE ACCENT
34
+ - "ː" # MODIFIER LETTER TRIANGULAR COLON
35
+ - "ˑ" # MODIFIER LETTER HALF TRIANGULAR COLON
36
+ - "˒" # MODIFIER LETTER CENTRED RIGHT HALF RING
37
+ - "˓" # MODIFIER LETTER CENTRED LEFT HALF RING
38
+ - "˔" # MODIFIER LETTER UP TACK
39
+ - "˕" # MODIFIER LETTER DOWN TACK
40
+ - "˖" # MODIFIER LETTER PLUS SIGN
41
+ - "˗" # MODIFIER LETTER MINUS SIGN
42
+ - "˘" # BREVE (present in WGL4 and MacRoman)
43
+ - "˙" # DOT ABOVE (present in WGL4 and MacRoman)
44
+ - "˚" # RING ABOVE (present in WGL4 and MacRoman)
45
+ - "˛" # OGONEK (present in WGL4 and MacRoman)
46
+ - "˜" # SMALL TILDE (present in WGL4 and MacRoman)
47
+ - "˝" # DOUBLE ACUTE ACCENT (present in WGL4 and MacRoman)
48
+ - "˞" # MODIFIER LETTER RHOTIC HOOK
49
+ - "˟" # MODIFIER LETTER CROSS ACCENT
50
+ - "ˠ" # MODIFIER LETTER SMALL GAMMA
51
+ - "ˡ" # MODIFIER LETTER SMALL L
52
+ - "ˢ" # MODIFIER LETTER SMALL S
53
+ - "ˣ" # MODIFIER LETTER SMALL X
54
+ - "ˤ" # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
55
+ - "˥" # MODIFIER LETTER EXTRA-HIGH TONE BAR
56
+ - "˦" # MODIFIER LETTER HIGH TONE BAR
57
+ - "˧" # MODIFIER LETTER MID TONE BAR
58
+ - "˨" # MODIFIER LETTER LOW TONE BAR
59
+ - "˩" # MODIFIER LETTER EXTRA-LOW TONE BAR
60
+ - "˪" # MODIFIER LETTER YIN DEPARTING TONE MARK
61
+ - "˫" # MODIFIER LETTER YANG DEPARTING TONE MARK
62
+ - "ˬ" # MODIFIER LETTER VOICING
63
+ - "˭" # MODIFIER LETTER UNASPIRATED
64
+ - "ˮ" # MODIFIER LETTER DOUBLE APOSTROPHE
65
+ - "˯" # MODIFIER LETTER LOW DOWN ARROWHEAD
66
+ - "˰" # MODIFIER LETTER LOW UP ARROWHEAD
67
+ - "˱" # MODIFIER LETTER LOW LEFT ARROWHEAD
68
+ - "˲" # MODIFIER LETTER LOW RIGHT ARROWHEAD
69
+ - "˳" # MODIFIER LETTER LOW RING
70
+ - "˴" # MODIFIER LETTER MIDDLE GRAVE ACCENT
71
+ - "˵" # MODIFIER LETTER MIDDLE DOUBLE GRAVE ACCENT
72
+ - "˶" # MODIFIER LETTER MIDDLE DOUBLE ACUTE ACCENT
73
+ - "˷" # MODIFIER LETTER LOW TILDE
74
+ - "˸" # MODIFIER LETTER RAISED COLON
75
+ - "˹" # MODIFIER LETTER BEGIN HIGH TONE
76
+ - "˺" # MODIFIER LETTER END HIGH TONE
77
+ - "˻" # MODIFIER LETTER BEGIN LOW TONE
78
+ - "˼" # MODIFIER LETTER END LOW TONE
79
+ - "˽" # MODIFIER LETTER SHELF
80
+ - "˾" # MODIFIER LETTER OPEN SHELF
81
+ - "˿" # MODIFIER LETTER LOW LEFT ARROW