blackwinter-cmess 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. data/COPYING +676 -0
  2. data/ChangeLog +54 -0
  3. data/README +63 -0
  4. data/Rakefile +51 -0
  5. data/bin/bconv +130 -0
  6. data/bin/cinderella +190 -0
  7. data/bin/decode_entities +106 -0
  8. data/bin/guess_encoding +223 -0
  9. data/data/chartab.yaml +26724 -0
  10. data/data/csets/iso_8859-1.yaml +195 -0
  11. data/data/csets/iso_8859-15.yaml +204 -0
  12. data/data/csets/latin1.yaml +195 -0
  13. data/data/csets/unicode/basic_latin.yaml +97 -0
  14. data/data/csets/unicode/cyrillic-supplement.yaml +17 -0
  15. data/data/csets/unicode/cyrillic.yaml +256 -0
  16. data/data/csets/unicode/greek.yaml +129 -0
  17. data/data/csets/unicode/ipa_extensions.yaml +97 -0
  18. data/data/csets/unicode/latin-extended-c.yaml +18 -0
  19. data/data/csets/unicode/latin-extended-d.yaml +3 -0
  20. data/data/csets/unicode/latin_1_supplement.yaml +128 -0
  21. data/data/csets/unicode/latin_extended_a.yaml +129 -0
  22. data/data/csets/unicode/latin_extended_additional.yaml +247 -0
  23. data/data/csets/unicode/latin_extended_b.yaml +209 -0
  24. data/data/csets/unicode/letterlike_symbols.yaml +80 -0
  25. data/data/csets/unicode/spacing_modifier_letters.yaml +81 -0
  26. data/data/csets/utf-8.yaml +1504 -0
  27. data/data/csets/utf8.yaml +1504 -0
  28. data/data/test_chars.yaml +14 -0
  29. data/example/cinderella/crop +127 -0
  30. data/example/cinderella/crop_repaired +127 -0
  31. data/example/cinderella/empty6-slash.txt +1495 -0
  32. data/example/cinderella/empty6-slash_repaired.txt +1495 -0
  33. data/example/cinderella/pot +1368 -0
  34. data/example/guess_encoding/check_results +60 -0
  35. data/example/guess_encoding/de.utf-8.txt +10030 -0
  36. data/example/guess_encoding/en.utf-8.txt +10030 -0
  37. data/example/guess_encoding/fr.utf-8.txt +10030 -0
  38. data/example/guess_encoding/it.utf-8.txt +10030 -0
  39. data/lib/cmess/bconv.rb +169 -0
  40. data/lib/cmess/cinderella.rb +66 -0
  41. data/lib/cmess/cli.rb +120 -0
  42. data/lib/cmess/decode_entities.rb +69 -0
  43. data/lib/cmess/guess_encoding/automatic.rb +343 -0
  44. data/lib/cmess/guess_encoding/encoding.rb +78 -0
  45. data/lib/cmess/guess_encoding/manual.rb +108 -0
  46. data/lib/cmess/guess_encoding.rb +61 -0
  47. data/lib/cmess/version.rb +51 -0
  48. data/lib/cmess.rb +49 -0
  49. metadata +136 -0
@@ -0,0 +1,60 @@
1
+ - de/cp1250 => ISO-8859-1
2
+ ? de/cp1251 => UNKNOWN
3
+ - de/cp1252 => ISO-8859-1
4
+ + de/cp850 => CP850
5
+ - de/cp852 => CP850
6
+ ? de/cp856 => UNKNOWN
7
+ - de/iso-8859-1 => CP1252
8
+ - de/iso-8859-15 => CP1252
9
+ - de/iso-8859-16 => CP1252
10
+ - de/iso-8859-2 => CP1252
11
+ - de/iso-8859-3 => CP1252
12
+ - de/iso-8859-4 => CP1252
13
+ - de/iso-8859-9 => CP1252
14
+ + de/macintosh => MACINTOSH
15
+ + de/utf-8 => UTF-8
16
+ # en/cp1250 => ASCII
17
+ # en/cp1251 => ASCII
18
+ # en/cp1252 => ASCII
19
+ # en/cp850 => ASCII
20
+ # en/cp852 => ASCII
21
+ # en/cp856 => ASCII
22
+ # en/iso-8859-1 => ASCII
23
+ # en/iso-8859-15 => ASCII
24
+ # en/iso-8859-16 => ASCII
25
+ # en/iso-8859-2 => ASCII
26
+ # en/iso-8859-3 => ASCII
27
+ # en/iso-8859-4 => ASCII
28
+ # en/iso-8859-9 => ASCII
29
+ # en/macintosh => ASCII
30
+ # en/utf-8 => ASCII
31
+ - fr/cp1250 => ISO-8859-1
32
+ ? fr/cp1251 => UNKNOWN
33
+ - fr/cp1252 => ISO-8859-1
34
+ + fr/cp850 => CP850
35
+ - fr/cp852 => CP850
36
+ ? fr/cp856 => UNKNOWN
37
+ + fr/iso-8859-1 => ISO-8859-1
38
+ - fr/iso-8859-15 => ISO-8859-1
39
+ - fr/iso-8859-16 => ISO-8859-1
40
+ - fr/iso-8859-2 => ISO-8859-1
41
+ - fr/iso-8859-3 => ISO-8859-1
42
+ - fr/iso-8859-4 => ISO-8859-1
43
+ - fr/iso-8859-9 => ISO-8859-1
44
+ + fr/macintosh => MACINTOSH
45
+ + fr/utf-8 => UTF-8
46
+ ? it/cp1250 => UNKNOWN
47
+ ? it/cp1251 => UNKNOWN
48
+ - it/cp1252 => ISO-8859-1
49
+ - it/cp850 => MACINTOSH
50
+ ? it/cp852 => UNKNOWN
51
+ ? it/cp856 => UNKNOWN
52
+ + it/iso-8859-1 => ISO-8859-1
53
+ - it/iso-8859-15 => ISO-8859-1
54
+ - it/iso-8859-16 => ISO-8859-1
55
+ ? it/iso-8859-2 => UNKNOWN
56
+ - it/iso-8859-3 => ISO-8859-1
57
+ ? it/iso-8859-4 => UNKNOWN
58
+ - it/iso-8859-9 => ISO-8859-1
59
+ + it/macintosh => MACINTOSH
60
+ + it/utf-8 => UTF-8