asciify 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/lib/asciify.rb +107 -0
  2. data/lib/mappings/default.yaml +27 -0
  3. metadata +41 -0
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'iconv'
4
+ require 'enumerator'
5
+ require 'yaml'
6
+
7
+
8
# Replaces the non-ASCII characters of a string with substitutes.
#
# The input is recoded into UCS-4 so that every character can be handled
# as one integer codepoint. Codepoints below 128 pass through unchanged;
# every other codepoint is looked up in a mapping and replaced by the
# codepoints the mapping returns. The result is recoded into the target
# encoding.
#
#   Asciify.new.convert("B\303\244r")                       #=> "B?r"
#   Asciify.new(Asciify::HTMLEntities).convert("\303\244")  #=> "&#228;"
#
class Asciify
  # Encoding used internally: one 32-bit unit per codepoint.
  Intermediate = "UCS-4"

  # pack/unpack directive for a sequence of 32-bit big-endian unsigned integers.
  # NOTE(review): assumes iconv emits "UCS-4" big-endian without a byte
  # order mark -- confirm on the target platform.
  PackFormat = "N*"

  # Lookup table from a Unicode codepoint to the array of codepoints that
  # should replace it, loaded from a YAML file.
  class Mapping

    # converts a UTF-8 string to an array of unicode codepoints
    #
    def from_utf8(str)
      Iconv.new(Intermediate, "UTF-8").iconv(str).unpack(PackFormat)
    end

    # define a mapping from Unicode codepoints to ASCII chars
    #
    # +language+ can be a path to a yaml file which contains the
    # mappings as a Hash (single UTF-8 character => replacement string).
    # If +language+ is a symbol, it refers to a builtin mapping stored in
    # the "mappings" directory next to this file.
    #
    # +replacement+ is the UTF-8 string used for every codepoint that has
    # no entry in the mapping file.
    #
    # Raises ArgumentError when a key of the mapping file is not exactly
    # one character long.
    #
    def initialize(language = :default, replacement = "?")
      path = if Symbol === language
        "#{File.dirname(__FILE__)}/mappings/#{language}.yaml"
      else
        language
      end

      # NOTE(review): the mapping file is parsed with YAML.load_file and
      # must therefore come from a trusted location.
      # An empty file yields nil/false; treat it as an empty table.
      table = YAML.load_file(path) || {}

      # codepoints without an explicit entry fall back to +replacement+
      @map = Hash.new(from_utf8(replacement))

      # the mappings file is UTF-8, recode to UCS-4 codepoints
      table.each do |char, substitute|
        key = from_utf8(char)
        unless key.size == 1
          raise ArgumentError,
                "mapping key #{char.inspect} must be exactly one character"
        end
        @map[key.first] = from_utf8(substitute)
      end
    end

    # Returns the array of replacement codepoints for +codepoint+.
    def [](codepoint)
      @map[codepoint]
    end

  end

  # mapping from Unicode codepoints to numeric HTML entities
  #
  #   Asciify.new(Asciify::HTMLEntities).convert("\303\244") #=> "&#228;"
  #
  class HTMLEntities < Mapping

    # Entities are computed on the fly, so no mapping file is loaded.
    # Arguments are accepted and ignored to stay call-compatible with
    # Mapping.new.
    def initialize(*_ignored)
    end

    # Returns the codepoints of the numeric entity "&#<codepoint>;".
    def [](codepoint)
      from_utf8 "&##{codepoint};"
    end
  end

  # +replacement+ decides what a non-ASCII codepoint is turned into:
  #
  # * a String (in the +source+ encoding) replaces every such codepoint,
  # * a Class is instantiated without arguments and the instance is used
  #   as the mapping,
  # * any other object is used as the mapping directly; it must answer
  #   [](codepoint) with an array of codepoints (or nil to drop it).
  #
  # +target+ and +source+ are iconv encoding names for the output and the
  # input of #convert.
  #
  def initialize(replacement = "?", target = "ASCII", source = "UTF-8")
    @from_input_enc = Iconv.new(Intermediate, source)
    @to_output_enc = Iconv.new(target, Intermediate)

    @mapping =
      if String === replacement
        Hash.new(@from_input_enc.iconv(replacement).unpack(PackFormat))
      elsif Class === replacement
        # allows the documented Asciify.new(Asciify::HTMLEntities)
        replacement.new
      else
        replacement
      end
  end

  # Returns +str+ recoded into the target encoding with every codepoint
  # of 128 or above replaced through the mapping.
  def convert(str)
    codepoints = @from_input_enc.iconv(str).unpack(PackFormat)

    replaced = codepoints.map { |codepoint|
      codepoint < 128 ? codepoint : @mapping[codepoint]
    }

    # a mapping may return several codepoints (nested array) or nil
    @to_output_enc.iconv(replaced.flatten.compact.pack(PackFormat))
  end

end
88
+
89
class String

  # Returns a converted copy of the string in which everything that is
  # not part of ascii has been replaced with +replacement+.
  #
  # All arguments are handed to Asciify.new unchanged;
  # +replacement+ is supposed to be the same encoding as +source+
  #
  def asciify(*args)
    converter = Asciify.new(*args)
    converter.convert(self)
  end

  # true when every byte of the string is below 128
  # (an empty string therefore counts as ascii)
  #
  def ascii?
    each_byte { |byte| return false unless byte < 128 }
    true
  end
end
104
+
105
# Placeholder for command-line use: nothing happens when this file is
# executed directly.
if $0 == __FILE__
end
107
+
@@ -0,0 +1,27 @@
1
+ ---
2
+ "“": '"'
3
+ "”": '"'
4
+ "‘": "'"
5
+ "’": "'"
6
+ "„": '"'
7
+ "〝": '"'
8
+ "〞": '"'
9
+ "»": ">>"
10
+ "«": "<<"
11
+ "ä": "ae"
12
+ "ö": "oe"
13
+ "ü": "ue"
14
+ "Ä": "Ae"
15
+ "Ö": "Oe"
16
+ "Ü": "Ue"
17
+ "ß": "ss"
18
+ "æ": "ae"
19
+ "Æ": "AE"
20
+ "œ": "oe"
21
+ "Œ": "OE"
22
+ "€": "EUR"
23
+ "½": "1/2"
24
+ "¼": "1/4"
25
+ "¾": "3/4"
26
+ "©": "(c)"
27
+ "®": "(r)"
metadata ADDED
@@ -0,0 +1,41 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.11
3
+ specification_version: 1
4
+ name: asciify
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.1.0
7
+ date: 2006-01-24 00:00:00 +01:00
8
+ summary: Tool to strip non-ASCII characters from a string and replace them with something else
9
+ require_paths:
10
+ - lib
11
+ email: levin@grundeis.net
12
+ homepage: http://levinalex.de/ruby/asciify
13
+ rubyforge_project:
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ -
22
+ - ">"
23
+ - !ruby/object:Gem::Version
24
+ version: 0.0.0
25
+ version:
26
+ platform: ruby
27
+ signing_key:
28
+ cert_chain:
29
+ authors:
30
+ - Levin Alexander
31
+ files:
32
+ - lib/asciify.rb
33
+ - lib/mappings
34
+ - lib/mappings/default.yaml
35
+ test_files: []
36
+ rdoc_options: []
37
+ extra_rdoc_files: []
38
+ executables: []
39
+ extensions: []
40
+ requirements: []
41
+ dependencies: []