asciify 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/lib/asciify.rb +107 -0
  2. data/lib/mappings/default.yaml +27 -0
  3. metadata +41 -0
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'iconv'
4
+ require 'enumerator'
5
+ require 'yaml'
6
+
7
+
8
+ class Asciify
9
+ Intermediate = "UCS-4"  # encoding every string is recoded to before its codepoints are inspected
10
+ PackFormat = "N*"  # pack/unpack directive: a sequence of 32-bit unsigned integers in big-endian order
11
+
12
+ class Mapping
13
+ # Lookup table from a Unicode codepoint to the array of codepoints that replaces it, loaded from a YAML file.
14
+ # converts a UTF-8 string to an array of Unicode codepoints
15
+ #
16
+ def from_utf8(str)
17
+ Iconv.new(Intermediate,"UTF-8").iconv(str).unpack(PackFormat)
18
+ end
19
+
20
+ # defines a mapping from Unicode codepoints to ASCII chars
21
+ # +language+ can be a path to a YAML file which contains the
22
+ # mappings as a Hash (UTF-8 character => UTF-8 replacement string)
23
+ #
24
+ # If +language+ is a Symbol, it names a builtin mapping: mappings/<language>.yaml in the directory of this file
25
+ # +replacement+ (UTF-8) is what #[] returns, as codepoints, for every codepoint the file does not list
26
+ def initialize(language = :default, replacement = "?")
27
+
28
+ if Symbol === language
29
+ path = "#{File.dirname(__FILE__)}/mappings/#{language}.yaml"
30
+ else
31
+ path = language
32
+ end
33
+
34
+ h = YAML.load_file(path)
35
+ i = Iconv.new("UCS-4","UTF-8")  # same encoding as the Intermediate constant, spelled out literally
36
+
37
+ # the hash falls back to the default replacement for any codepoint it has no entry for
38
+ @map = Hash.new( i.iconv(replacement).unpack(PackFormat) )
39
+
40
+ # the mappings file is UTF-8, recode to UCS-4
41
+ h.each { |k,v|
42
+ @map[*i.iconv(k).unpack(PackFormat)] = i.iconv(v).unpack(PackFormat)  # the splat unwraps the key's codepoint array; NOTE(review): a multi-character key would not work here - confirm keys are always single characters
43
+ }
44
+
45
+ @map  # no effect: Class#new ignores the value returned by initialize
46
+ end
47
+ # returns the replacement for +codepoint+ as an Array of codepoints
48
+ def [](codepoint)
49
+ @map[codepoint]
50
+ end
51
+
52
+ end
53
+
54
+ class HTMLEntities < Mapping
55
+
56
+ # mapping from Unicode codepoints to numeric HTML entities
57
+ # (pass an instance, not the class: convert calls [] on the object it was given)
58
+ # Asciify.new(Asciify::HTMLEntities.new).convert("\303\244") #=> "&#228;"
59
+ #
60
+ def [](codepoint)
61
+ from_utf8 "&##{codepoint};"
62
+ end
63
+ end
64
+ # +replacement+ is either a String (in +source+ encoding) substituted for every codepoint >= 128, or an object responding to [] (e.g. a Mapping)
65
+ def initialize(replacement = "?", target = "ASCII", source = "UTF-8")
66
+ @from_input_enc = Iconv.new(Intermediate, source)  # source -> UCS-4
67
+ @to_output_enc = Iconv.new(target, Intermediate)  # UCS-4 -> target
68
+
69
+ if String === replacement
70
+ r = @from_input_enc.iconv(replacement).unpack(PackFormat)
71
+ @mapping = Hash.new(r)  # empty hash: every lookup yields the replacement codepoints
72
+ else
73
+ @mapping = replacement
74
+ end
75
+ end
76
+ # recodes +str+ from the source to the target encoding, substituting every codepoint >= 128 via the mapping
77
+ def convert(str)
78
+ u16s = @from_input_enc.iconv(str)  # despite the name this holds UCS-4 (Intermediate), not UTF-16
79
+
80
+ s = u16s.unpack(PackFormat).collect { |codepoint|
81
+ codepoint < 128 ? codepoint : @mapping[codepoint]  # ASCII codepoints pass through unchanged
82
+ }.flatten.compact.pack(PackFormat)  # flatten splices multi-codepoint replacements in; compact drops nil results
83
+
84
+ return @to_output_enc.iconv(s)
85
+ end
86
+
87
+ end
88
+
89
+ class String
90
+ # convenience methods added to the core String class
91
+ # replaces every character which is not part of ASCII
92
+ # with +replacement+ and returns the converted string
93
+ # all arguments are passed straight to Asciify.new (replacement, target, source)
94
+ # +replacement+ is supposed to be the same encoding as +source+
95
+ #
96
+ def asciify(*args)
97
+ Asciify.new(*args).convert(self)
98
+ end
99
+ # true if every byte of the string is below 128 (also true for the empty string)
100
+ def ascii?
101
+ self.to_enum(:each_byte).all? { |b| b < 128 }
102
+ end
103
+ end
104
+
105
+ if __FILE__ == $0  # true only when this file is executed directly; the body is empty, so nothing happens
106
+ end
107
+
@@ -0,0 +1,27 @@
1
+ ---
2
+ "“": '"'
3
+ "”": '"'
4
+ "‘": "'"
5
+ "’": "'"
6
+ "„": '"'
7
+ "〝": '"'
8
+ "〞": '"'
9
+ "»": ">>"
10
+ "«": "<<"
11
+ "ä": "ae"
12
+ "ö": "oe"
13
+ "ü": "ue"
14
+ "Ä": "Ae"
15
+ "Ö": "Oe"
16
+ "Ü": "Ue"
17
+ "ß": "ss"
18
+ "æ": "ae"
19
+ "Æ": "AE"
20
+ "œ": "oe"
21
+ "Œ": "OE"
22
+ "€": "EUR"
23
+ "½": "1/2"
24
+ "¼": "1/4"
25
+ "¾": "3/4"
26
+ "©": "(c)"
27
+ "®": "(r)"
metadata ADDED
@@ -0,0 +1,41 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.11
3
+ specification_version: 1
4
+ name: asciify
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.1.0
7
+ date: 2006-01-24 00:00:00 +01:00
8
+ summary: Tool to strip non-ASCII characters from a string and replace them with something else
9
+ require_paths:
10
+ - lib
11
+ email: levin@grundeis.net
12
+ homepage: http://levinalex.de/ruby/asciify
13
+ rubyforge_project:
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ -
22
+ - ">"
23
+ - !ruby/object:Gem::Version
24
+ version: 0.0.0
25
+ version:
26
+ platform: ruby
27
+ signing_key:
28
+ cert_chain:
29
+ authors:
30
+ - Levin Alexander
31
+ files:
32
+ - lib/asciify.rb
33
+ - lib/mappings
34
+ - lib/mappings/default.yaml
35
+ test_files: []
36
+ rdoc_options: []
37
+ extra_rdoc_files: []
38
+ executables: []
39
+ extensions: []
40
+ requirements: []
41
+ dependencies: []