asciify 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/asciify.rb +107 -0
- data/lib/mappings/default.yaml +27 -0
- metadata +41 -0
data/lib/asciify.rb
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'iconv'
|
4
|
+
require 'enumerator'
|
5
|
+
require 'yaml'
|
6
|
+
|
7
|
+
|
8
|
+
# Replaces every non-ASCII character of a string with an ASCII
# substitute.
#
#   Asciify.new("?").convert("B\303\244r")                     #=> "B?r"
#   Asciify.new(Asciify::Mapping.new(:default)).convert(str)
#   Asciify.new(Asciify::HTMLEntities.new).convert("\303\244") #=> "&#228;"
#
class Asciify
  # Fixed-width encoding used internally, so that every character
  # becomes exactly one integer codepoint.
  Intermediate = "UCS-4"

  # pack/unpack template for strings in Intermediate: a sequence of
  # 32-bit big-endian unsigned integers.
  # NOTE(review): assumes Iconv emits "UCS-4" big-endian without a BOM.
  PackFormat = "N*"

  # Lookup table from a Unicode codepoint to the array of codepoints
  # that should replace it.
  class Mapping

    # converts an UTF-8 string to an array of unicode codepoints
    #
    def from_utf8(str)
      Iconv.new(Intermediate, "UTF-8").iconv(str).unpack(PackFormat)
    end

    # define a mapping from Unicode codepoints to ASCII chars
    #
    # +language+ can be a path to a yaml file which contains the
    # mappings as a Hash (single UTF-8 character => replacement text).
    #
    # If +language+ is a symbol, it refers to a builtin mapping, i.e.
    # the file mappings/<language>.yaml next to this source file.
    #
    # +replacement+ is the UTF-8 text used for every character that
    # has no entry in the mapping file.
    #
    # Raises ArgumentError if a key of the mapping file is not exactly
    # one character long.
    #
    def initialize(language = :default, replacement = "?")
      path = if language.is_a?(Symbol)
        File.join(File.dirname(__FILE__), "mappings", "#{language}.yaml")
      else
        language
      end

      # an empty mapping file does not load as a Hash; treat it as "no entries"
      table = YAML.load_file(path) || {}
      decoder = Iconv.new(Intermediate, "UTF-8")

      # codepoints without an entry fall back to the default replacement
      @map = Hash.new(decoder.iconv(replacement).unpack(PackFormat))

      # the mappings file is UTF-8, recode to the intermediate encoding;
      # to_s lets unquoted YAML scalars (numbers, empty values) act as text
      table.each do |char, substitute|
        key = decoder.iconv(char.to_s).unpack(PackFormat)
        unless key.size == 1
          raise ArgumentError,
                "mapping key #{char.inspect} must be exactly one character"
        end
        @map[key.first] = decoder.iconv(substitute.to_s).unpack(PackFormat)
      end
    end

    # returns the replacement codepoints (an Array) for +codepoint+
    #
    def [](codepoint)
      @map[codepoint]
    end

  end

  # mapping from Unicode codepoints to numeric HTML entities
  #
  #   Asciify.new(Asciify::HTMLEntities.new).convert("\303\244") #=> "&#228;"
  #
  class HTMLEntities < Mapping

    # Entities are computed on the fly, so no mapping file is loaded.
    # Arguments are accepted (and ignored) to stay call-compatible
    # with Mapping.new.
    #
    def initialize(*_ignored)
    end

    # returns the codepoints of the numeric entity for +codepoint+
    #
    def [](codepoint)
      from_utf8 "&##{codepoint};"
    end
  end

  # +replacement+ is one of
  # * a String (in the +source+ encoding) substituted for every
  #   non-ASCII character
  # * an object responding to [] that maps a codepoint to an array of
  #   replacement codepoints (or nil to drop the character), e.g. a
  #   Mapping instance
  # * a Mapping class, which is instantiated with its defaults
  #
  # +target+ and +source+ are Iconv encoding names for the output and
  # the input of #convert.
  #
  def initialize(replacement = "?", target = "ASCII", source = "UTF-8")
    @from_input_enc = Iconv.new(Intermediate, source)
    @to_output_enc = Iconv.new(target, Intermediate)

    @mapping = if replacement.is_a?(String)
      Hash.new(@from_input_enc.iconv(replacement).unpack(PackFormat))
    elsif replacement.is_a?(Class) && replacement <= Mapping
      replacement.new
    else
      replacement
    end
  end

  # Returns +str+ recoded to the target encoding. Codepoints below 128
  # are kept, all others are replaced by whatever the mapping yields
  # for them.
  #
  def convert(str)
    codepoints = @from_input_enc.iconv(str).unpack(PackFormat)

    replaced = codepoints.map do |codepoint|
      codepoint < 128 ? codepoint : @mapping[codepoint]
    end

    # a replacement is an array of codepoints (or nil): splice it in
    @to_output_enc.iconv(replaced.flatten.compact.pack(PackFormat))
  end

end
|
88
|
+
|
89
|
+
# Convenience methods so a String can be checked and converted directly.
class String

  # Returns the result of running the receiver through an Asciify
  # converter, which replaces the characters that are not part of ASCII.
  #
  # All +args+ are handed to Asciify.new unchanged. A String
  # replacement is supposed to be in the same encoding as the source.
  #
  def asciify(*args)
    converter = Asciify.new(*args)
    converter.convert(self)
  end

  # true if every byte of the receiver is below 128
  # (an empty string therefore counts as ASCII)
  #
  def ascii?
    each_byte { |byte| return false if byte >= 128 }
    true
  end

end
|
104
|
+
|
105
|
+
# Hook for running this file as a script; intentionally empty.
if $PROGRAM_NAME == __FILE__
end
|
107
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
---
|
2
|
+
"“": '"'
|
3
|
+
"”": '"'
|
4
|
+
"‘": "'"
|
5
|
+
"’": "'"
|
6
|
+
"„": '"'
|
7
|
+
"〝": '"'
|
8
|
+
"〞": '"'
|
9
|
+
"»": ">>"
|
10
|
+
"«": "<<"
|
11
|
+
"ä": "ae"
|
12
|
+
"ö": "oe"
|
13
|
+
"ü": "ue"
|
14
|
+
"Ä": "Ae"
|
15
|
+
"Ö": "Oe"
|
16
|
+
"Ü": "Ue"
|
17
|
+
"ß": "ss"
|
18
|
+
"æ": "ae"
|
19
|
+
"Æ": "AE"
|
20
|
+
"œ": "oe"
|
21
|
+
"Œ": "OE"
|
22
|
+
"€": "EUR"
|
23
|
+
"½": "1/2"
|
24
|
+
"¼": "1/4"
|
25
|
+
"¾": "3/4"
|
26
|
+
"©": "(c)"
|
27
|
+
"®": "(r)"
|
metadata
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.8.11
|
3
|
+
specification_version: 1
|
4
|
+
name: asciify
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.1.0
|
7
|
+
date: 2006-01-24 00:00:00 +01:00
|
8
|
+
summary: Tool to strip non-ASCII characters from a string and replace them with something else
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: levin@grundeis.net
|
12
|
+
homepage: http://levinalex.de/ruby/asciify
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
-
|
22
|
+
- ">"
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 0.0.0
|
25
|
+
version:
|
26
|
+
platform: ruby
|
27
|
+
signing_key:
|
28
|
+
cert_chain:
|
29
|
+
authors:
|
30
|
+
- Levin Alexander
|
31
|
+
files:
|
32
|
+
- lib/asciify.rb
|
33
|
+
- lib/mappings
|
34
|
+
- lib/mappings/default.yaml
|
35
|
+
test_files: []
|
36
|
+
rdoc_options: []
|
37
|
+
extra_rdoc_files: []
|
38
|
+
executables: []
|
39
|
+
extensions: []
|
40
|
+
requirements: []
|
41
|
+
dependencies: []
|