HtmlCodeCleaner 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 18c6d91ada1fc1458278180b195e742e2d743d7a
4
+ data.tar.gz: 72280347646b01fba97d98db660d4542e117cad2
5
+ SHA512:
6
+ metadata.gz: 99c8d99c21b7e94bc6de5ca30672887fcbfd8a24cdd199dadac33f4344fe341974e424d479d65f741e8c21d5f917a50c2a02bc002ada31fc93831ea75bb60a31
7
+ data.tar.gz: e615a840ca997cc620981a37843ce58b9b367f08db08b053ee07fb42373fb106e222d00964e3c1b6f0e39469e7fe6b73fff43c51d3ecacaf0b9c37cb84c04402
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require 'rake/testtask'
2
+
3
# Define the test task, adding the 'test' directory to the task's libs list.
Rake::TestTask.new { |test_task| test_task.libs.push('test') }

# Make the test task the default, so a bare `rake` runs it.
desc 'Run tests'
task default: :test
@@ -0,0 +1,209 @@
1
+
2
# Decodes decimal HTML character references (such as "&#65;") into the
# characters they stand for.
#
# Only the references listed in the lookup table are decoded:
# * &#33; .. &#126;   -> the printable ASCII character with that code
# * &#128; .. &#159;  -> the Windows-1252 character for that byte
#                        (unassigned bytes become a single space)
# * &#160;            -> a plain space
# * &#173;            -> removed (soft hyphen)
# * &#161; .. &#255;  -> the Latin-1 character with that code
# Anything else, including named or hexadecimal references, is left as is.
class HtmlCodeCleaner
  # Codes whose replacement is not simply "the character with this code
  # point". 128-159 follow Windows-1252; 129, 141, 143, 144 and 157 have no
  # assigned character there and are turned into a space.
  OVERRIDES = {
    128 => "\u20AC", 129 => ' ',      130 => "\u201A", 131 => "\u0192",
    132 => "\u201E", 133 => "\u2026", 134 => "\u2020", 135 => "\u2021",
    136 => "\u02C6", 137 => "\u2030", 138 => "\u0160", 139 => "\u2039",
    140 => "\u0152", 141 => ' ',      142 => "\u017D", 143 => ' ',
    144 => ' ',      145 => "\u2018", 146 => "\u2019", 147 => "\u201C",
    148 => "\u201D", 149 => "\u2022", 150 => "\u2013", 151 => "\u2014",
    152 => "\u02DC", 153 => "\u2122", 154 => "\u0161", 155 => "\u203A",
    156 => "\u0153", 157 => ' ',      158 => "\u017E", 159 => "\u0178",
    160 => ' ',      173 => ''
  }.freeze

  # Full lookup table, keyed by the literal reference text ("&#65;").
  # Built once when the class is loaded instead of on every call.
  REPLACEMENTS = begin
    table = {}
    ((33..126).to_a + (161..255).to_a).each do |code|
      table[format('&#%d;', code)] = code.chr(Encoding::UTF_8)
    end
    OVERRIDES.each { |code, char| table[format('&#%d;', code)] = char }
    table.freeze
  end

  # Matches any decimal character reference; the table decides whether a
  # particular match is actually decoded.
  ENTITY_PATTERN = /&#\d+;/

  private_constant :OVERRIDES, :REPLACEMENTS, :ENTITY_PATTERN

  # Replaces every known decimal character reference in +string+ with its
  # character.
  #
  # The input is scanned once, so text produced by a replacement is never
  # decoded a second time: "&#38;#65;" yields "&#65;", not "A".
  #
  # @param string [String, nil] text that may contain "&#NN;" references
  # @return [String, nil] a new string with known references decoded, or nil
  #   when +string+ is nil
  def self.clean_string(string)
    return nil if string.nil?

    # Unknown references fall back to their own text, i.e. stay untouched.
    string.gsub(ENTITY_PATTERN) { |entity| REPLACEMENTS.fetch(entity, entity) }
  end
end
data/test/test_hcc.rb ADDED
@@ -0,0 +1,27 @@
1
+ require 'test/unit'
2
+ require 'HtmlCodeCleaner'
3
+ require 'minitest/autorun'
4
+
5
# Exercises HtmlCodeCleaner.clean_string with a handful of small inputs.
class HtmlCodeCleanerTest < Test::Unit::TestCase
  # A lone reference is replaced by its character.
  def test_1
    cleaned = HtmlCodeCleaner.clean_string('&#88;')
    assert_equal('X', cleaned)
  end

  # Text without any reference comes back unchanged.
  def test_2
    cleaned = HtmlCodeCleaner.clean_string('Hi')
    assert_equal('Hi', cleaned)
  end

  # A reference followed by plain text.
  def test_3
    cleaned = HtmlCodeCleaner.clean_string('&#35;Hi')
    assert_equal('#Hi', cleaned)
  end

  # Two references back to back.
  def test_4
    cleaned = HtmlCodeCleaner.clean_string('&#35;&#49;')
    assert_equal('#1', cleaned)
  end

  # Two references separated by a space; the space is kept.
  def test_5
    cleaned = HtmlCodeCleaner.clean_string('&#35; &#49;')
    assert_equal('# 1', cleaned)
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: HtmlCodeCleaner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Michael Hiland
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-12 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Parse a string of html code, replace code with ascii symbol
14
+ email: g2c9@ugrad.cs.ubc.ca
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - Rakefile
20
+ - lib/HtmlCodeCleaner.rb
21
+ - test/test_hcc.rb
22
+ homepage: http://rubygems.org/gems/HtmlCodeCleaner
23
+ licenses:
24
+ - GPL-3.0
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.2.2
43
+ signing_key:
44
+ specification_version: 3
45
+ summary: Replace Html code with symbol
46
+ test_files:
47
+ - test/test_hcc.rb