HtmlCodeCleaner 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 18c6d91ada1fc1458278180b195e742e2d743d7a
4
+ data.tar.gz: 72280347646b01fba97d98db660d4542e117cad2
5
+ SHA512:
6
+ metadata.gz: 99c8d99c21b7e94bc6de5ca30672887fcbfd8a24cdd199dadac33f4344fe341974e424d479d65f741e8c21d5f917a50c2a02bc002ada31fc93831ea75bb60a31
7
+ data.tar.gz: e615a840ca997cc620981a37843ce58b9b367f08db08b053ee07fb42373fb106e222d00964e3c1b6f0e39469e7fe6b73fff43c51d3ecacaf0b9c37cb84c04402
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
require 'rake/testtask'

# Register the test task and put the test/ directory on its load path
# (appended to whatever library paths Rake::TestTask starts with).
Rake::TestTask.new { |test_task| test_task.libs << 'test' }

# Running plain `rake` runs the :test task.
desc "Run tests"
task(:default => :test)
@@ -0,0 +1,209 @@
1
+
2
# Replaces decimal HTML character codes (for example "&#88;") with the
# characters they stand for.
#
# Only the codes listed in DICTIONARY are translated: 33-126, 128-255.
# Anything else (named entities, hex codes, unknown numbers, zero-padded
# codes) is left in the text exactly as it was found.
class HtmlCodeCleaner
  # Symbols for codes 128-159. Browsers treat these codes as Windows-1252
  # characters rather than as the C1 control characters they nominally are.
  WINDOWS_1252_SYMBOLS = {
    128 => '€', 130 => '‚', 131 => 'ƒ', 132 => '„', 133 => '…',
    134 => '†', 135 => '‡', 136 => 'ˆ', 137 => '‰', 138 => 'Š',
    139 => '‹', 140 => 'Œ', 142 => 'Ž', 145 => '‘', 146 => '’',
    147 => '“', 148 => '”', 149 => '•', 150 => '–', 151 => '—',
    152 => '˜', 153 => '™', 154 => 'š', 155 => '›', 156 => 'œ',
    158 => 'ž', 159 => 'Ÿ'
  }.freeze

  # Codes that have no printable symbol (the unassigned Windows-1252 slots
  # and the non-breaking space, 160); each becomes a single plain space.
  BLANK_CODES = [129, 141, 143, 144, 157, 160].freeze

  # Codes that are dropped from the output entirely (173 is the soft hyphen).
  REMOVED_CODES = [173].freeze

  # Matches one decimal character code such as "&#65;".
  CODE_PATTERN = /&#\d+;/

  # Lookup table from the literal code text ("&#65;") to its replacement.
  # Built once when the class is loaded instead of on every call.
  DICTIONARY = begin
    table = {}

    # Printable ASCII (33-126) and Latin-1 (161-255): the code number is the
    # character's code point, so the symbol can be computed directly.
    [(33..126), (161..255)].each do |range|
      range.each { |code| table[format('&#%d;', code)] = code.chr(Encoding::UTF_8) }
    end

    WINDOWS_1252_SYMBOLS.each { |code, symbol| table[format('&#%d;', code)] = symbol }
    BLANK_CODES.each { |code| table[format('&#%d;', code)] = ' ' }
    REMOVED_CODES.each { |code| table[format('&#%d;', code)] = '' }

    table.freeze
  end

  # Removes character codes from a string, replacing each code that is
  # defined in DICTIONARY with its symbol.
  #
  # The text is scanned exactly once, so text produced by a replacement is
  # never decoded a second time: "&#38;#40;" becomes "&#40;", not "(".
  #
  # @param string [String, nil] text that may contain codes such as "&#35;"
  # @return [String, nil] a new string with the known codes replaced, or nil
  #   when +string+ is nil; the argument itself is not modified
  def self.clean_string(string)
    return if string.nil?

    # Block form of gsub: codes missing from the table are put back as-is.
    string.gsub(CODE_PATTERN) { |code| DICTIONARY.fetch(code, code) }
  end
end
data/test/test_hcc.rb ADDED
@@ -0,0 +1,27 @@
1
+ require 'test/unit'
2
+ require 'HtmlCodeCleaner'
3
+ require 'minitest/autorun'
4
+
5
# Exercises HtmlCodeCleaner.clean_string with single codes, plain text and
# combinations of the two.
class HtmlCodeCleanerTest < Test::Unit::TestCase
  # A string that is nothing but one code becomes that code's symbol.
  def test_1
    cleaned = HtmlCodeCleaner.clean_string("&#88;")
    assert_equal("X", cleaned)
  end

  # Text without any codes comes back unchanged.
  def test_2
    cleaned = HtmlCodeCleaner.clean_string("Hi")
    assert_equal("Hi", cleaned)
  end

  # A code followed by ordinary text: only the code is replaced.
  def test_3
    cleaned = HtmlCodeCleaner.clean_string("&#35;Hi")
    assert_equal("#Hi", cleaned)
  end

  # Two codes back to back are both replaced.
  def test_4
    cleaned = HtmlCodeCleaner.clean_string("&#35;&#49;")
    assert_equal("#1", cleaned)
  end

  # Whitespace between two codes is preserved.
  def test_5
    cleaned = HtmlCodeCleaner.clean_string("&#35; &#49;")
    assert_equal("# 1", cleaned)
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: HtmlCodeCleaner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Michael Hiland
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-12 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Parse a string of html code, replace code with ascii symbol
14
+ email: g2c9@ugrad.cs.ubc.ca
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - Rakefile
20
+ - lib/HtmlCodeCleaner.rb
21
+ - test/test_hcc.rb
22
+ homepage: http://rubygems.org/gems/HtmlCodeCleaner
23
+ licenses:
24
+ - GPL-3.0
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.2.2
43
+ signing_key:
44
+ specification_version: 3
45
+ summary: Replace Html code with symbol
46
+ test_files:
47
+ - test/test_hcc.rb