xls_html_cleaner 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +7 -0
- data/Rakefile +1 -2
- data/lib/xls_html_cleaner.rb +1 -1
- data/test/html/simple_excel.htm +166 -0
- metadata +3 -3
- data/test/output/simple_ooo.html +0 -48
data/README
CHANGED
@@ -19,10 +19,17 @@ Clean up your Excel generated HTML
|
|
19
19
|
|
20
20
|
== Features/Problems
|
21
21
|
|
22
|
+
* remove not allowed `tags' ( not elements )
|
23
|
+
* remove all attributes
|
24
|
+
* remove comment
|
25
|
+
* leave white spaces
|
26
|
+
|
27
|
+
If you give this script a non-html document, I don't know what happens :-)
|
22
28
|
|
23
29
|
== Synopsis
|
24
30
|
|
25
31
|
$ xls_html_cleaner SRC_HTML > DEST_HTML
|
32
|
+
$ cat SRC_HTML | xls_html_cleaner
|
26
33
|
|
27
34
|
== Copyright
|
28
35
|
|
data/Rakefile
CHANGED
@@ -57,8 +57,7 @@ spec = Gem::Specification.new do |s|
|
|
57
57
|
#s.autorequire = ""
|
58
58
|
s.test_files = Dir["test/*_test.rb"]
|
59
59
|
|
60
|
-
s.add_dependency('hpricot'
|
61
|
-
#s.required_ruby_version = '>= 1.8.2'
|
60
|
+
s.add_dependency('hpricot')
|
62
61
|
|
63
62
|
s.files = %w(README ChangeLog Rakefile) +
|
64
63
|
Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
|
data/lib/xls_html_cleaner.rb
CHANGED
(diff for this file was not captured)
data/test/html/simple_excel.htm
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
<html xmlns:o="urn:schemas-microsoft-com:office:office"
|
2
|
+
xmlns:x="urn:schemas-microsoft-com:office:excel"
|
3
|
+
xmlns="http://www.w3.org/TR/REC-html40">
|
4
|
+
|
5
|
+
<head>
|
6
|
+
<meta http-equiv=Content-Type content="text/html; charset=shift_jis">
|
7
|
+
<meta name=ProgId content=Excel.Sheet>
|
8
|
+
<meta name=Generator content="Microsoft Excel 11">
|
9
|
+
<link rel=File-List href="simple_excel.files/filelist.xml">
|
10
|
+
<link rel=Edit-Time-Data href="simple_excel.files/editdata.mso">
|
11
|
+
<link rel=OLE-Object-Data href="simple_excel.files/oledata.mso">
|
12
|
+
<!--[if gte mso 9]><xml>
|
13
|
+
<o:DocumentProperties>
|
14
|
+
<o:Author> </o:Author>
|
15
|
+
<o:LastAuthor> </o:LastAuthor>
|
16
|
+
<o:Created>2010-02-15T00:40:17Z</o:Created>
|
17
|
+
<o:LastSaved>2010-02-15T00:41:04Z</o:LastSaved>
|
18
|
+
<o:Version>11.9999</o:Version>
|
19
|
+
</o:DocumentProperties>
|
20
|
+
</xml><![endif]-->
|
21
|
+
<style>
|
22
|
+
<!--table
|
23
|
+
{mso-displayed-decimal-separator:"\.";
|
24
|
+
mso-displayed-thousand-separator:"\,";}
|
25
|
+
@page
|
26
|
+
{margin:.98in .79in .98in .79in;
|
27
|
+
mso-header-margin:.51in;
|
28
|
+
mso-footer-margin:.51in;}
|
29
|
+
tr
|
30
|
+
{mso-height-source:auto;
|
31
|
+
mso-ruby-visibility:none;}
|
32
|
+
col
|
33
|
+
{mso-width-source:auto;
|
34
|
+
mso-ruby-visibility:none;}
|
35
|
+
br
|
36
|
+
{mso-data-placement:same-cell;}
|
37
|
+
.style0
|
38
|
+
{mso-number-format:General;
|
39
|
+
text-align:general;
|
40
|
+
vertical-align:middle;
|
41
|
+
white-space:nowrap;
|
42
|
+
mso-rotate:0;
|
43
|
+
mso-background-source:auto;
|
44
|
+
mso-pattern:auto;
|
45
|
+
color:windowtext;
|
46
|
+
font-size:11.0pt;
|
47
|
+
font-weight:400;
|
48
|
+
font-style:normal;
|
49
|
+
text-decoration:none;
|
50
|
+
font-family:"ＭＳ Ｐゴシック", monospace;
|
51
|
+
mso-font-charset:128;
|
52
|
+
border:none;
|
53
|
+
mso-protection:locked visible;
|
54
|
+
mso-style-name:標準;
|
55
|
+
mso-style-id:0;}
|
56
|
+
td
|
57
|
+
{mso-style-parent:style0;
|
58
|
+
padding-top:1px;
|
59
|
+
padding-right:1px;
|
60
|
+
padding-left:1px;
|
61
|
+
mso-ignore:padding;
|
62
|
+
color:windowtext;
|
63
|
+
font-size:11.0pt;
|
64
|
+
font-weight:400;
|
65
|
+
font-style:normal;
|
66
|
+
text-decoration:none;
|
67
|
+
font-family:"ＭＳ Ｐゴシック", monospace;
|
68
|
+
mso-font-charset:128;
|
69
|
+
mso-number-format:General;
|
70
|
+
text-align:general;
|
71
|
+
vertical-align:middle;
|
72
|
+
border:none;
|
73
|
+
mso-background-source:auto;
|
74
|
+
mso-pattern:auto;
|
75
|
+
mso-protection:locked visible;
|
76
|
+
white-space:nowrap;
|
77
|
+
mso-rotate:0;}
|
78
|
+
ruby
|
79
|
+
{ruby-align:left;}
|
80
|
+
rt
|
81
|
+
{color:windowtext;
|
82
|
+
font-size:6.0pt;
|
83
|
+
font-weight:400;
|
84
|
+
font-style:normal;
|
85
|
+
text-decoration:none;
|
86
|
+
font-family:"ＭＳ Ｐゴシック", monospace;
|
87
|
+
mso-font-charset:128;
|
88
|
+
mso-char-type:katakana;
|
89
|
+
display:none;}
|
90
|
+
-->
|
91
|
+
</style>
|
92
|
+
<!--[if gte mso 9]><xml>
|
93
|
+
<x:ExcelWorkbook>
|
94
|
+
<x:ExcelWorksheets>
|
95
|
+
<x:ExcelWorksheet>
|
96
|
+
<x:Name>Sheet1</x:Name>
|
97
|
+
<x:WorksheetOptions>
|
98
|
+
<x:DefaultRowHeight>270</x:DefaultRowHeight>
|
99
|
+
<x:Selected/>
|
100
|
+
<x:Panes>
|
101
|
+
<x:Pane>
|
102
|
+
<x:Number>3</x:Number>
|
103
|
+
<x:ActiveRow>2</x:ActiveRow>
|
104
|
+
</x:Pane>
|
105
|
+
</x:Panes>
|
106
|
+
<x:ProtectContents>False</x:ProtectContents>
|
107
|
+
<x:ProtectObjects>False</x:ProtectObjects>
|
108
|
+
<x:ProtectScenarios>False</x:ProtectScenarios>
|
109
|
+
</x:WorksheetOptions>
|
110
|
+
</x:ExcelWorksheet>
|
111
|
+
<x:ExcelWorksheet>
|
112
|
+
<x:Name>Sheet2</x:Name>
|
113
|
+
<x:WorksheetOptions>
|
114
|
+
<x:DefaultRowHeight>270</x:DefaultRowHeight>
|
115
|
+
<x:ProtectContents>False</x:ProtectContents>
|
116
|
+
<x:ProtectObjects>False</x:ProtectObjects>
|
117
|
+
<x:ProtectScenarios>False</x:ProtectScenarios>
|
118
|
+
</x:WorksheetOptions>
|
119
|
+
</x:ExcelWorksheet>
|
120
|
+
<x:ExcelWorksheet>
|
121
|
+
<x:Name>Sheet3</x:Name>
|
122
|
+
<x:WorksheetOptions>
|
123
|
+
<x:DefaultRowHeight>270</x:DefaultRowHeight>
|
124
|
+
<x:ProtectContents>False</x:ProtectContents>
|
125
|
+
<x:ProtectObjects>False</x:ProtectObjects>
|
126
|
+
<x:ProtectScenarios>False</x:ProtectScenarios>
|
127
|
+
</x:WorksheetOptions>
|
128
|
+
</x:ExcelWorksheet>
|
129
|
+
</x:ExcelWorksheets>
|
130
|
+
<x:WindowHeight>8505</x:WindowHeight>
|
131
|
+
<x:WindowWidth>10575</x:WindowWidth>
|
132
|
+
<x:WindowTopX>480</x:WindowTopX>
|
133
|
+
<x:WindowTopY>90</x:WindowTopY>
|
134
|
+
<x:ProtectStructure>False</x:ProtectStructure>
|
135
|
+
<x:ProtectWindows>False</x:ProtectWindows>
|
136
|
+
</x:ExcelWorkbook>
|
137
|
+
</xml><![endif]-->
|
138
|
+
</head>
|
139
|
+
|
140
|
+
<body link=blue vlink=purple>
|
141
|
+
|
142
|
+
<table x:str border=0 cellpadding=0 cellspacing=0 width=216 style='border-collapse:
|
143
|
+
collapse;table-layout:fixed;width:162pt'>
|
144
|
+
<col width=72 span=3 style='width:54pt'>
|
145
|
+
<tr height=18 style='height:13.5pt'>
|
146
|
+
<td height=18 width=72 style='height:13.5pt;width:54pt'>a</td>
|
147
|
+
<td width=72 style='width:54pt'>b</td>
|
148
|
+
<td width=72 style='width:54pt'>c</td>
|
149
|
+
</tr>
|
150
|
+
<tr height=18 style='height:13.5pt'>
|
151
|
+
<td height=18 style='height:13.5pt'>d</td>
|
152
|
+
<td>e</td>
|
153
|
+
<td>f</td>
|
154
|
+
</tr>
|
155
|
+
<![if supportMisalignedColumns]>
|
156
|
+
<tr height=0 style='display:none'>
|
157
|
+
<td width=72 style='width:54pt'></td>
|
158
|
+
<td width=72 style='width:54pt'></td>
|
159
|
+
<td width=72 style='width:54pt'></td>
|
160
|
+
</tr>
|
161
|
+
<![endif]>
|
162
|
+
</table>
|
163
|
+
|
164
|
+
</body>
|
165
|
+
|
166
|
+
</html>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xls_html_cleaner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- wtnabe
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
23
|
+
version: "0"
|
24
24
|
version:
|
25
25
|
description: Clean up your Excel generated HTML
|
26
26
|
email: wtnabe@gmail.com
|
@@ -36,8 +36,8 @@ files:
|
|
36
36
|
- ChangeLog
|
37
37
|
- Rakefile
|
38
38
|
- bin/xls_html_cleaner
|
39
|
+
- test/html/simple_excel.htm
|
39
40
|
- test/html/simple_ooo.html
|
40
|
-
- test/output/simple_ooo.html
|
41
41
|
- test/test_helper.rb
|
42
42
|
- test/xls_html_cleaner_test.rb
|
43
43
|
- lib/xls_html_cleaner.rb
|
data/test/output/simple_ooo.html
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
|
2
|
-
|
3
|
-
<html>
|
4
|
-
<head>
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
<title></title>
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
</head>
|
22
|
-
|
23
|
-
<body>
|
24
|
-
<table>
|
25
|
-
<colgroup><col /><col /><col /></colgroup>
|
26
|
-
<tbody>
|
27
|
-
<tr>
|
28
|
-
<td>a</td>
|
29
|
-
<td>d</td>
|
30
|
-
<td>g</td>
|
31
|
-
</tr>
|
32
|
-
<tr>
|
33
|
-
<td>b</td>
|
34
|
-
<td>e</td>
|
35
|
-
<td>h</td>
|
36
|
-
</tr>
|
37
|
-
<tr>
|
38
|
-
<td>c</td>
|
39
|
-
<td>f</td>
|
40
|
-
<td>I</td>
|
41
|
-
</tr>
|
42
|
-
</tbody>
|
43
|
-
</table>
|
44
|
-
|
45
|
-
|
46
|
-
</body>
|
47
|
-
|
48
|
-
</html>
|