xls_html_cleaner 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +7 -0
- data/Rakefile +1 -2
- data/lib/xls_html_cleaner.rb +1 -1
- data/test/html/simple_excel.htm +166 -0
- metadata +3 -3
- data/test/output/simple_ooo.html +0 -48
data/README
CHANGED
@@ -19,10 +19,17 @@ Clean up your Excel generated HTML
|
|
19
19
|
|
20
20
|
== Features/Problems
|
21
21
|
|
22
|
+
* remove not allowed `tags' ( not elements )
|
23
|
+
* remove all attributes
|
24
|
+
* remove comment
|
25
|
+
* leave white spaces
|
26
|
+
|
27
|
+
If you give this script a non-html document, I don't know what happens :-)
|
22
28
|
|
23
29
|
== Synopsis
|
24
30
|
|
25
31
|
$ xls_html_cleaner SRC_HTML > DEST_HTML
|
32
|
+
$ cat SRC_HTML | xls_html_cleaner
|
26
33
|
|
27
34
|
== Copyright
|
28
35
|
|
data/Rakefile
CHANGED
@@ -57,8 +57,7 @@ spec = Gem::Specification.new do |s|
|
|
57
57
|
#s.autorequire = ""
|
58
58
|
s.test_files = Dir["test/*_test.rb"]
|
59
59
|
|
60
|
-
s.add_dependency('hpricot'
|
61
|
-
#s.required_ruby_version = '>= 1.8.2'
|
60
|
+
s.add_dependency('hpricot')
|
62
61
|
|
63
62
|
s.files = %w(README ChangeLog Rakefile) +
|
64
63
|
Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
|
data/lib/xls_html_cleaner.rb
CHANGED
(+1 -1; hunk not captured in this extract)
data/test/html/simple_excel.htm
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
<html xmlns:o="urn:schemas-microsoft-com:office:office"
|
2
|
+
xmlns:x="urn:schemas-microsoft-com:office:excel"
|
3
|
+
xmlns="http://www.w3.org/TR/REC-html40">
|
4
|
+
|
5
|
+
<head>
|
6
|
+
<meta http-equiv=Content-Type content="text/html; charset=shift_jis">
|
7
|
+
<meta name=ProgId content=Excel.Sheet>
|
8
|
+
<meta name=Generator content="Microsoft Excel 11">
|
9
|
+
<link rel=File-List href="simple_excel.files/filelist.xml">
|
10
|
+
<link rel=Edit-Time-Data href="simple_excel.files/editdata.mso">
|
11
|
+
<link rel=OLE-Object-Data href="simple_excel.files/oledata.mso">
|
12
|
+
<!--[if gte mso 9]><xml>
|
13
|
+
<o:DocumentProperties>
|
14
|
+
<o:Author> </o:Author>
|
15
|
+
<o:LastAuthor> </o:LastAuthor>
|
16
|
+
<o:Created>2010-02-15T00:40:17Z</o:Created>
|
17
|
+
<o:LastSaved>2010-02-15T00:41:04Z</o:LastSaved>
|
18
|
+
<o:Version>11.9999</o:Version>
|
19
|
+
</o:DocumentProperties>
|
20
|
+
</xml><![endif]-->
|
21
|
+
<style>
|
22
|
+
<!--table
|
23
|
+
{mso-displayed-decimal-separator:"\.";
|
24
|
+
mso-displayed-thousand-separator:"\,";}
|
25
|
+
@page
|
26
|
+
{margin:.98in .79in .98in .79in;
|
27
|
+
mso-header-margin:.51in;
|
28
|
+
mso-footer-margin:.51in;}
|
29
|
+
tr
|
30
|
+
{mso-height-source:auto;
|
31
|
+
mso-ruby-visibility:none;}
|
32
|
+
col
|
33
|
+
{mso-width-source:auto;
|
34
|
+
mso-ruby-visibility:none;}
|
35
|
+
br
|
36
|
+
{mso-data-placement:same-cell;}
|
37
|
+
.style0
|
38
|
+
{mso-number-format:General;
|
39
|
+
text-align:general;
|
40
|
+
vertical-align:middle;
|
41
|
+
white-space:nowrap;
|
42
|
+
mso-rotate:0;
|
43
|
+
mso-background-source:auto;
|
44
|
+
mso-pattern:auto;
|
45
|
+
color:windowtext;
|
46
|
+
font-size:11.0pt;
|
47
|
+
font-weight:400;
|
48
|
+
font-style:normal;
|
49
|
+
text-decoration:none;
|
50
|
+
font-family:"ＭＳ Ｐゴシック", monospace;
|
51
|
+
mso-font-charset:128;
|
52
|
+
border:none;
|
53
|
+
mso-protection:locked visible;
|
54
|
+
mso-style-name:標準;
|
55
|
+
mso-style-id:0;}
|
56
|
+
td
|
57
|
+
{mso-style-parent:style0;
|
58
|
+
padding-top:1px;
|
59
|
+
padding-right:1px;
|
60
|
+
padding-left:1px;
|
61
|
+
mso-ignore:padding;
|
62
|
+
color:windowtext;
|
63
|
+
font-size:11.0pt;
|
64
|
+
font-weight:400;
|
65
|
+
font-style:normal;
|
66
|
+
text-decoration:none;
|
67
|
+
font-family:"ＭＳ Ｐゴシック", monospace;
|
68
|
+
mso-font-charset:128;
|
69
|
+
mso-number-format:General;
|
70
|
+
text-align:general;
|
71
|
+
vertical-align:middle;
|
72
|
+
border:none;
|
73
|
+
mso-background-source:auto;
|
74
|
+
mso-pattern:auto;
|
75
|
+
mso-protection:locked visible;
|
76
|
+
white-space:nowrap;
|
77
|
+
mso-rotate:0;}
|
78
|
+
ruby
|
79
|
+
{ruby-align:left;}
|
80
|
+
rt
|
81
|
+
{color:windowtext;
|
82
|
+
font-size:6.0pt;
|
83
|
+
font-weight:400;
|
84
|
+
font-style:normal;
|
85
|
+
text-decoration:none;
|
86
|
+
font-family:"ＭＳ Ｐゴシック", monospace;
|
87
|
+
mso-font-charset:128;
|
88
|
+
mso-char-type:katakana;
|
89
|
+
display:none;}
|
90
|
+
-->
|
91
|
+
</style>
|
92
|
+
<!--[if gte mso 9]><xml>
|
93
|
+
<x:ExcelWorkbook>
|
94
|
+
<x:ExcelWorksheets>
|
95
|
+
<x:ExcelWorksheet>
|
96
|
+
<x:Name>Sheet1</x:Name>
|
97
|
+
<x:WorksheetOptions>
|
98
|
+
<x:DefaultRowHeight>270</x:DefaultRowHeight>
|
99
|
+
<x:Selected/>
|
100
|
+
<x:Panes>
|
101
|
+
<x:Pane>
|
102
|
+
<x:Number>3</x:Number>
|
103
|
+
<x:ActiveRow>2</x:ActiveRow>
|
104
|
+
</x:Pane>
|
105
|
+
</x:Panes>
|
106
|
+
<x:ProtectContents>False</x:ProtectContents>
|
107
|
+
<x:ProtectObjects>False</x:ProtectObjects>
|
108
|
+
<x:ProtectScenarios>False</x:ProtectScenarios>
|
109
|
+
</x:WorksheetOptions>
|
110
|
+
</x:ExcelWorksheet>
|
111
|
+
<x:ExcelWorksheet>
|
112
|
+
<x:Name>Sheet2</x:Name>
|
113
|
+
<x:WorksheetOptions>
|
114
|
+
<x:DefaultRowHeight>270</x:DefaultRowHeight>
|
115
|
+
<x:ProtectContents>False</x:ProtectContents>
|
116
|
+
<x:ProtectObjects>False</x:ProtectObjects>
|
117
|
+
<x:ProtectScenarios>False</x:ProtectScenarios>
|
118
|
+
</x:WorksheetOptions>
|
119
|
+
</x:ExcelWorksheet>
|
120
|
+
<x:ExcelWorksheet>
|
121
|
+
<x:Name>Sheet3</x:Name>
|
122
|
+
<x:WorksheetOptions>
|
123
|
+
<x:DefaultRowHeight>270</x:DefaultRowHeight>
|
124
|
+
<x:ProtectContents>False</x:ProtectContents>
|
125
|
+
<x:ProtectObjects>False</x:ProtectObjects>
|
126
|
+
<x:ProtectScenarios>False</x:ProtectScenarios>
|
127
|
+
</x:WorksheetOptions>
|
128
|
+
</x:ExcelWorksheet>
|
129
|
+
</x:ExcelWorksheets>
|
130
|
+
<x:WindowHeight>8505</x:WindowHeight>
|
131
|
+
<x:WindowWidth>10575</x:WindowWidth>
|
132
|
+
<x:WindowTopX>480</x:WindowTopX>
|
133
|
+
<x:WindowTopY>90</x:WindowTopY>
|
134
|
+
<x:ProtectStructure>False</x:ProtectStructure>
|
135
|
+
<x:ProtectWindows>False</x:ProtectWindows>
|
136
|
+
</x:ExcelWorkbook>
|
137
|
+
</xml><![endif]-->
|
138
|
+
</head>
|
139
|
+
|
140
|
+
<body link=blue vlink=purple>
|
141
|
+
|
142
|
+
<table x:str border=0 cellpadding=0 cellspacing=0 width=216 style='border-collapse:
|
143
|
+
collapse;table-layout:fixed;width:162pt'>
|
144
|
+
<col width=72 span=3 style='width:54pt'>
|
145
|
+
<tr height=18 style='height:13.5pt'>
|
146
|
+
<td height=18 width=72 style='height:13.5pt;width:54pt'>a</td>
|
147
|
+
<td width=72 style='width:54pt'>b</td>
|
148
|
+
<td width=72 style='width:54pt'>c</td>
|
149
|
+
</tr>
|
150
|
+
<tr height=18 style='height:13.5pt'>
|
151
|
+
<td height=18 style='height:13.5pt'>d</td>
|
152
|
+
<td>e</td>
|
153
|
+
<td>f</td>
|
154
|
+
</tr>
|
155
|
+
<![if supportMisalignedColumns]>
|
156
|
+
<tr height=0 style='display:none'>
|
157
|
+
<td width=72 style='width:54pt'></td>
|
158
|
+
<td width=72 style='width:54pt'></td>
|
159
|
+
<td width=72 style='width:54pt'></td>
|
160
|
+
</tr>
|
161
|
+
<![endif]>
|
162
|
+
</table>
|
163
|
+
|
164
|
+
</body>
|
165
|
+
|
166
|
+
</html>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xls_html_cleaner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- wtnabe
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
23
|
+
version: "0"
|
24
24
|
version:
|
25
25
|
description: Clean up your Excel generated HTML
|
26
26
|
email: wtnabe@gmail.com
|
@@ -36,8 +36,8 @@ files:
|
|
36
36
|
- ChangeLog
|
37
37
|
- Rakefile
|
38
38
|
- bin/xls_html_cleaner
|
39
|
+
- test/html/simple_excel.htm
|
39
40
|
- test/html/simple_ooo.html
|
40
|
-
- test/output/simple_ooo.html
|
41
41
|
- test/test_helper.rb
|
42
42
|
- test/xls_html_cleaner_test.rb
|
43
43
|
- lib/xls_html_cleaner.rb
|
data/test/output/simple_ooo.html
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
|
2
|
-
|
3
|
-
<html>
|
4
|
-
<head>
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
<title></title>
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
</head>
|
22
|
-
|
23
|
-
<body>
|
24
|
-
<table>
|
25
|
-
<colgroup><col /><col /><col /></colgroup>
|
26
|
-
<tbody>
|
27
|
-
<tr>
|
28
|
-
<td>a</td>
|
29
|
-
<td>d</td>
|
30
|
-
<td>g</td>
|
31
|
-
</tr>
|
32
|
-
<tr>
|
33
|
-
<td>b</td>
|
34
|
-
<td>e</td>
|
35
|
-
<td>h</td>
|
36
|
-
</tr>
|
37
|
-
<tr>
|
38
|
-
<td>c</td>
|
39
|
-
<td>f</td>
|
40
|
-
<td>I</td>
|
41
|
-
</tr>
|
42
|
-
</tbody>
|
43
|
-
</table>
|
44
|
-
|
45
|
-
|
46
|
-
</body>
|
47
|
-
|
48
|
-
</html>
|