xls_html_cleaner 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -19,10 +19,17 @@ Clean up your Excel generated HTML
19
19
 
20
20
  == Features/Problems
21
21
 
22
+ * remove disallowed tags (the tags only, not the whole elements)
23
+ * remove all attributes
24
+ * remove comments
25
+ * leave whitespace untouched
26
+
27
+ If you give this script a non-HTML document, the result is undefined :-)
22
28
 
23
29
  == Synopsis
24
30
 
25
31
  $ xls_html_cleaner SRC_HTML > DEST_HTML
32
+ $ cat SRC_HTML | xls_html_cleaner
26
33
 
27
34
  == Copyright
28
35
 
data/Rakefile CHANGED
@@ -57,8 +57,7 @@ spec = Gem::Specification.new do |s|
57
57
  #s.autorequire = ""
58
58
  s.test_files = Dir["test/*_test.rb"]
59
59
 
60
- s.add_dependency('hpricot', '>=1.3.1')
61
- #s.required_ruby_version = '>= 1.8.2'
60
+ s.add_dependency('hpricot')
62
61
 
63
62
  s.files = %w(README ChangeLog Rakefile) +
64
63
  Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
@@ -2,7 +2,7 @@ require 'rubygems' unless defined? RubyGems
2
2
  require 'hpricot'
3
3
 
4
4
  class XlsHtmlCleaner
5
- VERSION = '0.0.1'
5
+ VERSION = '0.0.2'
6
6
 
7
7
  ALLOW_TAGS = %w( html head title body
8
8
  table thead tbody tfoot tr th td col colgroup )
@@ -0,0 +1,166 @@
1
+ <html xmlns:o="urn:schemas-microsoft-com:office:office"
2
+ xmlns:x="urn:schemas-microsoft-com:office:excel"
3
+ xmlns="http://www.w3.org/TR/REC-html40">
4
+
5
+ <head>
6
+ <meta http-equiv=Content-Type content="text/html; charset=shift_jis">
7
+ <meta name=ProgId content=Excel.Sheet>
8
+ <meta name=Generator content="Microsoft Excel 11">
9
+ <link rel=File-List href="simple_excel.files/filelist.xml">
10
+ <link rel=Edit-Time-Data href="simple_excel.files/editdata.mso">
11
+ <link rel=OLE-Object-Data href="simple_excel.files/oledata.mso">
12
+ <!--[if gte mso 9]><xml>
13
+ <o:DocumentProperties>
14
+ <o:Author> </o:Author>
15
+ <o:LastAuthor> </o:LastAuthor>
16
+ <o:Created>2010-02-15T00:40:17Z</o:Created>
17
+ <o:LastSaved>2010-02-15T00:41:04Z</o:LastSaved>
18
+ <o:Version>11.9999</o:Version>
19
+ </o:DocumentProperties>
20
+ </xml><![endif]-->
21
+ <style>
22
+ <!--table
23
+ {mso-displayed-decimal-separator:"\.";
24
+ mso-displayed-thousand-separator:"\,";}
25
+ @page
26
+ {margin:.98in .79in .98in .79in;
27
+ mso-header-margin:.51in;
28
+ mso-footer-margin:.51in;}
29
+ tr
30
+ {mso-height-source:auto;
31
+ mso-ruby-visibility:none;}
32
+ col
33
+ {mso-width-source:auto;
34
+ mso-ruby-visibility:none;}
35
+ br
36
+ {mso-data-placement:same-cell;}
37
+ .style0
38
+ {mso-number-format:General;
39
+ text-align:general;
40
+ vertical-align:middle;
41
+ white-space:nowrap;
42
+ mso-rotate:0;
43
+ mso-background-source:auto;
44
+ mso-pattern:auto;
45
+ color:windowtext;
46
+ font-size:11.0pt;
47
+ font-weight:400;
48
+ font-style:normal;
49
+ text-decoration:none;
50
+ font-family:"�l�r �o�S�V�b�N", monospace;
51
+ mso-font-charset:128;
52
+ border:none;
53
+ mso-protection:locked visible;
54
+ mso-style-name:�W��;
55
+ mso-style-id:0;}
56
+ td
57
+ {mso-style-parent:style0;
58
+ padding-top:1px;
59
+ padding-right:1px;
60
+ padding-left:1px;
61
+ mso-ignore:padding;
62
+ color:windowtext;
63
+ font-size:11.0pt;
64
+ font-weight:400;
65
+ font-style:normal;
66
+ text-decoration:none;
67
+ font-family:"�l�r �o�S�V�b�N", monospace;
68
+ mso-font-charset:128;
69
+ mso-number-format:General;
70
+ text-align:general;
71
+ vertical-align:middle;
72
+ border:none;
73
+ mso-background-source:auto;
74
+ mso-pattern:auto;
75
+ mso-protection:locked visible;
76
+ white-space:nowrap;
77
+ mso-rotate:0;}
78
+ ruby
79
+ {ruby-align:left;}
80
+ rt
81
+ {color:windowtext;
82
+ font-size:6.0pt;
83
+ font-weight:400;
84
+ font-style:normal;
85
+ text-decoration:none;
86
+ font-family:"�l�r �o�S�V�b�N", monospace;
87
+ mso-font-charset:128;
88
+ mso-char-type:katakana;
89
+ display:none;}
90
+ -->
91
+ </style>
92
+ <!--[if gte mso 9]><xml>
93
+ <x:ExcelWorkbook>
94
+ <x:ExcelWorksheets>
95
+ <x:ExcelWorksheet>
96
+ <x:Name>Sheet1</x:Name>
97
+ <x:WorksheetOptions>
98
+ <x:DefaultRowHeight>270</x:DefaultRowHeight>
99
+ <x:Selected/>
100
+ <x:Panes>
101
+ <x:Pane>
102
+ <x:Number>3</x:Number>
103
+ <x:ActiveRow>2</x:ActiveRow>
104
+ </x:Pane>
105
+ </x:Panes>
106
+ <x:ProtectContents>False</x:ProtectContents>
107
+ <x:ProtectObjects>False</x:ProtectObjects>
108
+ <x:ProtectScenarios>False</x:ProtectScenarios>
109
+ </x:WorksheetOptions>
110
+ </x:ExcelWorksheet>
111
+ <x:ExcelWorksheet>
112
+ <x:Name>Sheet2</x:Name>
113
+ <x:WorksheetOptions>
114
+ <x:DefaultRowHeight>270</x:DefaultRowHeight>
115
+ <x:ProtectContents>False</x:ProtectContents>
116
+ <x:ProtectObjects>False</x:ProtectObjects>
117
+ <x:ProtectScenarios>False</x:ProtectScenarios>
118
+ </x:WorksheetOptions>
119
+ </x:ExcelWorksheet>
120
+ <x:ExcelWorksheet>
121
+ <x:Name>Sheet3</x:Name>
122
+ <x:WorksheetOptions>
123
+ <x:DefaultRowHeight>270</x:DefaultRowHeight>
124
+ <x:ProtectContents>False</x:ProtectContents>
125
+ <x:ProtectObjects>False</x:ProtectObjects>
126
+ <x:ProtectScenarios>False</x:ProtectScenarios>
127
+ </x:WorksheetOptions>
128
+ </x:ExcelWorksheet>
129
+ </x:ExcelWorksheets>
130
+ <x:WindowHeight>8505</x:WindowHeight>
131
+ <x:WindowWidth>10575</x:WindowWidth>
132
+ <x:WindowTopX>480</x:WindowTopX>
133
+ <x:WindowTopY>90</x:WindowTopY>
134
+ <x:ProtectStructure>False</x:ProtectStructure>
135
+ <x:ProtectWindows>False</x:ProtectWindows>
136
+ </x:ExcelWorkbook>
137
+ </xml><![endif]-->
138
+ </head>
139
+
140
+ <body link=blue vlink=purple>
141
+
142
+ <table x:str border=0 cellpadding=0 cellspacing=0 width=216 style='border-collapse:
143
+ collapse;table-layout:fixed;width:162pt'>
144
+ <col width=72 span=3 style='width:54pt'>
145
+ <tr height=18 style='height:13.5pt'>
146
+ <td height=18 width=72 style='height:13.5pt;width:54pt'>a</td>
147
+ <td width=72 style='width:54pt'>b</td>
148
+ <td width=72 style='width:54pt'>c</td>
149
+ </tr>
150
+ <tr height=18 style='height:13.5pt'>
151
+ <td height=18 style='height:13.5pt'>d</td>
152
+ <td>e</td>
153
+ <td>f</td>
154
+ </tr>
155
+ <![if supportMisalignedColumns]>
156
+ <tr height=0 style='display:none'>
157
+ <td width=72 style='width:54pt'></td>
158
+ <td width=72 style='width:54pt'></td>
159
+ <td width=72 style='width:54pt'></td>
160
+ </tr>
161
+ <![endif]>
162
+ </table>
163
+
164
+ </body>
165
+
166
+ </html>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xls_html_cleaner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - wtnabe
@@ -20,7 +20,7 @@ dependencies:
20
20
  requirements:
21
21
  - - ">="
22
22
  - !ruby/object:Gem::Version
23
- version: 1.3.1
23
+ version: "0"
24
24
  version:
25
25
  description: Clean up your Excel generated HTML
26
26
  email: wtnabe@gmail.com
@@ -36,8 +36,8 @@ files:
36
36
  - ChangeLog
37
37
  - Rakefile
38
38
  - bin/xls_html_cleaner
39
+ - test/html/simple_excel.htm
39
40
  - test/html/simple_ooo.html
40
- - test/output/simple_ooo.html
41
41
  - test/test_helper.rb
42
42
  - test/xls_html_cleaner_test.rb
43
43
  - lib/xls_html_cleaner.rb
@@ -1,48 +0,0 @@
1
- <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
2
-
3
- <html>
4
- <head>
5
-
6
-
7
-
8
- <title></title>
9
-
10
-
11
-
12
-
13
-
14
-
15
-
16
-
17
-
18
-
19
-
20
-
21
- </head>
22
-
23
- <body>
24
- <table>
25
- <colgroup><col /><col /><col /></colgroup>
26
- <tbody>
27
- <tr>
28
- <td>a</td>
29
- <td>d</td>
30
- <td>g</td>
31
- </tr>
32
- <tr>
33
- <td>b</td>
34
- <td>e</td>
35
- <td>h</td>
36
- </tr>
37
- <tr>
38
- <td>c</td>
39
- <td>f</td>
40
- <td>I</td>
41
- </tr>
42
- </tbody>
43
- </table>
44
-
45
-
46
- </body>
47
-
48
- </html>