xls_html_cleaner 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -19,10 +19,17 @@ Clean up your Excel generated HTML
19
19
 
20
20
  == Features/Problems
21
21
 
22
+ * remove disallowed `tags' (not elements)
23
+ * remove all attributes
24
+ * remove comments
25
+ * leave whitespace as is
26
+
27
+ If you give this script a non-HTML document, I don't know what will happen :-)
22
28
 
23
29
  == Synopsis
24
30
 
25
31
  $ xls_html_cleaner SRC_HTML > DEST_HTML
32
+ $ cat SRC_HTML | xls_html_cleaner
26
33
 
27
34
  == Copyright
28
35
 
data/Rakefile CHANGED
@@ -57,8 +57,7 @@ spec = Gem::Specification.new do |s|
57
57
  #s.autorequire = ""
58
58
  s.test_files = Dir["test/*_test.rb"]
59
59
 
60
- s.add_dependency('hpricot', '>=1.3.1')
61
- #s.required_ruby_version = '>= 1.8.2'
60
+ s.add_dependency('hpricot')
62
61
 
63
62
  s.files = %w(README ChangeLog Rakefile) +
64
63
  Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
@@ -2,7 +2,7 @@ require 'rubygems' unless defined? RubyGems
2
2
  require 'hpricot'
3
3
 
4
4
  class XlsHtmlCleaner
5
- VERSION = '0.0.1'
5
+ VERSION = '0.0.2'
6
6
 
7
7
  ALLOW_TAGS = %w( html head title body
8
8
  table thead tbody tfoot tr th td col colgroup )
@@ -0,0 +1,166 @@
1
+ <html xmlns:o="urn:schemas-microsoft-com:office:office"
2
+ xmlns:x="urn:schemas-microsoft-com:office:excel"
3
+ xmlns="http://www.w3.org/TR/REC-html40">
4
+
5
+ <head>
6
+ <meta http-equiv=Content-Type content="text/html; charset=shift_jis">
7
+ <meta name=ProgId content=Excel.Sheet>
8
+ <meta name=Generator content="Microsoft Excel 11">
9
+ <link rel=File-List href="simple_excel.files/filelist.xml">
10
+ <link rel=Edit-Time-Data href="simple_excel.files/editdata.mso">
11
+ <link rel=OLE-Object-Data href="simple_excel.files/oledata.mso">
12
+ <!--[if gte mso 9]><xml>
13
+ <o:DocumentProperties>
14
+ <o:Author> </o:Author>
15
+ <o:LastAuthor> </o:LastAuthor>
16
+ <o:Created>2010-02-15T00:40:17Z</o:Created>
17
+ <o:LastSaved>2010-02-15T00:41:04Z</o:LastSaved>
18
+ <o:Version>11.9999</o:Version>
19
+ </o:DocumentProperties>
20
+ </xml><![endif]-->
21
+ <style>
22
+ <!--table
23
+ {mso-displayed-decimal-separator:"\.";
24
+ mso-displayed-thousand-separator:"\,";}
25
+ @page
26
+ {margin:.98in .79in .98in .79in;
27
+ mso-header-margin:.51in;
28
+ mso-footer-margin:.51in;}
29
+ tr
30
+ {mso-height-source:auto;
31
+ mso-ruby-visibility:none;}
32
+ col
33
+ {mso-width-source:auto;
34
+ mso-ruby-visibility:none;}
35
+ br
36
+ {mso-data-placement:same-cell;}
37
+ .style0
38
+ {mso-number-format:General;
39
+ text-align:general;
40
+ vertical-align:middle;
41
+ white-space:nowrap;
42
+ mso-rotate:0;
43
+ mso-background-source:auto;
44
+ mso-pattern:auto;
45
+ color:windowtext;
46
+ font-size:11.0pt;
47
+ font-weight:400;
48
+ font-style:normal;
49
+ text-decoration:none;
50
+ font-family:"ＭＳ Ｐゴシック", monospace;
51
+ mso-font-charset:128;
52
+ border:none;
53
+ mso-protection:locked visible;
54
+ mso-style-name:標準;
55
+ mso-style-id:0;}
56
+ td
57
+ {mso-style-parent:style0;
58
+ padding-top:1px;
59
+ padding-right:1px;
60
+ padding-left:1px;
61
+ mso-ignore:padding;
62
+ color:windowtext;
63
+ font-size:11.0pt;
64
+ font-weight:400;
65
+ font-style:normal;
66
+ text-decoration:none;
67
+ font-family:"ＭＳ Ｐゴシック", monospace;
68
+ mso-font-charset:128;
69
+ mso-number-format:General;
70
+ text-align:general;
71
+ vertical-align:middle;
72
+ border:none;
73
+ mso-background-source:auto;
74
+ mso-pattern:auto;
75
+ mso-protection:locked visible;
76
+ white-space:nowrap;
77
+ mso-rotate:0;}
78
+ ruby
79
+ {ruby-align:left;}
80
+ rt
81
+ {color:windowtext;
82
+ font-size:6.0pt;
83
+ font-weight:400;
84
+ font-style:normal;
85
+ text-decoration:none;
86
+ font-family:"ＭＳ Ｐゴシック", monospace;
87
+ mso-font-charset:128;
88
+ mso-char-type:katakana;
89
+ display:none;}
90
+ -->
91
+ </style>
92
+ <!--[if gte mso 9]><xml>
93
+ <x:ExcelWorkbook>
94
+ <x:ExcelWorksheets>
95
+ <x:ExcelWorksheet>
96
+ <x:Name>Sheet1</x:Name>
97
+ <x:WorksheetOptions>
98
+ <x:DefaultRowHeight>270</x:DefaultRowHeight>
99
+ <x:Selected/>
100
+ <x:Panes>
101
+ <x:Pane>
102
+ <x:Number>3</x:Number>
103
+ <x:ActiveRow>2</x:ActiveRow>
104
+ </x:Pane>
105
+ </x:Panes>
106
+ <x:ProtectContents>False</x:ProtectContents>
107
+ <x:ProtectObjects>False</x:ProtectObjects>
108
+ <x:ProtectScenarios>False</x:ProtectScenarios>
109
+ </x:WorksheetOptions>
110
+ </x:ExcelWorksheet>
111
+ <x:ExcelWorksheet>
112
+ <x:Name>Sheet2</x:Name>
113
+ <x:WorksheetOptions>
114
+ <x:DefaultRowHeight>270</x:DefaultRowHeight>
115
+ <x:ProtectContents>False</x:ProtectContents>
116
+ <x:ProtectObjects>False</x:ProtectObjects>
117
+ <x:ProtectScenarios>False</x:ProtectScenarios>
118
+ </x:WorksheetOptions>
119
+ </x:ExcelWorksheet>
120
+ <x:ExcelWorksheet>
121
+ <x:Name>Sheet3</x:Name>
122
+ <x:WorksheetOptions>
123
+ <x:DefaultRowHeight>270</x:DefaultRowHeight>
124
+ <x:ProtectContents>False</x:ProtectContents>
125
+ <x:ProtectObjects>False</x:ProtectObjects>
126
+ <x:ProtectScenarios>False</x:ProtectScenarios>
127
+ </x:WorksheetOptions>
128
+ </x:ExcelWorksheet>
129
+ </x:ExcelWorksheets>
130
+ <x:WindowHeight>8505</x:WindowHeight>
131
+ <x:WindowWidth>10575</x:WindowWidth>
132
+ <x:WindowTopX>480</x:WindowTopX>
133
+ <x:WindowTopY>90</x:WindowTopY>
134
+ <x:ProtectStructure>False</x:ProtectStructure>
135
+ <x:ProtectWindows>False</x:ProtectWindows>
136
+ </x:ExcelWorkbook>
137
+ </xml><![endif]-->
138
+ </head>
139
+
140
+ <body link=blue vlink=purple>
141
+
142
+ <table x:str border=0 cellpadding=0 cellspacing=0 width=216 style='border-collapse:
143
+ collapse;table-layout:fixed;width:162pt'>
144
+ <col width=72 span=3 style='width:54pt'>
145
+ <tr height=18 style='height:13.5pt'>
146
+ <td height=18 width=72 style='height:13.5pt;width:54pt'>a</td>
147
+ <td width=72 style='width:54pt'>b</td>
148
+ <td width=72 style='width:54pt'>c</td>
149
+ </tr>
150
+ <tr height=18 style='height:13.5pt'>
151
+ <td height=18 style='height:13.5pt'>d</td>
152
+ <td>e</td>
153
+ <td>f</td>
154
+ </tr>
155
+ <![if supportMisalignedColumns]>
156
+ <tr height=0 style='display:none'>
157
+ <td width=72 style='width:54pt'></td>
158
+ <td width=72 style='width:54pt'></td>
159
+ <td width=72 style='width:54pt'></td>
160
+ </tr>
161
+ <![endif]>
162
+ </table>
163
+
164
+ </body>
165
+
166
+ </html>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xls_html_cleaner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - wtnabe
@@ -20,7 +20,7 @@ dependencies:
20
20
  requirements:
21
21
  - - ">="
22
22
  - !ruby/object:Gem::Version
23
- version: 1.3.1
23
+ version: "0"
24
24
  version:
25
25
  description: Clean up your Excel generated HTML
26
26
  email: wtnabe@gmail.com
@@ -36,8 +36,8 @@ files:
36
36
  - ChangeLog
37
37
  - Rakefile
38
38
  - bin/xls_html_cleaner
39
+ - test/html/simple_excel.htm
39
40
  - test/html/simple_ooo.html
40
- - test/output/simple_ooo.html
41
41
  - test/test_helper.rb
42
42
  - test/xls_html_cleaner_test.rb
43
43
  - lib/xls_html_cleaner.rb
@@ -1,48 +0,0 @@
1
- <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
2
-
3
- <html>
4
- <head>
5
-
6
-
7
-
8
- <title></title>
9
-
10
-
11
-
12
-
13
-
14
-
15
-
16
-
17
-
18
-
19
-
20
-
21
- </head>
22
-
23
- <body>
24
- <table>
25
- <colgroup><col /><col /><col /></colgroup>
26
- <tbody>
27
- <tr>
28
- <td>a</td>
29
- <td>d</td>
30
- <td>g</td>
31
- </tr>
32
- <tr>
33
- <td>b</td>
34
- <td>e</td>
35
- <td>h</td>
36
- </tr>
37
- <tr>
38
- <td>c</td>
39
- <td>f</td>
40
- <td>I</td>
41
- </tr>
42
- </tbody>
43
- </table>
44
-
45
-
46
- </body>
47
-
48
- </html>