xls_html_cleaner 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +4 -0
- data/README +31 -0
- data/Rakefile +113 -0
- data/bin/xls_html_cleaner +5 -0
- data/lib/xls_html_cleaner.rb +30 -0
- data/test/html/simple_ooo.html +44 -0
- data/test/output/simple_ooo.html +48 -0
- data/test/test_helper.rb +3 -0
- data/test/xls_html_cleaner_test.rb +23 -0
- metadata +84 -0
data/ChangeLog
ADDED
data/README
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
|
2
|
+
= xls_html_cleaner
|
3
|
+
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Clean up your Excel generated HTML
|
8
|
+
|
9
|
+
== Installation
|
10
|
+
|
11
|
+
=== Archive Installation
|
12
|
+
|
13
|
+
rake install
|
14
|
+
|
15
|
+
=== Gem Installation
|
16
|
+
|
17
|
+
gem install xls_html_cleaner
|
18
|
+
|
19
|
+
|
20
|
+
== Features/Problems
|
21
|
+
|
22
|
+
|
23
|
+
== Synopsis
|
24
|
+
|
25
|
+
$ xls_html_cleaner SRC_HTML > DEST_HTML
|
26
|
+
|
27
|
+
== Copyright
|
28
|
+
|
29
|
+
Author:: wtnabe
|
30
|
+
Copyright:: Copyright (c) 2010 wtnabe
|
31
|
+
License:: Two-clause BSD
|
data/Rakefile
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
# -*- mode: ruby -*-
|
2
|
+
require 'rubygems'
|
3
|
+
require 'rake'
|
4
|
+
require 'rake/clean'
|
5
|
+
require 'rake/testtask'
|
6
|
+
require 'rake/packagetask'
|
7
|
+
require 'rake/gempackagetask'
|
8
|
+
require 'rake/rdoctask'
|
9
|
+
require 'fileutils'
|
10
|
+
require 'lib/xls_html_cleaner'
|
11
|
+
include FileUtils
|
12
|
+
|
13
|
+
# Project metadata shared by the gem specification and the tasks below.
NAME              = 'xls_html_cleaner'
AUTHOR            = 'wtnabe'
EMAIL             = 'wtnabe@gmail.com'
DESCRIPTION       = 'Clean up your Excel generated HTML'
RUBYFORGE_PROJECT = 'xls_html_cleaner'
HOMEPATH          = 'http://github.com/wtnabe/xls_html_cleaner'
# Executables (relative to bin/) shipped with the gem.
BIN_FILES         = ['xls_html_cleaner']
|
20
|
+
|
21
|
+
# Gem version, taken from the library itself.
VERS = XlsHtmlCleaner::VERSION
# Revision number captured from .svn/entries, or nil when that file cannot be
# read or contains no match (best effort, hence the rescue modifier).
# The capture group must be \d+ (digits); a bare "d+" only matches the letter d.
REV = File.read(".svn/entries")[/committed-rev="(\d+)"/, 1] rescue nil
# Extra patterns removed by `rake clean`.
CLEAN.include ['**/.*.sw?', '*.gem', '.config']
|
24
|
+
# Command-line options handed to rdoc, both by the gem specification and by
# the rdoc task.
RDOC_OPTS = ['--title', "#{NAME} documentation"] +
            ['--charset', 'utf-8'] +
            ['--opname', 'index.html'] +
            ['--line-numbers'] +
            ['--main', 'README'] +
            ['--inline-source']
|
32
|
+
|
33
|
+
# Running plain `rake` runs the test suite; packaging cleans first.
task :default => :test
task :package => :clean

# Runs every test/**/*_test.rb file with test/ added to the load path.
Rake::TestTask.new("test") do |test_task|
  test_task.libs.push("test")
  test_task.pattern = "test/**/*_test.rb"
  test_task.verbose = true
end
|
41
|
+
|
42
|
+
# Gem specification, built from the project constants defined above.
spec = Gem::Specification.new do |gem|
  # Identity
  gem.name     = NAME
  gem.version  = VERS
  gem.platform = Gem::Platform::RUBY

  # Documentation
  gem.has_rdoc         = true
  gem.extra_rdoc_files = ["README", "ChangeLog"]
  gem.rdoc_options    += RDOC_OPTS + ['--exclude', '^(examples|extras)/']

  # Descriptive metadata
  gem.summary     = DESCRIPTION
  gem.description = DESCRIPTION
  gem.author      = AUTHOR
  gem.email       = EMAIL
  gem.homepage    = HOMEPATH

  # Layout
  gem.executables  = BIN_FILES
  gem.bindir       = "bin"
  gem.require_path = "lib"
  #gem.autorequire = ""
  gem.test_files   = Dir.glob("test/*_test.rb")

  gem.add_dependency('hpricot', '>=1.3.1')
  #gem.required_ruby_version = '>= 1.8.2'

  # Packaged files: the fixed top-level files followed by each glob's matches,
  # in the order the patterns are listed.
  patterns = [
    "{bin,doc,test,lib,templates,generator,extras,website,script}/**/*",
    "ext/**/*.{h,c,rb}",
    "examples/**/*.rb",
    "tools/*.rb",
    "rails/*.rb"
  ]
  gem.files = patterns.inject(%w(README ChangeLog Rakefile)) { |found, pattern|
    found + Dir.glob(pattern)
  }

  gem.extensions = FileList["ext/**/extconf.rb"].to_a
end
|
72
|
+
|
73
|
+
# Packaging tasks for the specification above; a tar archive is requested
# in addition to the .gem file.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
  pkg.need_tar = true
end
|
77
|
+
|
78
|
+
desc "install #{NAME}-#{VERS}.gem"
task :install do
  # Build the package in a child rake process, then install the built gem.
  gem_file = "#{NAME}-#{VERS}.gem"
  sh "rake package"
  sh "sudo gem install pkg/#{gem_file}"
end

desc "uninstall gem"
task :uninstall => [:clean] do
  sh "sudo gem uninstall #{NAME}"
end
|
89
|
+
|
90
|
+
|
91
|
+
# Generates rdoc into html/. Set DOC_FILES to a comma-separated list to
# document only those files; otherwise the README, ChangeLog, library
# sources and C extension sources are used.
Rake::RDocTask.new do |doc|
  doc.rdoc_dir = 'html'
  doc.options += RDOC_OPTS
  doc.template = "resh"
  #doc.template = "#{ENV['template']}.rb" if ENV['template']
  requested = ENV['DOC_FILES']
  if requested
    doc.rdoc_files.include(requested.split(/,\s*/))
  else
    ['README', 'ChangeLog', 'lib/**/*.rb', 'ext/**/*.c'].each { |pattern|
      doc.rdoc_files.include(pattern)
    }
  end
end
|
104
|
+
|
105
|
+
desc 'Show information about the gem.'
# Prints the specification as Ruby source.
task(:debug_gem) { puts spec.to_ruby }
|
109
|
+
|
110
|
+
desc 'Update gem spec'
task :gemspec do
  # Writes the specification as Ruby source to <NAME>.gemspec. The block form
  # of File.open closes the file when the block exits, even if spec.to_ruby
  # raises; the bare open(...).write form left the handle open.
  File.open("#{NAME}.gemspec", 'w') { |f| f.write spec.to_ruby }
end
|
data/lib/xls_html_cleaner.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'rubygems' unless defined? RubyGems
|
2
|
+
require 'hpricot'
|
3
|
+
|
4
|
+
# Reduces spreadsheet-exported HTML to bare table markup.
#
# Elements whose tag is not listed in ALLOW_TAGS are replaced by their own
# inner HTML, comments are replaced by a newline, and every attribute is
# removed from elements.
class XlsHtmlCleaner
  VERSION = '0.0.1'

  # Lower-case tag names that are kept in the output.
  ALLOW_TAGS = %w( html head title body
                   table thead tbody tfoot tr th td col colgroup )

  def initialize
    @parser = nil
  end

  # Parses +str+ with Hpricot, cleans the resulting document in place and
  # returns it serialized with +to_s+.
  #
  # str:: HTML source text.
  def clean!( str )
    @parser = Hpricot( str )
    @parser.traverse_all_element { |e|
      # Unwrap disallowed elements, keeping their content (or a newline when
      # they are empty).
      if e.elem? && !ALLOW_TAGS.include?( e.name.downcase )
        e.swap( e.inner_html.size > 0 ? e.inner_html : "\n" )
      end
      if e.comment?
        e.swap( "\n" )
      end
      if e.elem?
        e.attributes.to_hash.each_key { |a|
          e.remove_attribute( a )
        }
      end
    }
    # Serialize the document itself instead of chaining .to_s onto the
    # traversal's return value: if that value is an Array, Ruby >= 1.9 renders
    # it in inspect form rather than joining the nodes.
    # NOTE(review): confirm what Hpricot's traverse_all_element returns.
    @parser.to_s
  end
end
|
data/test/html/simple_ooo.html
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
|
2
|
+
|
3
|
+
<HTML>
|
4
|
+
<HEAD>
|
5
|
+
|
6
|
+
<META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=utf-8">
|
7
|
+
<TITLE></TITLE>
|
8
|
+
<META NAME="GENERATOR" CONTENT="OpenOffice.org 3.1 (Unix)">
|
9
|
+
<META NAME="CREATED" CONTENT="20100213;23342400">
|
10
|
+
<META NAME="CHANGED" CONTENT="0;0">
|
11
|
+
|
12
|
+
<STYLE>
|
13
|
+
<!--
|
14
|
+
BODY,DIV,TABLE,THEAD,TBODY,TFOOT,TR,TH,TD,P { font-family:"ヒラギノ角ゴ ProN W3"; font-size:x-small }
|
15
|
+
-->
|
16
|
+
</STYLE>
|
17
|
+
|
18
|
+
</HEAD>
|
19
|
+
|
20
|
+
<BODY TEXT="#000000">
|
21
|
+
<TABLE FRAME=VOID CELLSPACING=0 COLS=3 RULES=NONE BORDER=0>
|
22
|
+
<COLGROUP><COL WIDTH=100><COL WIDTH=100><COL WIDTH=100></COLGROUP>
|
23
|
+
<TBODY>
|
24
|
+
<TR>
|
25
|
+
<TD WIDTH=100 HEIGHT=18 ALIGN=LEFT><FONT FACE="Arial">a</FONT></TD>
|
26
|
+
<TD WIDTH=100 ALIGN=LEFT><FONT FACE="Arial">d</FONT></TD>
|
27
|
+
<TD WIDTH=100 ALIGN=LEFT><FONT FACE="Arial">g</FONT></TD>
|
28
|
+
</TR>
|
29
|
+
<TR>
|
30
|
+
<TD HEIGHT=18 ALIGN=LEFT><FONT FACE="Arial">b</FONT></TD>
|
31
|
+
<TD ALIGN=LEFT><FONT FACE="Arial">e</FONT></TD>
|
32
|
+
<TD ALIGN=LEFT><FONT FACE="Arial">h</FONT></TD>
|
33
|
+
</TR>
|
34
|
+
<TR>
|
35
|
+
<TD HEIGHT=18 ALIGN=LEFT><FONT FACE="Arial">c</FONT></TD>
|
36
|
+
<TD ALIGN=LEFT><FONT FACE="Arial">f</FONT></TD>
|
37
|
+
<TD ALIGN=LEFT><FONT FACE="Arial">I</FONT></TD>
|
38
|
+
</TR>
|
39
|
+
</TBODY>
|
40
|
+
</TABLE>
|
41
|
+
<!-- ************************************************************************** -->
|
42
|
+
</BODY>
|
43
|
+
|
44
|
+
</HTML>
|
data/test/output/simple_ooo.html
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
|
2
|
+
|
3
|
+
<html>
|
4
|
+
<head>
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
<title></title>
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
</head>
|
22
|
+
|
23
|
+
<body>
|
24
|
+
<table>
|
25
|
+
<colgroup><col /><col /><col /></colgroup>
|
26
|
+
<tbody>
|
27
|
+
<tr>
|
28
|
+
<td>a</td>
|
29
|
+
<td>d</td>
|
30
|
+
<td>g</td>
|
31
|
+
</tr>
|
32
|
+
<tr>
|
33
|
+
<td>b</td>
|
34
|
+
<td>e</td>
|
35
|
+
<td>h</td>
|
36
|
+
</tr>
|
37
|
+
<tr>
|
38
|
+
<td>c</td>
|
39
|
+
<td>f</td>
|
40
|
+
<td>I</td>
|
41
|
+
</tr>
|
42
|
+
</tbody>
|
43
|
+
</table>
|
44
|
+
|
45
|
+
|
46
|
+
</body>
|
47
|
+
|
48
|
+
</html>
|
data/test/test_helper.rb
ADDED
data/test/xls_html_cleaner_test.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
require "test/unit"
|
4
|
+
# Smoke test: feeds every file under test/html through XlsHtmlCleaner#clean!
# and writes the result to test/output under the same base name.
class XlsHtmlCleanerTest < Test::Unit::TestCase
  def setup
    @obj = XlsHtmlCleaner.new
  end

  def test_clean!
    base = File.dirname( __FILE__ )
    Dir.glob( base + '/html/*' ) { |html|
      # Clean before opening the destination, so a failure in clean! does not
      # leave a truncated output file behind. File.read closes its handle.
      cleaned = @obj.clean!( File.read( html ) )
      File.open( File.join( base, 'output', File.basename( html ) ), 'wb' ) { |f|
        f.write( cleaned )
      }
      # Any exception propagates unchanged and fails the test. The former
      # `rescue e` clause evaluated the undefined name `e` as an exception
      # class, raising NameError and hiding the real error.
      assert( true )
    }
  end
end
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xls_html_cleaner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- wtnabe
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-02-15 00:00:00 +09:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.3.1
|
24
|
+
version:
|
25
|
+
description: Clean up your Excel generated HTML
|
26
|
+
email: wtnabe@gmail.com
|
27
|
+
executables:
|
28
|
+
- xls_html_cleaner
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- README
|
33
|
+
- ChangeLog
|
34
|
+
files:
|
35
|
+
- README
|
36
|
+
- ChangeLog
|
37
|
+
- Rakefile
|
38
|
+
- bin/xls_html_cleaner
|
39
|
+
- test/html/simple_ooo.html
|
40
|
+
- test/output/simple_ooo.html
|
41
|
+
- test/test_helper.rb
|
42
|
+
- test/xls_html_cleaner_test.rb
|
43
|
+
- lib/xls_html_cleaner.rb
|
44
|
+
has_rdoc: true
|
45
|
+
homepage: http://github.com/wtnabe/xls_html_cleaner
|
46
|
+
licenses: []
|
47
|
+
|
48
|
+
post_install_message:
|
49
|
+
rdoc_options:
|
50
|
+
- --title
|
51
|
+
- xls_html_cleaner documentation
|
52
|
+
- --charset
|
53
|
+
- utf-8
|
54
|
+
- --opname
|
55
|
+
- index.html
|
56
|
+
- --line-numbers
|
57
|
+
- --main
|
58
|
+
- README
|
59
|
+
- --inline-source
|
60
|
+
- --exclude
|
61
|
+
- ^(examples|extras)/
|
62
|
+
require_paths:
|
63
|
+
- lib
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: "0"
|
69
|
+
version:
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: "0"
|
75
|
+
version:
|
76
|
+
requirements: []
|
77
|
+
|
78
|
+
rubyforge_project:
|
79
|
+
rubygems_version: 1.3.5
|
80
|
+
signing_key:
|
81
|
+
specification_version: 3
|
82
|
+
summary: Clean up your Excel generated HTML
|
83
|
+
test_files:
|
84
|
+
- test/xls_html_cleaner_test.rb
|