xls_html_cleaner 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +4 -0
- data/README +31 -0
- data/Rakefile +113 -0
- data/bin/xls_html_cleaner +5 -0
- data/lib/xls_html_cleaner.rb +30 -0
- data/test/html/simple_ooo.html +44 -0
- data/test/output/simple_ooo.html +48 -0
- data/test/test_helper.rb +3 -0
- data/test/xls_html_cleaner_test.rb +23 -0
- metadata +84 -0
data/ChangeLog
ADDED
data/README
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
|
2
|
+
= xls_html_cleaner
|
3
|
+
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Clean up your Excel generated HTML
|
8
|
+
|
9
|
+
== Installation
|
10
|
+
|
11
|
+
=== Archive Installation
|
12
|
+
|
13
|
+
rake install
|
14
|
+
|
15
|
+
=== Gem Installation
|
16
|
+
|
17
|
+
gem install xls_html_cleaner
|
18
|
+
|
19
|
+
|
20
|
+
== Features/Problems
|
21
|
+
|
22
|
+
|
23
|
+
== Synopsis
|
24
|
+
|
25
|
+
$ xls_html_cleaner SRC_HTML > DEST_HTML
|
26
|
+
|
27
|
+
== Copyright
|
28
|
+
|
29
|
+
Author:: wtnabe
|
30
|
+
Copyright:: Copyright (c) 2010 wtnabe
|
31
|
+
License:: Two-clause BSD
|
data/Rakefile
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
# -*- mode: ruby -*-
#
# Build script for the xls_html_cleaner gem. It defines tasks to run the
# tests (default), package the gem, install/uninstall it, generate RDoc,
# and dump the gem specification.
require 'rubygems'
require 'rake'
require 'rake/clean'
require 'rake/testtask'
require 'rake/packagetask'
require 'rake/gempackagetask'
require 'rake/rdoctask'
require 'fileutils'
# Load the library relative to this Rakefile rather than relying on the
# current directory being part of $LOAD_PATH.
require File.expand_path('lib/xls_html_cleaner', File.dirname(__FILE__))
include FileUtils

NAME              = "xls_html_cleaner"
AUTHOR            = "wtnabe"
EMAIL             = "wtnabe@gmail.com"
DESCRIPTION       = "Clean up your Excel generated HTML"
RUBYFORGE_PROJECT = "xls_html_cleaner"
HOMEPATH          = "http://github.com/wtnabe/xls_html_cleaner"
BIN_FILES         = %w( xls_html_cleaner )

# The gem version is taken from the library itself.
VERS = XlsHtmlCleaner::VERSION
# Revision number read from .svn/entries; nil when that file cannot be read
# or contains no committed-rev attribute. The capture group must be \d+
# (digits) -- a bare "d+" would only match literal "d" characters.
REV  = File.read(".svn/entries")[/committed-rev="(\d+)"/, 1] rescue nil
CLEAN.include ['**/.*.sw?', '*.gem', '.config']
RDOC_OPTS = [
  '--title', "#{NAME} documentation",
  "--charset", "utf-8",
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source",
]

task :default => [:test]
task :package => [:clean]

Rake::TestTask.new("test") do |t|
  t.libs << "test"
  t.pattern = "test/**/*_test.rb"
  t.verbose = true
end

# Gem specification shared by the packaging, debug_gem and gemspec tasks.
spec = Gem::Specification.new do |s|
  s.name             = NAME
  s.version          = VERS
  s.platform         = Gem::Platform::RUBY
  s.has_rdoc         = true
  s.extra_rdoc_files = ["README", "ChangeLog"]
  s.rdoc_options    += RDOC_OPTS + ['--exclude', '^(examples|extras)/']
  s.summary          = DESCRIPTION
  s.description      = DESCRIPTION
  s.author           = AUTHOR
  s.email            = EMAIL
  s.homepage         = HOMEPATH
  s.executables      = BIN_FILES
  s.bindir           = "bin"
  s.require_path     = "lib"
  #s.autorequire     = ""
  s.test_files       = Dir["test/*_test.rb"]

  s.add_dependency('hpricot', '>=1.3.1')
  #s.required_ruby_version = '>= 1.8.2'

  s.files = %w(README ChangeLog Rakefile) +
    Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
    Dir.glob("ext/**/*.{h,c,rb}") +
    Dir.glob("examples/**/*.rb") +
    Dir.glob("tools/*.rb") +
    Dir.glob("rails/*.rb")

  s.extensions = FileList["ext/**/extconf.rb"].to_a
end

Rake::GemPackageTask.new(spec) do |p|
  p.need_tar = true
  p.gem_spec = spec
end

desc "install #{NAME}-#{VERS}.gem"
task :install do
  name = "#{NAME}-#{VERS}.gem"
  sh %{rake package}
  sh %{sudo gem install pkg/#{name}}
end

desc "uninstall gem"
task :uninstall => [:clean] do
  sh %{sudo gem uninstall #{NAME}}
end


Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'html'
  rdoc.options += RDOC_OPTS
  rdoc.template = "resh"
  #rdoc.template = "#{ENV['template']}.rb" if ENV['template']
  # DOC_FILES (comma-separated) overrides the default set of documented files.
  if ENV['DOC_FILES']
    rdoc.rdoc_files.include(ENV['DOC_FILES'].split(/,\s*/))
  else
    rdoc.rdoc_files.include('README', 'ChangeLog')
    rdoc.rdoc_files.include('lib/**/*.rb')
    rdoc.rdoc_files.include('ext/**/*.c')
  end
end

desc 'Show information about the gem.'
task :debug_gem do
  puts spec.to_ruby
end

desc 'Update gem spec'
task :gemspec do
  # Block form closes (and flushes) the file handle once the spec is written.
  File.open("#{NAME}.gemspec", 'w') { |f| f.write spec.to_ruby }
end
|
data/bin/xls_html_cleaner
ADDED
data/lib/xls_html_cleaner.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'rubygems' unless defined? RubyGems
|
2
|
+
require 'hpricot'
|
3
|
+
|
4
|
+
# Cleans spreadsheet-generated HTML: elements outside a small whitelist are
# replaced by their inner HTML, comments are replaced by a newline, and all
# attributes are removed from elements.
class XlsHtmlCleaner
  VERSION = '0.0.1'

  # Lowercase names of the elements that are kept in the output. Any other
  # element is replaced by its inner HTML.
  ALLOW_TAGS = %w( html head title body
                   table thead tbody tfoot tr th td col colgroup )

  def initialize
    @parser = nil
  end

  # Parses +str+ with Hpricot, cleans the resulting tree in place and
  # returns the serialized result.
  #
  # str:: HTML source text to clean.
  #
  # Returns the cleaned document as a String.
  def clean!( str )
    @parser = Hpricot( str )
    @parser.traverse_all_element { |e|
      # Unwrap elements that are not whitelisted; an element with no content
      # is replaced by a newline instead of an empty string.
      if e.elem? && !ALLOW_TAGS.include?( e.name.downcase )
        e.swap( e.inner_html.size > 0 ? e.inner_html : "\n" )
      end
      # Comments are replaced by a single newline.
      e.swap( "\n" ) if e.comment?
      # Remove every attribute from the element.
      if e.elem?
        e.attributes.to_hash.each_key { |a| e.remove_attribute( a ) }
      end
    }
    # Serialize the parsed document itself instead of calling to_s on the
    # traversal's return value.
    # NOTE(review): if traverse_all_element returns a node list, Array#to_s
    # on Ruby >= 1.9 yields an inspect string rather than HTML -- confirm
    # against the Hpricot version in use.
    @parser.to_s
  end
end
|
data/test/html/simple_ooo.html
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
|
2
|
+
|
3
|
+
<HTML>
|
4
|
+
<HEAD>
|
5
|
+
|
6
|
+
<META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=utf-8">
|
7
|
+
<TITLE></TITLE>
|
8
|
+
<META NAME="GENERATOR" CONTENT="OpenOffice.org 3.1 (Unix)">
|
9
|
+
<META NAME="CREATED" CONTENT="20100213;23342400">
|
10
|
+
<META NAME="CHANGED" CONTENT="0;0">
|
11
|
+
|
12
|
+
<STYLE>
|
13
|
+
<!--
|
14
|
+
BODY,DIV,TABLE,THEAD,TBODY,TFOOT,TR,TH,TD,P { font-family:"ヒラギノ角ゴ ProN W3"; font-size:x-small }
|
15
|
+
-->
|
16
|
+
</STYLE>
|
17
|
+
|
18
|
+
</HEAD>
|
19
|
+
|
20
|
+
<BODY TEXT="#000000">
|
21
|
+
<TABLE FRAME=VOID CELLSPACING=0 COLS=3 RULES=NONE BORDER=0>
|
22
|
+
<COLGROUP><COL WIDTH=100><COL WIDTH=100><COL WIDTH=100></COLGROUP>
|
23
|
+
<TBODY>
|
24
|
+
<TR>
|
25
|
+
<TD WIDTH=100 HEIGHT=18 ALIGN=LEFT><FONT FACE="Arial">a</FONT></TD>
|
26
|
+
<TD WIDTH=100 ALIGN=LEFT><FONT FACE="Arial">d</FONT></TD>
|
27
|
+
<TD WIDTH=100 ALIGN=LEFT><FONT FACE="Arial">g</FONT></TD>
|
28
|
+
</TR>
|
29
|
+
<TR>
|
30
|
+
<TD HEIGHT=18 ALIGN=LEFT><FONT FACE="Arial">b</FONT></TD>
|
31
|
+
<TD ALIGN=LEFT><FONT FACE="Arial">e</FONT></TD>
|
32
|
+
<TD ALIGN=LEFT><FONT FACE="Arial">h</FONT></TD>
|
33
|
+
</TR>
|
34
|
+
<TR>
|
35
|
+
<TD HEIGHT=18 ALIGN=LEFT><FONT FACE="Arial">c</FONT></TD>
|
36
|
+
<TD ALIGN=LEFT><FONT FACE="Arial">f</FONT></TD>
|
37
|
+
<TD ALIGN=LEFT><FONT FACE="Arial">I</FONT></TD>
|
38
|
+
</TR>
|
39
|
+
</TBODY>
|
40
|
+
</TABLE>
|
41
|
+
<!-- ************************************************************************** -->
|
42
|
+
</BODY>
|
43
|
+
|
44
|
+
</HTML>
|
data/test/output/simple_ooo.html
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
|
2
|
+
|
3
|
+
<html>
|
4
|
+
<head>
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
<title></title>
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
</head>
|
22
|
+
|
23
|
+
<body>
|
24
|
+
<table>
|
25
|
+
<colgroup><col /><col /><col /></colgroup>
|
26
|
+
<tbody>
|
27
|
+
<tr>
|
28
|
+
<td>a</td>
|
29
|
+
<td>d</td>
|
30
|
+
<td>g</td>
|
31
|
+
</tr>
|
32
|
+
<tr>
|
33
|
+
<td>b</td>
|
34
|
+
<td>e</td>
|
35
|
+
<td>h</td>
|
36
|
+
</tr>
|
37
|
+
<tr>
|
38
|
+
<td>c</td>
|
39
|
+
<td>f</td>
|
40
|
+
<td>I</td>
|
41
|
+
</tr>
|
42
|
+
</tbody>
|
43
|
+
</table>
|
44
|
+
|
45
|
+
|
46
|
+
</body>
|
47
|
+
|
48
|
+
</html>
|
data/test/test_helper.rb
ADDED
data/test/xls_html_cleaner_test.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper.rb'
|
2
|
+
|
3
|
+
require "test/unit"
|
4
|
+
# Smoke test for XlsHtmlCleaner: cleans every fixture under test/html and
# writes the result to the file of the same name under test/output.
class XlsHtmlCleanerTest < Test::Unit::TestCase
  def setup
    @obj = XlsHtmlCleaner.new
  end

  # Passes when every fixture can be cleaned and written without raising.
  # Any exception propagates to the test runner unchanged; the previous
  # `rescue e` clause would instead have raised NameError while evaluating
  # `e`, hiding the real failure.
  def test_clean!
    base = File.dirname( __FILE__ )
    Dir.glob( base + '/html/*' ) { |html|
      # Clean first so an existing output file is not truncated when
      # cleaning fails; File.read closes the input handle.
      cleaned = @obj.clean!( File.read( html ) )
      File.open( File.join( base, 'output', File.basename( html ) ), 'wb' ) { |f|
        f.write( cleaned )
      }
      assert( true )
    }
  end
end
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xls_html_cleaner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- wtnabe
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-02-15 00:00:00 +09:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.3.1
|
24
|
+
version:
|
25
|
+
description: Clean up your Excel generated HTML
|
26
|
+
email: wtnabe@gmail.com
|
27
|
+
executables:
|
28
|
+
- xls_html_cleaner
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- README
|
33
|
+
- ChangeLog
|
34
|
+
files:
|
35
|
+
- README
|
36
|
+
- ChangeLog
|
37
|
+
- Rakefile
|
38
|
+
- bin/xls_html_cleaner
|
39
|
+
- test/html/simple_ooo.html
|
40
|
+
- test/output/simple_ooo.html
|
41
|
+
- test/test_helper.rb
|
42
|
+
- test/xls_html_cleaner_test.rb
|
43
|
+
- lib/xls_html_cleaner.rb
|
44
|
+
has_rdoc: true
|
45
|
+
homepage: http://github.com/wtnabe/xls_html_cleaner
|
46
|
+
licenses: []
|
47
|
+
|
48
|
+
post_install_message:
|
49
|
+
rdoc_options:
|
50
|
+
- --title
|
51
|
+
- xls_html_cleaner documentation
|
52
|
+
- --charset
|
53
|
+
- utf-8
|
54
|
+
- --opname
|
55
|
+
- index.html
|
56
|
+
- --line-numbers
|
57
|
+
- --main
|
58
|
+
- README
|
59
|
+
- --inline-source
|
60
|
+
- --exclude
|
61
|
+
- ^(examples|extras)/
|
62
|
+
require_paths:
|
63
|
+
- lib
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: "0"
|
69
|
+
version:
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: "0"
|
75
|
+
version:
|
76
|
+
requirements: []
|
77
|
+
|
78
|
+
rubyforge_project:
|
79
|
+
rubygems_version: 1.3.5
|
80
|
+
signing_key:
|
81
|
+
specification_version: 3
|
82
|
+
summary: Clean up your Excel generated HTML
|
83
|
+
test_files:
|
84
|
+
- test/xls_html_cleaner_test.rb
|