xls_html_cleaner 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog ADDED
@@ -0,0 +1,4 @@
1
+ == 0.0.1 / 2010-02-15
2
+
3
+ * initial release
4
+
data/README ADDED
@@ -0,0 +1,31 @@
1
+
2
+ = xls_html_cleaner
3
+
4
+
5
+ == Description
6
+
7
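+ Clean up your Excel-generated HTML: comments and all attributes are removed, and every tag other than html, head, title, body and the table tags (table, thead, tbody, tfoot, tr, th, td, col, colgroup) is replaced by its inner content.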
8
+
9
+ == Installation
10
+
11
+ === Archive Installation
12
+
13
+ rake install
14
+
15
+ === Gem Installation
16
+
17
+ gem install xls_html_cleaner
18
+
19
+
20
+ == Features/Problems
21
+
22
+
23
+ == Synopsis
24
+
25
+ $ xls_html_cleaner SRC_HTML > DEST_HTML
26
+
27
+ == Copyright
28
+
29
+ Author:: wtnabe
30
+ Copyright:: Copyright (c) 2010 wtnabe
31
+ License:: Two-clause BSD
data/Rakefile ADDED
@@ -0,0 +1,113 @@
1
# -*- mode: ruby -*-
#
# Rake build script for the xls_html_cleaner gem. It defines tasks to run the
# tests (default), build the gem/tar packages, install and uninstall the gem,
# generate RDoc, and dump the gem specification.
require 'rubygems'
require 'rake'
require 'rake/clean'
require 'rake/testtask'
require 'rake/packagetask'
require 'rake/gempackagetask'
require 'rake/rdoctask'
require 'fileutils'
# Load the library relative to this Rakefile. A bare 'lib/...' path is looked
# up on the load path, which no longer contains the current directory on
# Ruby 1.9.2 and later.
require File.expand_path('lib/xls_html_cleaner', File.dirname(__FILE__))
include FileUtils

# Project metadata shared by the gem specification and the tasks below.
NAME              = "xls_html_cleaner"
AUTHOR            = "wtnabe"
EMAIL             = "wtnabe@gmail.com"
DESCRIPTION       = "Clean up your Excel generated HTML"
RUBYFORGE_PROJECT = "xls_html_cleaner"
HOMEPATH          = "http://github.com/wtnabe/xls_html_cleaner"
BIN_FILES         = %w( xls_html_cleaner )

VERS = XlsHtmlCleaner::VERSION
# Best effort: the committed revision number read from .svn/entries, or nil
# when that file cannot be read. The capture must be \d+ (digits); a bare d+
# would only match literal "d" characters.
REV = File.read(".svn/entries")[/committed-rev="(\d+)"/, 1] rescue nil
CLEAN.include ['**/.*.sw?', '*.gem', '.config']
RDOC_OPTS = [
  '--title', "#{NAME} documentation",
  "--charset", "utf-8",
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source",
]

task :default => [:test]
task :package => [:clean]

# Runs every test/**/*_test.rb file with test/ on the load path.
Rake::TestTask.new("test") do |t|
  t.libs << "test"
  t.pattern = "test/**/*_test.rb"
  t.verbose = true
end

# Gem specification used by the packaging, debug_gem and gemspec tasks.
spec = Gem::Specification.new do |s|
  s.name = NAME
  s.version = VERS
  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.extra_rdoc_files = ["README", "ChangeLog"]
  s.rdoc_options += RDOC_OPTS + ['--exclude', '^(examples|extras)/']
  s.summary = DESCRIPTION
  s.description = DESCRIPTION
  s.author = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPATH
  s.executables = BIN_FILES
  s.bindir = "bin"
  s.require_path = "lib"
  #s.autorequire = ""
  s.test_files = Dir["test/*_test.rb"]

  s.add_dependency('hpricot', '>=1.3.1')
  #s.required_ruby_version = '>= 1.8.2'

  s.files = %w(README ChangeLog Rakefile) +
    Dir.glob("{bin,doc,test,lib,templates,generator,extras,website,script}/**/*") +
    Dir.glob("ext/**/*.{h,c,rb}") +
    Dir.glob("examples/**/*.rb") +
    Dir.glob("tools/*.rb") +
    Dir.glob("rails/*.rb")

  s.extensions = FileList["ext/**/extconf.rb"].to_a
end

Rake::GemPackageTask.new(spec) do |p|
  p.need_tar = true
  p.gem_spec = spec
end

desc "install #{NAME}-#{VERS}.gem"
task :install do
  name = "#{NAME}-#{VERS}.gem"
  sh %{rake package}
  sh %{sudo gem install pkg/#{name}}
end

desc "uninstall gem"
task :uninstall => [:clean] do
  sh %{sudo gem uninstall #{NAME}}
end


# Generates HTML documentation into html/. Set DOC_FILES to a comma-separated
# list to document only those files instead of the default set.
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'html'
  rdoc.options += RDOC_OPTS
  rdoc.template = "resh"
  #rdoc.template = "#{ENV['template']}.rb" if ENV['template']
  if ENV['DOC_FILES']
    rdoc.rdoc_files.include(ENV['DOC_FILES'].split(/,\s*/))
  else
    rdoc.rdoc_files.include('README', 'ChangeLog')
    rdoc.rdoc_files.include('lib/**/*.rb')
    rdoc.rdoc_files.include('ext/**/*.c')
  end
end

desc 'Show information about the gem.'
task :debug_gem do
  puts spec.to_ruby
end

desc 'Update gem spec'
task :gemspec do
  # Block form so the file is flushed and closed as soon as it is written.
  File.open("#{NAME}.gemspec", 'w') { |f| f.write spec.to_ruby }
end
@@ -0,0 +1,5 @@
1
#! /usr/bin/env ruby
#
# Command-line front end: reads HTML from the files named on the command line
# (or from standard input when none are given), cleans it with XlsHtmlCleaner
# and prints the result to standard output.

require File.dirname( __FILE__ ) + '/../lib/xls_html_cleaner'

# Read the input as one string. Array#to_s (as in readlines.to_s) yields the
# inspect form of the array on Ruby 1.9+, which would hand the parser
# '["<html>\n", ...]' instead of the document. to_s guards against a nil read.
puts XlsHtmlCleaner.new.clean!( ARGF.read.to_s )
@@ -0,0 +1,30 @@
1
+ require 'rubygems' unless defined? RubyGems
2
+ require 'hpricot'
3
+
4
# Cleans spreadsheet-exported HTML: unwraps every element that is not in
# ALLOW_TAGS, blanks out comments and strips all attributes from elements.
class XlsHtmlCleaner
  VERSION = '0.0.1'

  # Lower-case names of the elements that are kept. Frozen so the shared list
  # cannot be modified by accident.
  ALLOW_TAGS = %w( html head title body
                   table thead tbody tfoot tr th td col colgroup ).freeze

  def initialize
    @parser = nil
  end

  # Parses +str+ with Hpricot and cleans the resulting tree.
  #
  # For each node visited by traverse_all_element:
  # * an element whose lower-cased name is not in ALLOW_TAGS is swapped for
  #   its inner HTML, or for a newline when it has no inner HTML;
  # * every element then has all of its attributes removed;
  # * a comment is swapped for a newline.
  #
  # Returns the string form of the value traverse_all_element returns
  # (presumably the parsed document -- confirm against Hpricot).
  #
  # NOTE(review): the replacement for a disallowed element is passed to swap
  # as an HTML string; confirm that disallowed tags nested inside that string
  # are themselves visited and unwrapped by the ongoing traversal.
  def clean!( str )
    @parser = Hpricot( str )
    cleaned = @parser.traverse_all_element do |node|
      if node.elem?
        unless ALLOW_TAGS.include?( node.name.downcase )
          inner = node.inner_html
          node.swap( inner.empty? ? "\n" : inner )
        end
        # to_hash gives a separate collection of keys to iterate while the
        # attributes are being removed from the node.
        node.attributes.to_hash.each_key { |attr| node.remove_attribute( attr ) }
      elsif node.comment?
        node.swap( "\n" )
      end
    end
    cleaned.to_s
  end
end
@@ -0,0 +1,44 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
2
+
3
+ <HTML>
4
+ <HEAD>
5
+
6
+ <META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=utf-8">
7
+ <TITLE></TITLE>
8
+ <META NAME="GENERATOR" CONTENT="OpenOffice.org 3.1 (Unix)">
9
+ <META NAME="CREATED" CONTENT="20100213;23342400">
10
+ <META NAME="CHANGED" CONTENT="0;0">
11
+
12
+ <STYLE>
13
+ <!--
14
+ BODY,DIV,TABLE,THEAD,TBODY,TFOOT,TR,TH,TD,P { font-family:"ヒラギノ角ゴ ProN W3"; font-size:x-small }
15
+ -->
16
+ </STYLE>
17
+
18
+ </HEAD>
19
+
20
+ <BODY TEXT="#000000">
21
+ <TABLE FRAME=VOID CELLSPACING=0 COLS=3 RULES=NONE BORDER=0>
22
+ <COLGROUP><COL WIDTH=100><COL WIDTH=100><COL WIDTH=100></COLGROUP>
23
+ <TBODY>
24
+ <TR>
25
+ <TD WIDTH=100 HEIGHT=18 ALIGN=LEFT><FONT FACE="Arial">a</FONT></TD>
26
+ <TD WIDTH=100 ALIGN=LEFT><FONT FACE="Arial">d</FONT></TD>
27
+ <TD WIDTH=100 ALIGN=LEFT><FONT FACE="Arial">g</FONT></TD>
28
+ </TR>
29
+ <TR>
30
+ <TD HEIGHT=18 ALIGN=LEFT><FONT FACE="Arial">b</FONT></TD>
31
+ <TD ALIGN=LEFT><FONT FACE="Arial">e</FONT></TD>
32
+ <TD ALIGN=LEFT><FONT FACE="Arial">h</FONT></TD>
33
+ </TR>
34
+ <TR>
35
+ <TD HEIGHT=18 ALIGN=LEFT><FONT FACE="Arial">c</FONT></TD>
36
+ <TD ALIGN=LEFT><FONT FACE="Arial">f</FONT></TD>
37
+ <TD ALIGN=LEFT><FONT FACE="Arial">I</FONT></TD>
38
+ </TR>
39
+ </TBODY>
40
+ </TABLE>
41
+ <!-- ************************************************************************** -->
42
+ </BODY>
43
+
44
+ </HTML>
@@ -0,0 +1,48 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
2
+
3
+ <html>
4
+ <head>
5
+
6
+
7
+
8
+ <title></title>
9
+
10
+
11
+
12
+
13
+
14
+
15
+
16
+
17
+
18
+
19
+
20
+
21
+ </head>
22
+
23
+ <body>
24
+ <table>
25
+ <colgroup><col /><col /><col /></colgroup>
26
+ <tbody>
27
+ <tr>
28
+ <td>a</td>
29
+ <td>d</td>
30
+ <td>g</td>
31
+ </tr>
32
+ <tr>
33
+ <td>b</td>
34
+ <td>e</td>
35
+ <td>h</td>
36
+ </tr>
37
+ <tr>
38
+ <td>c</td>
39
+ <td>f</td>
40
+ <td>I</td>
41
+ </tr>
42
+ </tbody>
43
+ </table>
44
+
45
+
46
+ </body>
47
+
48
+ </html>
@@ -0,0 +1,3 @@
1
+ require 'test/unit'
2
+ require File.dirname(__FILE__) + '/../lib/xls_html_cleaner'
3
+
@@ -0,0 +1,23 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
+ require "test/unit"
4
# Smoke test for XlsHtmlCleaner#clean!: every file under test/html is cleaned
# and the result is written to the same file name under test/output.
class XlsHtmlCleanerTest < Test::Unit::TestCase
  def setup
    @obj = XlsHtmlCleaner.new
  end

  def test_clean!
    base = File.dirname( __FILE__ )
    Dir.glob( base + '/html/*' ) { |html|
      # Clean before opening the destination so a failure does not leave a
      # truncated output file behind. Errors propagate to the test runner:
      # the former `rescue e` evaluated an undefined name and replaced the
      # real failure with a NameError.
      cleaned = @obj.clean!( File.read( html ) )
      File.open( File.join( base, 'output', File.basename( html ) ), 'wb' ) { |f|
        f.write( cleaned )
      }
      assert_kind_of( String, cleaned )
    }
  end
end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xls_html_cleaner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - wtnabe
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-02-15 00:00:00 +09:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.3.1
24
+ version:
25
+ description: Clean up your Excel generated HTML
26
+ email: wtnabe@gmail.com
27
+ executables:
28
+ - xls_html_cleaner
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README
33
+ - ChangeLog
34
+ files:
35
+ - README
36
+ - ChangeLog
37
+ - Rakefile
38
+ - bin/xls_html_cleaner
39
+ - test/html/simple_ooo.html
40
+ - test/output/simple_ooo.html
41
+ - test/test_helper.rb
42
+ - test/xls_html_cleaner_test.rb
43
+ - lib/xls_html_cleaner.rb
44
+ has_rdoc: true
45
+ homepage: http://github.com/wtnabe/xls_html_cleaner
46
+ licenses: []
47
+
48
+ post_install_message:
49
+ rdoc_options:
50
+ - --title
51
+ - xls_html_cleaner documentation
52
+ - --charset
53
+ - utf-8
54
+ - --opname
55
+ - index.html
56
+ - --line-numbers
57
+ - --main
58
+ - README
59
+ - --inline-source
60
+ - --exclude
61
+ - ^(examples|extras)/
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ version:
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: "0"
75
+ version:
76
+ requirements: []
77
+
78
+ rubyforge_project:
79
+ rubygems_version: 1.3.5
80
+ signing_key:
81
+ specification_version: 3
82
+ summary: Clean up your Excel generated HTML
83
+ test_files:
84
+ - test/xls_html_cleaner_test.rb