xls_html_cleaner 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ChangeLog ADDED
@@ -0,0 +1,4 @@
1
+ == 0.0.1 / 2010-02-15
2
+
3
+ * initial release
4
+
data/README ADDED
@@ -0,0 +1,31 @@
1
+
2
+ = xls_html_cleaner
3
+
4
+
5
+ == Description
6
+
7
+ Clean up your Excel generated HTML
8
+
9
+ == Installation
10
+
11
+ === Archive Installation
12
+
13
+ rake install
14
+
15
+ === Gem Installation
16
+
17
+ gem install xls_html_cleaner
18
+
19
+
20
+ == Features/Problems
21
+
22
+
23
+ == Synopsis
24
+
25
+ $ xls_html_cleaner SRC_HTML > DEST_HTML
26
+
27
+ == Copyright
28
+
29
+ Author:: wtnabe
30
+ Copyright:: Copyright (c) 2010 wtnabe
31
+ License:: Two-clause BSD
data/Rakefile ADDED
@@ -0,0 +1,113 @@
1
# -*- mode: ruby -*-
#
# Build script for the xls_html_cleaner gem. This section loads Rake and its
# helper tasks, loads the library (for its VERSION constant) and defines the
# constants shared by the tasks declared further down.
require 'rubygems'
require 'rake'
require 'rake/clean'
require 'rake/testtask'
require 'rake/packagetask'
# NOTE(review): rake/gempackagetask and rake/rdoctask are not shipped with
# newer Rake releases -- confirm the Rake version this is expected to run on.
require 'rake/gempackagetask'
require 'rake/rdoctask'
require 'fileutils'
# Resolve the library relative to this file. A bare 'lib/...' require only
# works while the current directory is on the load path.
require File.expand_path('lib/xls_html_cleaner', File.dirname(__FILE__))
include FileUtils

NAME              = "xls_html_cleaner"
AUTHOR            = "wtnabe"
EMAIL             = "wtnabe@gmail.com"
DESCRIPTION       = "Clean up your Excel generated HTML"
RUBYFORGE_PROJECT = "xls_html_cleaner"
HOMEPATH          = "http://github.com/wtnabe/xls_html_cleaner"
BIN_FILES         = %w( xls_html_cleaner )

VERS              = XlsHtmlCleaner::VERSION
# Last committed Subversion revision, or nil when .svn/entries cannot be read
# or carries no committed-rev attribute. Only file-system errors are treated
# as "no revision"; anything else is a real bug and is allowed to surface.
REV               = begin
                      File.read(".svn/entries")[/committed-rev="(\d+)"/, 1]
                    rescue SystemCallError
                      nil
                    end
CLEAN.include ['**/.*.sw?', '*.gem', '.config']
# Options handed to RDoc both by the gem specification and by the rdoc task.
RDOC_OPTS = [
  '--title', "#{NAME} documentation",
  "--charset", "utf-8",
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source",
]
32
+
33
# Running plain `rake` executes the test suite; packaging always starts from
# a cleaned tree.
task :default => :test
task :package => :clean

# Unit tests: every *_test.rb file below test/, with test/ on the load path.
Rake::TestTask.new("test") { |test_task|
  test_task.verbose = true
  test_task.pattern = "test/**/*_test.rb"
  test_task.libs.push("test")
}
41
+
42
# Gem specification shared by the packaging, debug_gem and gemspec tasks.
spec = Gem::Specification.new do |gem|
  # Identity
  gem.name        = NAME
  gem.version     = VERS
  gem.platform    = Gem::Platform::RUBY
  gem.summary     = DESCRIPTION
  gem.description = DESCRIPTION
  gem.author      = AUTHOR
  gem.email       = EMAIL
  gem.homepage    = HOMEPATH

  # Documentation
  gem.has_rdoc         = true
  gem.extra_rdoc_files = ["README", "ChangeLog"]
  gem.rdoc_options     = gem.rdoc_options + RDOC_OPTS + ['--exclude', '^(examples|extras)/']

  # Executables and load path
  gem.executables  = BIN_FILES
  gem.bindir       = "bin"
  gem.require_path = "lib"
  #gem.autorequire = ""
  gem.test_files   = Dir["test/*_test.rb"]

  gem.add_dependency('hpricot', '>=1.3.1')
  #gem.required_ruby_version = '>= 1.8.2'

  # Packaged files: the fixed top-level files followed by whatever each glob
  # pattern matches, in pattern order.
  packaged_patterns = [
    "{bin,doc,test,lib,templates,generator,extras,website,script}/**/*",
    "ext/**/*.{h,c,rb}",
    "examples/**/*.rb",
    "tools/*.rb",
    "rails/*.rb",
  ]
  gem.files = %w(README ChangeLog Rakefile) +
              packaged_patterns.map { |pattern| Dir.glob(pattern) }.flatten

  gem.extensions = FileList["ext/**/extconf.rb"].to_a
end
72
+
73
# Packaging tasks for the specification above; a tar archive is requested in
# addition to the .gem file.
Rake::GemPackageTask.new(spec) { |pkg|
  pkg.gem_spec = spec
  pkg.need_tar = true
}
77
+
78
# Builds the package through a separate rake run, then installs the resulting
# gem from pkg/ using sudo.
desc "install #{NAME}-#{VERS}.gem"
task :install do
  gem_file = "#{NAME}-#{VERS}.gem"
  sh "rake package"
  sh "sudo gem install pkg/#{gem_file}"
end

# Cleans the tree, then removes the installed gem using sudo.
desc "uninstall gem"
task :uninstall => :clean do
  sh "sudo gem uninstall #{NAME}"
end
89
+
90
+
91
# API documentation written to html/. The documented files come from the
# comma-separated DOC_FILES environment variable when it is set; otherwise
# README, ChangeLog, the library sources and any C extension sources are used.
Rake::RDocTask.new { |doc_task|
  doc_task.rdoc_dir = 'html'
  doc_task.options  = doc_task.options + RDOC_OPTS
  doc_task.template = "resh"
  #doc_task.template = "#{ENV['template']}.rb" if ENV['template']

  requested = ENV['DOC_FILES']
  sources =
    if requested
      requested.split(/,\s*/)
    else
      ['README', 'ChangeLog', 'lib/**/*.rb', 'ext/**/*.c']
    end
  doc_task.rdoc_files.include(*sources)
}
104
+
105
# Prints the gem specification as Ruby source on standard output.
desc 'Show information about the gem.'
task(:debug_gem) { $stdout.puts(spec.to_ruby) }
109
+
110
# Writes the gem specification, rendered as Ruby source, to <NAME>.gemspec in
# the current directory. The block form of File.open closes (and therefore
# flushes) the file as soon as the write finishes instead of leaving the
# handle open until it is garbage collected.
desc 'Update gem spec'
task :gemspec do
  File.open("#{NAME}.gemspec", 'w') { |f| f.write(spec.to_ruby) }
end
@@ -0,0 +1,5 @@
1
#! /usr/bin/env ruby
#
# Command-line front end: reads HTML from the files named on the command line
# (or from standard input when none are given), cleans it with XlsHtmlCleaner
# and prints the result on standard output.

require File.dirname( __FILE__ ) + '/../lib/xls_html_cleaner'

# Join the lines explicitly: Array#to_s only concatenates on Ruby 1.8, on
# later versions it returns the inspect form of the array.
puts XlsHtmlCleaner.new.clean!( readlines.join )
@@ -0,0 +1,30 @@
1
+ require 'rubygems' unless defined? RubyGems
2
+ require 'hpricot'
3
+
4
# Reduces spreadsheet-exported HTML to bare structural/table markup using
# Hpricot.
class XlsHtmlCleaner
  VERSION = '0.0.1'

  # Lower-case names of the elements that are kept. Any other element is
  # replaced by its own inner HTML.
  ALLOW_TAGS = %w( html head title body
                   table thead tbody tfoot tr th td col colgroup )

  def initialize
    @parser = nil
  end

  # Parses +str+ and cleans the resulting document in place:
  #
  # * an element whose name is not listed in ALLOW_TAGS (compared case
  #   insensitively) is swapped for its inner HTML, or for a newline when it
  #   has no inner HTML;
  # * a comment is swapped for a newline;
  # * every attribute is removed from every element.
  #
  # str:: HTML source to clean.
  #
  # Returns the cleaned document serialized as a String. The parsed document
  # is also kept in @parser.
  def clean!( str )
    @parser = Hpricot( str )
    # NOTE(review): nodes are swapped while the tree is being walked; verify
    # that replacement nodes (e.g. a disallowed tag nested directly inside
    # another disallowed tag) are themselves visited.
    @parser.traverse_all_element do |e|
      if e.elem? && !ALLOW_TAGS.include?( e.name.downcase )
        inner = e.inner_html
        e.swap( inner.size > 0 ? inner : "\n" )
      end
      e.swap( "\n" ) if e.comment?
      if e.elem?
        e.attributes.to_hash.each_key { |a| e.remove_attribute( a ) }
      end
    end
    # Serialize the document itself rather than the traversal's return value,
    # so the result does not depend on Array#to_s behaving like join.
    @parser.to_s
  end
end
@@ -0,0 +1,44 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
2
+
3
+ <HTML>
4
+ <HEAD>
5
+
6
+ <META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=utf-8">
7
+ <TITLE></TITLE>
8
+ <META NAME="GENERATOR" CONTENT="OpenOffice.org 3.1 (Unix)">
9
+ <META NAME="CREATED" CONTENT="20100213;23342400">
10
+ <META NAME="CHANGED" CONTENT="0;0">
11
+
12
+ <STYLE>
13
+ <!--
14
+ BODY,DIV,TABLE,THEAD,TBODY,TFOOT,TR,TH,TD,P { font-family:"ヒラギノ角ゴ ProN W3"; font-size:x-small }
15
+ -->
16
+ </STYLE>
17
+
18
+ </HEAD>
19
+
20
+ <BODY TEXT="#000000">
21
+ <TABLE FRAME=VOID CELLSPACING=0 COLS=3 RULES=NONE BORDER=0>
22
+ <COLGROUP><COL WIDTH=100><COL WIDTH=100><COL WIDTH=100></COLGROUP>
23
+ <TBODY>
24
+ <TR>
25
+ <TD WIDTH=100 HEIGHT=18 ALIGN=LEFT><FONT FACE="Arial">a</FONT></TD>
26
+ <TD WIDTH=100 ALIGN=LEFT><FONT FACE="Arial">d</FONT></TD>
27
+ <TD WIDTH=100 ALIGN=LEFT><FONT FACE="Arial">g</FONT></TD>
28
+ </TR>
29
+ <TR>
30
+ <TD HEIGHT=18 ALIGN=LEFT><FONT FACE="Arial">b</FONT></TD>
31
+ <TD ALIGN=LEFT><FONT FACE="Arial">e</FONT></TD>
32
+ <TD ALIGN=LEFT><FONT FACE="Arial">h</FONT></TD>
33
+ </TR>
34
+ <TR>
35
+ <TD HEIGHT=18 ALIGN=LEFT><FONT FACE="Arial">c</FONT></TD>
36
+ <TD ALIGN=LEFT><FONT FACE="Arial">f</FONT></TD>
37
+ <TD ALIGN=LEFT><FONT FACE="Arial">I</FONT></TD>
38
+ </TR>
39
+ </TBODY>
40
+ </TABLE>
41
+ <!-- ************************************************************************** -->
42
+ </BODY>
43
+
44
+ </HTML>
@@ -0,0 +1,48 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
2
+
3
+ <html>
4
+ <head>
5
+
6
+
7
+
8
+ <title></title>
9
+
10
+
11
+
12
+
13
+
14
+
15
+
16
+
17
+
18
+
19
+
20
+
21
+ </head>
22
+
23
+ <body>
24
+ <table>
25
+ <colgroup><col /><col /><col /></colgroup>
26
+ <tbody>
27
+ <tr>
28
+ <td>a</td>
29
+ <td>d</td>
30
+ <td>g</td>
31
+ </tr>
32
+ <tr>
33
+ <td>b</td>
34
+ <td>e</td>
35
+ <td>h</td>
36
+ </tr>
37
+ <tr>
38
+ <td>c</td>
39
+ <td>f</td>
40
+ <td>I</td>
41
+ </tr>
42
+ </tbody>
43
+ </table>
44
+
45
+
46
+ </body>
47
+
48
+ </html>
@@ -0,0 +1,3 @@
1
+ require 'test/unit'
2
+ require File.dirname(__FILE__) + '/../lib/xls_html_cleaner'
3
+
@@ -0,0 +1,23 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
+ require "test/unit"
4
# Smoke test for XlsHtmlCleaner#clean!: every file in test/html is cleaned and
# the result is written under the same file name to test/output.
class XlsHtmlCleanerTest < Test::Unit::TestCase
  def setup
    @obj = XlsHtmlCleaner.new
  end

  # Any exception raised while reading, cleaning or writing propagates as-is
  # and is reported by the test framework as an error for this test.
  def test_clean!
    base = File.dirname( __FILE__ )
    Dir.glob( File.join( base, 'html', '*' ) ) { |html|
      # Clean first so that the previous output file is only overwritten once
      # cleaning has succeeded; File.read closes the input handle itself.
      cleaned = @obj.clean!( File.read( html ) )
      File.open( File.join( base, 'output', File.basename( html ) ), 'wb' ) { |f|
        f.write( cleaned )
      }
      assert_kind_of( String, cleaned )
    }
  end
end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xls_html_cleaner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - wtnabe
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-02-15 00:00:00 +09:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.3.1
24
+ version:
25
+ description: Clean up your Excel generated HTML
26
+ email: wtnabe@gmail.com
27
+ executables:
28
+ - xls_html_cleaner
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README
33
+ - ChangeLog
34
+ files:
35
+ - README
36
+ - ChangeLog
37
+ - Rakefile
38
+ - bin/xls_html_cleaner
39
+ - test/html/simple_ooo.html
40
+ - test/output/simple_ooo.html
41
+ - test/test_helper.rb
42
+ - test/xls_html_cleaner_test.rb
43
+ - lib/xls_html_cleaner.rb
44
+ has_rdoc: true
45
+ homepage: http://github.com/wtnabe/xls_html_cleaner
46
+ licenses: []
47
+
48
+ post_install_message:
49
+ rdoc_options:
50
+ - --title
51
+ - xls_html_cleaner documentation
52
+ - --charset
53
+ - utf-8
54
+ - --opname
55
+ - index.html
56
+ - --line-numbers
57
+ - --main
58
+ - README
59
+ - --inline-source
60
+ - --exclude
61
+ - ^(examples|extras)/
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ version:
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: "0"
75
+ version:
76
+ requirements: []
77
+
78
+ rubyforge_project:
79
+ rubygems_version: 1.3.5
80
+ signing_key:
81
+ specification_version: 3
82
+ summary: Clean up your Excel generated HTML
83
+ test_files:
84
+ - test/xls_html_cleaner_test.rb