tidy 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +40 -0
- data/MANIFEST +12 -0
- data/README.txt.en +35 -0
- data/VERSION +1 -0
- data/lib/tidy.rb +65 -0
- data/lib/tidy/tidybuf.rb +42 -0
- data/lib/tidy/tidyerr.rb +30 -0
- data/lib/tidy/tidylib.rb +115 -0
- data/lib/tidy/tidyobj.rb +103 -0
- data/lib/tidy/tidyopt.rb +31 -0
- data/test/usage.rb +13 -0
- data/tidy.gemspec +13 -0
- metadata +50 -0
data/CHANGES
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
V 1.0.0
|
2
|
+
|
3
|
+
Oct/18/2004 - Declared stable, now packaged as a RubyGem
|
4
|
+
|
5
|
+
V.B2.5
|
6
|
+
|
7
|
+
- Auto-detection and ENV variables removed, adds too much complexity
|
8
|
+
Module location is now specified by defining $TIDYLIB before require 'tidy'
|
9
|
+
$TIDYLIB is the system path to the library (ex: /usr/lib/tidylib.so)
|
10
|
+
Raises an error if $TIDYLIB is not defined
|
11
|
+
|
12
|
+
V.B2.4
|
13
|
+
|
14
|
+
Now uses ENV['TIDY_LIB'] instead of $TIDY_LIB
|
15
|
+
|
16
|
+
V.B2.3
|
17
|
+
|
18
|
+
- Added library auto-detection
|
19
|
+
a) use $TIDY_LIB if defined
|
20
|
+
b) Search $: and ENV['PATH'] paths for (tidy|tidylib|libtidy|htmltidy).(dll|so)
|
21
|
+
c) Raise an error if not found, otherwise loads
|
22
|
+
|
23
|
+
V.B2.2
|
24
|
+
|
25
|
+
- Tidy.to_b added
|
26
|
+
|
27
|
+
V.B2.1
|
28
|
+
|
29
|
+
- load_config method added
|
30
|
+
|
31
|
+
V.B2
|
32
|
+
|
33
|
+
- load(path) method added, library must be loaded explicitly
|
34
|
+
this avoids hard coding the library name
|
35
|
+
require 'tidy'
|
36
|
+
Tidy.load('path/to/tidylib.so')
|
37
|
+
|
38
|
+
V.B1
|
39
|
+
|
40
|
+
First release
|
data/MANIFEST
ADDED
data/README.txt.en
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
Tidy README
|
2
|
+
============
|
3
|
+
|
4
|
+
Ruby interface to HTML Tidy Library Project (http://tidy.sf.net)
|
5
|
+
|
6
|
+
Requirements
|
7
|
+
------------
|
8
|
+
|
9
|
+
* Recent version of Ruby
|
10
|
+
* RubyGems 0.8+ (http://rubygems.rubyforge.org)
|
11
|
+
* HTML Tidy Library (compiled)
|
12
|
+
|
13
|
+
Install
|
14
|
+
-------
|
15
|
+
|
16
|
+
- Download library from http://tidy.sf.net (pre-compiled versions available)
|
17
|
+
|
18
|
+
- Download the tidy .gem file to a directory. Then type:
|
19
|
+
|
20
|
+
$ gem install tidy-1.0.0.gem
|
21
|
+
|
22
|
+
- Open test/usage.rb, edit $TIDYLIB to point to your compiled tidy library, run.
|
23
|
+
|
24
|
+
Usage
|
25
|
+
-----
|
26
|
+
|
27
|
+
See API docs
|
28
|
+
|
29
|
+
License
|
30
|
+
-------
|
31
|
+
|
32
|
+
Distributed under the same terms as Ruby
|
33
|
+
http://www.ruby-lang.org/en/LICENSE.txt
|
34
|
+
|
35
|
+
Kevin Howe <kh at newclear.ca>
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
data/lib/tidy.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# Ruby interface to HTML Tidy Library Project (http://tidy.sf.net)
|
2
|
+
#
|
3
|
+
# = Usage
|
4
|
+
#
|
5
|
+
# $TIDYLIB = '/usr/lib/tidylib.so'
|
6
|
+
# require 'rubygems'
|
7
|
+
# require_gem 'tidy'
|
8
|
+
# html = '<html><title>title</title>Body</html>'
|
9
|
+
# xml = Tidy.open(:show_warnings=>true) do |tidy|
|
10
|
+
# tidy.options.output_xml = true
|
11
|
+
# puts tidy.options.show_warnings
|
12
|
+
# xml = tidy.clean(html)
|
13
|
+
# puts tidy.errors
|
14
|
+
# puts tidy.diagnostics
|
15
|
+
# xml
|
16
|
+
# end
|
17
|
+
# puts xml
|
18
|
+
#
|
19
|
+
# Author:: Kevin Howe
|
20
|
+
# License:: Distributes under the same terms as Ruby
|
21
|
+
#
|
22
|
+
module Tidy

  require 'dl/import'
  require 'dl/struct'
  require 'tidy/tidybuf'
  require 'tidy/tidyerr'
  require 'tidy/tidylib'
  require 'tidy/tidyobj'
  require 'tidy/tidyopt'

  module_function

  # Build a Tidyobj, forwarding the optional options hash to its constructor.
  #
  def new(options=nil)
    Tidyobj.new(options)
  end

  # Without a block this behaves like Tidy.new and returns the new object.
  # With a block, the new object is yielded, the block's value is returned,
  # and release is always called on the object afterwards (even if the
  # block raises).
  #
  def open(options=nil)
    instance = Tidy.new(options)
    return instance unless block_given?

    begin
      yield instance
    ensure
      instance.release
    end
  end

  # Boolean conversion: 0, false and nil map to false, everything else to true.
  #
  def to_b(value)
    ![0, false, nil].include?(value)
  end

end
|
data/lib/tidy/tidybuf.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# Buffer structure
|
2
|
+
#
|
3
|
+
class Tidybuf

  extend DL::Importable

  # The underlying TidyBuffer struct instance (passed to Tidylib calls).
  #
  attr_reader(:struct)

  # Ruby-side mirror of the C TidyBuffer layout.
  #
  TidyBuffer = struct [
    "byte* bp",
    "uint size",
    "uint allocated",
    "uint next"
  ]

  # Allocate a fresh TidyBuffer struct.
  #
  def initialize()
    @struct = TidyBuffer.malloc
  end

  # Hand the struct to Tidylib.buf_free (tidyBufFree), releasing the current
  # contents.
  #
  def free()
    Tidylib.buf_free(@struct)
  end

  # Buffer contents split into lines (Array of String).
  #
  # Accepts both "\r\n" and bare "\n" line endings; splitting on "\r\n" alone
  # would return the whole buffer as a single element whenever the library
  # writes "\n"-terminated lines.
  #
  def to_a
    to_s.split(/\r?\n/)
  end

  # Buffer contents as a String (string form of the struct's bp member).
  #
  def to_s
    @struct.bp.to_s
  end

end
|
data/lib/tidy/tidyerr.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Parameterized error message
|
2
|
+
#
|
3
|
+
class Tidyerr < String

  # Components parsed out of a positional message of the form
  # "line <n> column <n> - <Severity>: <message>". All four are nil when the
  # text does not have that shape.
  #
  # severity:: first character of the severity label (e.g. "W" or "E")
  # line::     line number as an Integer
  # column::   column number as an Integer
  # message::  text following the severity label
  #
  attr_reader :severity, :line, :column, :message

  # Create a new instance holding error.to_s as the string value, then try
  # to parse it into its components.
  #
  def initialize(error)
    super(error.to_s)
    parameterize
  end

  # Parse the message into parameters (where applicable).
  #
  # Only text starting with "line" is considered. The text is split into at
  # most seven whitespace-separated fields:
  #   line, <n>, column, <n>, -, <Severity>:, <message>
  # Text that starts with "line" but has fewer than six fields is left
  # unparsed instead of raising NoMethodError on the missing severity field.
  #
  def parameterize()
    return unless start_with?('line')

    tokens = split(' ', 7)
    return if tokens.length < 6

    @severity = tokens[5][0, 1] # W or E
    @line = tokens[1].to_i
    @column = tokens[3].to_i
    @message = tokens[6]
  end

  protected :parameterize

end
|
data/lib/tidy/tidylib.rb
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
# Ruby wrapper for HTML Tidy Library Project (http://tidy.sf.net)
|
2
|
+
#
|
3
|
+
module Tidylib

  extend DL::Importable

  module_function

  # Load the shared library named by $TIDYLIB and declare the Tidy C
  # functions used by the wrapper methods below.
  #
  # Raises LoadError when $TIDYLIB is nil or when dlload fails.
  #
  def load()
    raise LoadError, 'Tidy requires that $TIDYLIB be defined' if $TIDYLIB.nil?
    begin
      dlload($TIDYLIB)
    rescue
      raise LoadError, sprintf('Unable to load %s', $TIDYLIB)
    end
    extern "void *tidyCreate()"
    extern "void tidyBufFree(void*)"
    extern "int tidyCleanAndRepair(void*)"
    extern "int tidyLoadConfig(void*, char*)"
    extern "int tidyOptGetIdForName(char*)"
    extern "char tidyOptGetValue(void*, unsigned int)"
    extern "int tidyOptParseValue(void*, char*, char*)"
    extern "int tidyParseString(void*, char*)"
    extern "void tidyRelease(void*)"
    extern "char* tidyReleaseDate()"
    extern "int tidyRunDiagnostics(void*)"
    extern "int tidySaveBuffer(void*, void*)"
    extern "int tidySetErrorBuffer(void*, void*)"
  end

  # tidyBufFree
  #
  def buf_free(buf)
    tidyBufFree(buf)
  end

  # tidyCreate
  #
  def create()
    tidyCreate()
  end

  # tidyCleanAndRepair
  #
  def clean_and_repair(doc)
    tidyCleanAndRepair(doc)
  end

  # tidyLoadConfig (file is converted with to_s)
  #
  def load_config(doc, file)
    tidyLoadConfig(doc, file.to_s)
  end

  # tidyOptParseValue (name goes through translate_name, value through to_s)
  #
  def opt_parse_value(doc, name, value)
    tidyOptParseValue(doc, translate_name(name), value.to_s)
  end

  # tidyOptGetValue (returns true/false instead of 1/0)
  #
  # The option id is looked up with tidyOptGetIdForName from the translated
  # name; the raw result is converted with Tidy.to_b.
  #
  def opt_get_value(doc, name)
    value = tidyOptGetValue(doc, tidyOptGetIdForName(translate_name(name)))
    Tidy.to_b(value)
  end

  # tidyParseString (str is converted with to_s)
  #
  def parse_string(doc, str)
    tidyParseString(doc, str.to_s)
  end

  # tidyRelease
  #
  def release(doc)
    tidyRelease(doc)
  end

  # tidyReleaseDate
  #
  def release_date()
    tidyReleaseDate()
  end

  # tidyRunDiagnostics
  #
  def run_diagnostics(doc)
    tidyRunDiagnostics(doc)
  end

  # tidySaveBuffer
  #
  def save_buffer(doc, buf)
    tidySaveBuffer(doc, buf)
  end

  # tidySetErrorBuffer
  #
  def set_error_buffer(doc, buf)
    tidySetErrorBuffer(doc, buf)
  end

  # Convert to string replacing underscores with dashes.
  # :output_xml becomes 'output-xml', :show_body_only becomes 'show-body-only'
  #
  # Every underscore is replaced, not just the first, so option names made
  # of three or more words translate correctly.
  #
  def translate_name(name)
    name.to_s.tr('_', '-')
  end

end

Tidylib.load
|
data/lib/tidy/tidyobj.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
# Ruby interface to Tidylib
|
2
|
+
#
|
3
|
+
class Tidyobj

  # Diagnostics collected by the last clean call (Array of String)
  #
  attr_reader(:diagnostics)

  # The tidy document handle returned by Tidylib.create
  #
  attr_reader(:doc)

  # Errors collected by the last clean call (Array of Tidyerr)
  #
  attr_reader(:errors)

  # Options interface (Tidyopt)
  #
  attr_reader(:options)

  # Create the tidy document, attach the error buffer to it, and apply the
  # optional hash of option name => value pairs.
  #
  def initialize(options=nil)
    @diagnostics = []
    @doc = Tidylib.create
    @errors = []
    @errbuf = Tidybuf.new
    @outbuf = Tidybuf.new
    @options = Tidyopt.new(@doc)
    verify_severe(Tidylib.set_error_buffer(@doc, @errbuf.struct))
    return if options.nil?

    options.each { |name, value| Tidylib.opt_parse_value(@doc, name, value) }
  end

  # Parse, clean and repair str, returning the saved output as a String.
  # Refreshes errors and diagnostics as a side effect.
  #
  def clean(str)
    verify_doc

    # Each step only runs if the previous one did not report a severe error.
    status = Tidylib.parse_string(@doc, str)
    status = Tidylib.clean_and_repair(@doc) if status >= 0
    if status > 1
      # Errors were reported: force output, or give up if that cannot be set.
      forced = Tidylib.opt_parse_value(@doc, :force_output, true)
      status = -1 unless forced == 1
    end
    status = Tidylib.save_buffer(@doc, @outbuf.struct) if status >= 0
    verify_severe(status)

    # Copy output and errors out of the buffers before freeing them.
    output = @outbuf.to_s
    @errors = @errbuf.to_a.map { |e| Tidyerr.new(e) }
    @outbuf.free
    @errbuf.free

    # Diagnostics are written to the (now empty) error buffer.
    verify_severe(Tidylib.run_diagnostics(@doc))
    @diagnostics = @errbuf.to_a
    @errbuf.free

    output
  end

  # Load a tidy config file, returning the library's status code.
  # Raises LoadError for status -1 (file missing) and 1 (parse errors).
  #
  def load_config(file)
    verify_doc
    rc = Tidylib.load_config(@doc, file)
    raise LoadError, sprintf('%s does not exist', file) if rc == -1
    raise LoadError, sprintf('errors parsing %s', file) if rc == 1
    rc
  end

  # Release the tidy document and drop the reference to it.
  #
  def release()
    verify_doc
    Tidylib.release(@doc)
    @doc = nil
  end

  # Raise TypeError unless the document handle is a DL::PtrData.
  #
  def verify_doc()
    return if @doc.class == DL::PtrData

    raise TypeError, 'Invalid Tidy document'
  end

  # Raise if the tidy status value is negative (severe error).
  #
  def verify_severe(rc)
    return unless rc < 0

    raise sprintf("A severe error (%d) occurred.\n", rc)
  end

  protected :verify_doc, :verify_severe

end
|
data/lib/tidy/tidyopt.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Ruby interface to Tidylib options
|
2
|
+
#
|
3
|
+
class Tidyopt

  # Remember the tidy document whose options are being accessed.
  #
  def initialize(doc)
    @doc = doc
  end

  # Read an option (Hash syntax): opts[:output_xml]
  #
  def [](name)
    Tidylib.opt_get_value(@doc, name)
  end

  # Write an option (Hash syntax): opts[:output_xml] = true
  #
  def []=(name, value)
    Tidylib.opt_parse_value(@doc, name, value)
  end

  # Read or write an option (Object syntax): opts.output_xml,
  # opts.output_xml = true. The first '=' is stripped from the method name;
  # a call with no argument reads, a call with an argument writes.
  #
  def method_missing(name, value=:none, *args)
    option = name.to_s.sub('=', '')
    if value == :none
      self[option]
    else
      self[option] = value
    end
  end

end
|
data/test/usage.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Example: clean an HTML fragment into XML and print the results.
# $TIDYLIB must be set before the gem is loaded; edit it to point to your
# compiled tidy library.
$TIDYLIB = '/usr/lib/tidylib.so'
require 'rubygems'
require_gem 'tidy'

html = '<html><title>title</title>Body</html>'

# The block's value (the cleaned document) becomes the value of Tidy.open.
xml = Tidy.open(:show_warnings=>true) do |tidy|
  tidy.options.output_xml = true
  puts tidy.options.show_warnings
  cleaned = tidy.clean(html)
  puts tidy.errors
  puts tidy.diagnostics
  cleaned
end

puts xml
|
data/tidy.gemspec
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
# Gem specification for tidy. The version is read from the VERSION file in
# the current directory; every globbed path not containing "CVS" is packaged.
spec = Gem::Specification.new do |s|
  s.name = "tidy"
  s.version = File.read("VERSION").strip
  s.summary = "Ruby interface to HTML Tidy Library Project"
  s.description = "#{s.summary} (http://tidy.sf.net)"
  s.files = Dir.glob("**/*").reject { |path| path.include?("CVS") }
  s.require_path = 'lib'
  s.autorequire = 'tidy'
  s.has_rdoc = true
  s.author = "Kevin Howe"
  s.homepage = "http://www.newclear.ca/ruby/tidy/"
end
|
metadata
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.8.1
|
3
|
+
specification_version: 1
|
4
|
+
name: tidy
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 1.0.0
|
7
|
+
date: 2004-11-04
|
8
|
+
summary: Ruby interface to HTML Tidy Library Project
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
author: Kevin Howe
|
12
|
+
email:
|
13
|
+
homepage: http://www.newclear.ca/ruby/tidy/
|
14
|
+
rubyforge_project:
|
15
|
+
description: Ruby interface to HTML Tidy Library Project (http://tidy.sf.net)
|
16
|
+
autorequire: tidy
|
17
|
+
default_executable:
|
18
|
+
bindir: bin
|
19
|
+
has_rdoc: true
|
20
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
21
|
+
requirements:
|
22
|
+
-
|
23
|
+
- ">"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: 0.0.0
|
26
|
+
version:
|
27
|
+
platform: ruby
|
28
|
+
files:
|
29
|
+
- lib
|
30
|
+
- test
|
31
|
+
- CHANGES
|
32
|
+
- MANIFEST
|
33
|
+
- README.txt.en
|
34
|
+
- VERSION
|
35
|
+
- tidy.gemspec
|
36
|
+
- lib/tidy
|
37
|
+
- lib/tidy.rb
|
38
|
+
- lib/tidy/tidyopt.rb
|
39
|
+
- lib/tidy/tidyobj.rb
|
40
|
+
- lib/tidy/tidylib.rb
|
41
|
+
- lib/tidy/tidyerr.rb
|
42
|
+
- lib/tidy/tidybuf.rb
|
43
|
+
- test/usage.rb
|
44
|
+
test_files: []
|
45
|
+
rdoc_options: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
executables: []
|
48
|
+
extensions: []
|
49
|
+
requirements: []
|
50
|
+
dependencies: []
|