tidy 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +40 -0
- data/MANIFEST +12 -0
- data/README.txt.en +35 -0
- data/VERSION +1 -0
- data/lib/tidy.rb +65 -0
- data/lib/tidy/tidybuf.rb +42 -0
- data/lib/tidy/tidyerr.rb +30 -0
- data/lib/tidy/tidylib.rb +115 -0
- data/lib/tidy/tidyobj.rb +103 -0
- data/lib/tidy/tidyopt.rb +31 -0
- data/test/usage.rb +13 -0
- data/tidy.gemspec +13 -0
- metadata +50 -0
data/CHANGES
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
V 1.0.0
|
2
|
+
|
3
|
+
Oct/18/2004 - Declared stable, now packaged as a RubyGem
|
4
|
+
|
5
|
+
V.B2.5
|
6
|
+
|
7
|
+
- Auto-detection and ENV variables removed, adds too much complexity
|
8
|
+
Module location is now specified by defining $TIDYLIB before require 'tidy'
|
9
|
+
$TIDYLIB is the system path to the library (ex: /usr/lib/tidylib.so)
|
10
|
+
Raises an error if $TIDYLIB is not defined
|
11
|
+
|
12
|
+
V.B2.4
|
13
|
+
|
14
|
+
Now uses ENV['TIDY_LIB'] instead of $TIDY_LIB
|
15
|
+
|
16
|
+
V.B2.3
|
17
|
+
|
18
|
+
- Added library auto-detection
|
19
|
+
a) use $TIDY_LIB if defined
|
20
|
+
b) Search $: and ENV['PATH'] paths for (tidy|tidylib|libtidy|htmltidy).(dll|so)
|
21
|
+
c) Raise an error if not found, otherwise loads
|
22
|
+
|
23
|
+
V.B2.2
|
24
|
+
|
25
|
+
- Tidy.to_b added
|
26
|
+
|
27
|
+
V.B2.1
|
28
|
+
|
29
|
+
- load_config method added
|
30
|
+
|
31
|
+
V.B2
|
32
|
+
|
33
|
+
- load(path) method added, library must be loaded explicitly
|
34
|
+
this avoids hard coding the library name
|
35
|
+
require 'tidy'
|
36
|
+
Tidy.load('path/to/tidylib.so')
|
37
|
+
|
38
|
+
V.B1
|
39
|
+
|
40
|
+
First release
|
data/MANIFEST
ADDED
data/README.txt.en
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
Tidy README
|
2
|
+
============
|
3
|
+
|
4
|
+
Ruby interface to HTML Tidy Library Project (http://tidy.sf.net)
|
5
|
+
|
6
|
+
Requirements
|
7
|
+
------------
|
8
|
+
|
9
|
+
* Recent version of Ruby
|
10
|
+
* RubyGems 0.8+ (http://rubygems.rubyforge.org)
|
11
|
+
* HTML Tidy Library (compiled)
|
12
|
+
|
13
|
+
Install
|
14
|
+
-------
|
15
|
+
|
16
|
+
- Download library from http://tidy.sf.net (pre-compiled versions available)
|
17
|
+
|
18
|
+
- Download the tidy .gem file to a directory. Then type:
|
19
|
+
|
20
|
+
$ gem install tidy-1.0.0.gem
|
21
|
+
|
22
|
+
- Open test/usage.rb, edit $TIDYLIB to point to your compiled tidy library, run.
|
23
|
+
|
24
|
+
Usage
|
25
|
+
-----
|
26
|
+
|
27
|
+
See API docs
|
28
|
+
|
29
|
+
License
|
30
|
+
-------
|
31
|
+
|
32
|
+
Distributed under the same terms as Ruby
|
33
|
+
http://www.ruby-lang.org/en/LICENSE.txt
|
34
|
+
|
35
|
+
Kevin Howe <kh at newclear.ca>
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
data/lib/tidy.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# Ruby interface to HTML Tidy Library Project (http://tidy.sf.net)
#
# = Usage
#
#  $TIDYLIB = '/usr/lib/tidylib.so'
#  require 'rubygems'
#  require_gem 'tidy'
#  html = '<html><title>title</title>Body</html>'
#  xml = Tidy.open(:show_warnings=>true) do |tidy|
#    tidy.options.output_xml = true
#    puts tidy.options.show_warnings
#    xml = tidy.clean(html)
#    puts tidy.errors
#    puts tidy.diagnostics
#    xml
#  end
#  puts xml
#
# Author::  Kevin Howe
# License:: Distributed under the same terms as Ruby
#
module Tidy

  # Load order matters: tidy/tidylib calls Tidylib.load as soon as it is
  # required, so the DL bindings have to be in place first.
  require 'dl/import'
  require 'dl/struct'
  require 'tidy/tidybuf'
  require 'tidy/tidyerr'
  require 'tidy/tidylib'
  require 'tidy/tidyobj'
  require 'tidy/tidyopt'

  module_function

  # Build a Tidyobj, handing it the optional Hash of options.
  #
  def new(options=nil)
    Tidyobj.new(options)
  end

  # Without a block this behaves exactly like Tidy.new and returns the
  # new object.
  # With a block, the object is yielded, the block's value is returned,
  # and release is called on the object when the block exits (even if
  # it raises).
  #
  def open(options=nil)
    tidy = Tidy.new(options)
    return tidy unless block_given?

    begin
      yield tidy
    ensure
      tidy.release
    end
  end

  # Boolean conversion.
  # false for 0, false and nil; true for every other value.
  #
  def to_b(value)
    ![0, false, nil].include?(value)
  end

end
|
data/lib/tidy/tidybuf.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# Buffer structure.
#
# Wraps a DL struct laid out like the TidyBuffer declared below, so it
# can be handed to the Tidylib calls that take a buffer argument.
#
class Tidybuf

  extend DL::Importable

  # Access TidyBuffer instance
  #
  attr_reader(:struct)

  # Mimic TidyBuffer
  #
  TidyBuffer = struct [
    "byte* bp",
    "uint size",
    "uint allocated",
    "uint next"
  ]

  # Allocate a fresh TidyBuffer struct.
  #
  def initialize()
    @struct = TidyBuffer.malloc
  end

  # Free current contents and zero out
  # (delegates to Tidylib.buf_free).
  #
  def free()
    Tidylib.buf_free(@struct)
  end

  # Convert to array, one element per line.
  # Accepts both CRLF and bare LF line endings; splitting on "\r\n" alone
  # returned LF-separated text as a single element.
  #
  def to_a
    to_s.split(/\r?\n/)
  end

  # Convert to string (the struct's bp field rendered with to_s).
  #
  def to_s
    @struct.bp.to_s
  end

end
|
data/lib/tidy/tidyerr.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Parameterized error message.
#
# A String holding one message. When the text has the shape
#   line <number> column <number> <separator> <Severity> <message>
# the individual parts are also available through the readers below.
# For any other text the readers return nil.
#
class Tidyerr < String

  # Shape of a located message, e.g. "line 1 column 7 - Warning: text".
  # Groups: line number, column number, severity word, optional message.
  #
  LOCATED = /\Aline\s+(\d+)\s+column\s+(\d+)\s+\S+\s+(\S+)(?:\s+(.*))?\z/m

  # Error parameters:
  # severity:: first character of the severity word (W or E)
  # line::     line number (Integer)
  # column::   column number (Integer)
  # message::  text after the severity word (nil when there is none)
  #
  attr_reader :severity, :line, :column, :message

  # Create new instance from anything that responds to to_s.
  #
  def initialize(error)
    super(error.to_s)
    parameterize
  end

  # Parse error message into parameters (where applicable).
  #
  # Text that merely starts with "line" but does not have the full
  # located shape is left unparameterized; indexing the split tokens
  # blindly raised NoMethodError on such input.
  #
  def parameterize()
    found = LOCATED.match(to_str)
    return if found.nil?

    @line     = found[1].to_i
    @column   = found[2].to_i
    @severity = found[3][0,1] # W or E
    @message  = found[4]
  end

  protected :parameterize

end
|
data/lib/tidy/tidylib.rb
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
# Ruby wrapper for HTML Tidy Library Project (http://tidy.sf.net)
#
# Thin module-level wrappers around the C functions imported from the
# shared library named by $TIDYLIB. The library is loaded once, at the
# bottom of this file, when the file is required.
#
module Tidylib

  extend DL::Importable

  module_function

  # Load library (requires that $TIDYLIB be defined) and declare the
  # C functions used by the wrappers below.
  #
  # Raises LoadError when $TIDYLIB is nil or when the library cannot
  # be loaded.
  #
  def load()
    raise LoadError, 'Tidy requires that $TIDYLIB be defined' if $TIDYLIB.nil?
    begin
      dlload($TIDYLIB)
    rescue
      raise LoadError, sprintf('Unable to load %s', $TIDYLIB)
    end
    extern "void *tidyCreate()"
    extern "void tidyBufFree(void*)"
    extern "int tidyCleanAndRepair(void*)"
    extern "int tidyLoadConfig(void*, char*)"
    extern "int tidyOptGetIdForName(char*)"
    # NOTE(review): declared with a char return and consumed as a boolean
    # by opt_get_value; confirm against the TidyLib header.
    extern "char tidyOptGetValue(void*, unsigned int)"
    extern "int tidyOptParseValue(void*, char*, char*)"
    extern "int tidyParseString(void*, char*)"
    extern "void tidyRelease(void*)"
    extern "char* tidyReleaseDate()"
    extern "int tidyRunDiagnostics(void*)"
    extern "int tidySaveBuffer(void*, void*)"
    extern "int tidySetErrorBuffer(void*, void*)"
  end

  # tidyBufFree
  #
  def buf_free(buf)
    tidyBufFree(buf)
  end

  # tidyCreate
  #
  def create()
    tidyCreate()
  end

  # tidyCleanAndRepair
  #
  def clean_and_repair(doc)
    tidyCleanAndRepair(doc)
  end

  # tidyLoadConfig (file is converted with to_s)
  #
  def load_config(doc, file)
    tidyLoadConfig(doc, file.to_s)
  end

  # tidyOptParseValue
  # (name goes through translate_name, value is converted with to_s)
  #
  def opt_parse_value(doc, name, value)
    tidyOptParseValue(doc, translate_name(name), value.to_s)
  end

  # tidyOptGetValue (returns true/false instead of 1/0)
  #
  def opt_get_value(doc, name)
    value = tidyOptGetValue(doc, tidyOptGetIdForName(translate_name(name)))
    Tidy.to_b(value)
  end

  # tidyParseString (str is converted with to_s)
  #
  def parse_string(doc, str)
    tidyParseString(doc, str.to_s)
  end

  # tidyRelease
  #
  def release(doc)
    tidyRelease(doc)
  end

  # tidyReleaseDate
  #
  def release_date()
    tidyReleaseDate()
  end

  # tidyRunDiagnostics
  #
  def run_diagnostics(doc)
    tidyRunDiagnostics(doc)
  end

  # tidySaveBuffer
  #
  def save_buffer(doc, buf)
    tidySaveBuffer(doc, buf)
  end

  # tidySetErrorBuffer
  #
  def set_error_buffer(doc, buf)
    tidySetErrorBuffer(doc, buf)
  end

  # Convert to string replacing underscores with dashes.
  # :output_xml becomes 'output-xml', :show_body_only becomes
  # 'show-body-only'.
  #
  # Every underscore is replaced (gsub); sub replaced only the first one,
  # so names with more than one underscore were passed on half-translated.
  #
  def translate_name(name)
    name.to_s.gsub('_', '-')
  end

end

# Load the library as soon as this file is required.
Tidylib.load
|
data/lib/tidy/tidyobj.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
# Ruby interface to Tidylib
#
# Owns one tidy document together with its error and output buffers.
#
class Tidyobj

  # Diagnostics Buffer (Array of String)
  #
  attr_reader(:diagnostics)

  # Access the tidy instance
  #
  attr_reader(:doc)

  # Error Buffer (Array of Tidyerr)
  #
  attr_reader(:errors)

  # Options interface (Tidyopt)
  #
  attr_reader(:options)

  # Construct a new instance.
  # Receives an optional hash of options; each pair is applied through
  # Tidylib.opt_parse_value.
  # Raises if attaching the error buffer reports a negative status.
  #
  def initialize(options=nil)
    @diagnostics = []
    @errors = []
    @doc = Tidylib.create
    @errbuf = Tidybuf.new
    @outbuf = Tidybuf.new
    @options = Tidyopt.new(@doc)

    verify_severe(Tidylib.set_error_buffer(@doc, @errbuf.struct))

    return if options.nil?
    options.each do |name, value|
      Tidylib.opt_parse_value(@doc, name, value)
    end
  end

  # Clean and Repair.
  # Returns the cleaned output as a String and refreshes errors and
  # diagnostics as a side effect.
  #
  def clean(str)
    verify_doc

    # Parse, tidy, then pretty print; each step runs only while the
    # previous status is non-negative.
    status = Tidylib.parse_string(@doc, str)
    status = Tidylib.clean_and_repair(@doc) if status >= 0
    if status > 1
      # If error, force output
      forced = Tidylib.opt_parse_value(@doc, :force_output, true)
      status = -1 unless forced == 1
    end
    status = Tidylib.save_buffer(@doc, @outbuf.struct) if status >= 0
    verify_severe(status)

    # Capture output and errors before the buffers are freed.
    output = @outbuf.to_s
    @errors = @errbuf.to_a.map { |line| Tidyerr.new(line) }
    @outbuf.free
    @errbuf.free

    # Diagnostics are written into the (now empty) error buffer.
    status = Tidylib.run_diagnostics(@doc)
    verify_severe(status)
    @diagnostics = @errbuf.to_a
    @errbuf.free

    output
  end

  # Load a tidy config file.
  # Raises LoadError for status -1 and status 1, otherwise returns the
  # status.
  #
  def load_config(file)
    verify_doc
    rc = Tidylib.load_config(@doc, file)
    raise LoadError, sprintf('%s does not exist', file) if rc == -1
    raise LoadError, sprintf('errors parsing %s', file) if rc == 1
    rc
  end

  # Clear the tidy instance.
  # The document reference is dropped afterwards, so later calls that
  # verify the document raise TypeError.
  #
  def release()
    verify_doc
    Tidylib.release(@doc)
    @doc = nil
  end

  protected

  # Raise an error if the tidy document is invalid
  #
  def verify_doc()
    return if @doc.class == DL::PtrData
    raise TypeError, 'Invalid Tidy document'
  end

  # Raise severe error based on tidy status value (negative means severe)
  #
  def verify_severe(rc)
    return unless rc < 0
    raise sprintf("A severe error (%d) occurred.\n", rc)
  end

end
|
data/lib/tidy/tidyopt.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Ruby interface to Tidylib options
#
# Reads and writes options on the tidy document it was built with,
# using either Hash syntax or Object syntax.
#
class Tidyopt

  # Construct a new instance bound to the given tidy document
  #
  def initialize(doc)
    @doc = doc
  end

  # Reader for options (Hash syntax)
  #
  def [](name)
    Tidylib.opt_get_value(@doc, name)
  end

  # Writer for options (Hash syntax)
  #
  def []=(name, value)
    Tidylib.opt_parse_value(@doc, name, value)
  end

  # Reader/Writer for options (Object syntax)
  #
  # The first '=' is removed from the called name. A call without a value
  # (the :none marker) reads the option; any other value writes it.
  #
  def method_missing(name, value=:none, *args)
    option = name.to_s.sub('=', '')
    if value == :none
      self[option]
    else
      self[option] = value
    end
  end

end
|
data/test/usage.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Usage example: cleans a small HTML fragment and prints the option
# value, the errors, the diagnostics and finally the XML output.
# Edit $TIDYLIB to point to your compiled tidy library before running.
$TIDYLIB = '/usr/lib/tidylib.so'
require 'rubygems'
require_gem 'tidy'

html = '<html><title>title</title>Body</html>'

# The block's last value becomes the return value of Tidy.open.
xml = Tidy.open(:show_warnings=>true) do |tidy|
  tidy.options.output_xml = true
  puts tidy.options.show_warnings
  cleaned = tidy.clean(html)
  puts tidy.errors
  puts tidy.diagnostics
  cleaned
end

puts xml
|
data/tidy.gemspec
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'rubygems'

# Gem specification for the tidy package.
# The version is read from the VERSION file in the current directory, and
# the file list is every path under it except those containing "CVS".
spec = Gem::Specification.new do |s|
  s.name         = "tidy"
  s.version      = File.read("VERSION").strip
  s.summary      = "Ruby interface to HTML Tidy Library Project"
  s.description  = "#{s.summary} (http://tidy.sf.net)"
  s.files        = Dir.glob("**/*").reject { |path| path.include?("CVS") }
  s.require_path = 'lib'
  s.autorequire  = 'tidy'
  s.has_rdoc     = true
  s.author       = "Kevin Howe"
  s.homepage     = "http://www.newclear.ca/ruby/tidy/"
end
|
metadata
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.8.1
|
3
|
+
specification_version: 1
|
4
|
+
name: tidy
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 1.0.0
|
7
|
+
date: 2004-11-04
|
8
|
+
summary: Ruby interface to HTML Tidy Library Project
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
author: Kevin Howe
|
12
|
+
email:
|
13
|
+
homepage: http://www.newclear.ca/ruby/tidy/
|
14
|
+
rubyforge_project:
|
15
|
+
description: Ruby interface to HTML Tidy Library Project (http://tidy.sf.net)
|
16
|
+
autorequire: tidy
|
17
|
+
default_executable:
|
18
|
+
bindir: bin
|
19
|
+
has_rdoc: true
|
20
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
21
|
+
requirements:
|
22
|
+
-
|
23
|
+
- ">"
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: 0.0.0
|
26
|
+
version:
|
27
|
+
platform: ruby
|
28
|
+
files:
|
29
|
+
- lib
|
30
|
+
- test
|
31
|
+
- CHANGES
|
32
|
+
- MANIFEST
|
33
|
+
- README.txt.en
|
34
|
+
- VERSION
|
35
|
+
- tidy.gemspec
|
36
|
+
- lib/tidy
|
37
|
+
- lib/tidy.rb
|
38
|
+
- lib/tidy/tidyopt.rb
|
39
|
+
- lib/tidy/tidyobj.rb
|
40
|
+
- lib/tidy/tidylib.rb
|
41
|
+
- lib/tidy/tidyerr.rb
|
42
|
+
- lib/tidy/tidybuf.rb
|
43
|
+
- test/usage.rb
|
44
|
+
test_files: []
|
45
|
+
rdoc_options: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
executables: []
|
48
|
+
extensions: []
|
49
|
+
requirements: []
|
50
|
+
dependencies: []
|