tidy 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGES ADDED
@@ -0,0 +1,40 @@
1
+ V 1.0.0
2
+
3
+ Oct/18/2004 - Declared stable, now packaged as a RubyGem
4
+
5
+ V.B2.5
6
+
7
+ - Auto-detection and ENV variables removed, adds too much complexity
8
+ Module location is now specified by defining $TIDYLIB before require 'tidy'
9
+ $TIDYLIB is the system path to the library (ex: /usr/lib/tidylib.so)
10
+ Raises an error if $TIDYLIB is not defined
11
+
12
+ V.B2.4
13
+
14
+ Now uses ENV['TIDY_LIB'] instead of $TIDY_LIB
15
+
16
+ V.B2.3
17
+
18
+ - Added library auto-detection
19
+ a) use $TIDY_LIB if defined
20
+ b) Search $: and ENV['PATH'] paths for (tidy|tidylib|libtidy|htmltidy).(dll|so)
21
+ c) Raise an error if not found, otherwise loads
22
+
23
+ V.B2.2
24
+
25
+ - Tidy.to_b added
26
+
27
+ V.B2.1
28
+
29
+ - load_config method added
30
+
31
+ V.B2
32
+
33
+ - load(path) method added, library must be loaded explicitly
34
+ this avoids hard coding the library name
35
+ require 'tidy'
36
+ Tidy.load('path/to/tidylib.so')
37
+
38
+ V.B1
39
+
40
+ First release
@@ -0,0 +1,12 @@
1
+ lib/tidy.rb
2
+ lib/tidy/tidybuf.rb
3
+ lib/tidy/tidyerr.rb
4
+ lib/tidy/tidylib.rb
5
+ lib/tidy/tidyobj.rb
6
+ lib/tidy/tidyopt.rb
7
+ test/usage.rb
8
+ CHANGES
9
+ MANIFEST
10
+ README.txt.en
11
+ tidy.gemspec
12
+ VERSION
@@ -0,0 +1,35 @@
1
+ Tidy README
2
+ ============
3
+
4
+ Ruby interface to HTML Tidy Library Project (http://tidy.sf.net)
5
+
6
+ Requirements
7
+ ------------
8
+
9
+ * Recent version of Ruby
10
+ * RubyGems 1.8+ (http://rubygems.rubyforge.org)
11
+ * HTML Tidy Library (compiled)
12
+
13
+ Install
14
+ -------
15
+
16
+ - Download library from http://tidy.sf.net (pre-compiled versions available)
17
+
18
+ - Download the tidy .gem file to a directory. Then type:
19
+
20
+ $ gem install tidy-1.0.0.gem
21
+
22
+ - Open test/usage.rb, edit $TIDYLIB to point to your compiled tidy library, run.
23
+
24
+ Usage
25
+ -----
26
+
27
+ See API docs
28
+
29
+ License
30
+ -------
31
+
32
+ Distributed under the same terms as Ruby
33
+ http://www.ruby-lang.org/en/LICENSE.txt
34
+
35
+ Kevin Howe <kh at newclear.ca>
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
@@ -0,0 +1,65 @@
1
# Ruby interface to HTML Tidy Library Project (http://tidy.sf.net)
#
# = Usage
#
#   $TIDYLIB = '/usr/lib/tidylib.so'
#   require 'rubygems'
#   require_gem 'tidy'
#   html = '<html><title>title</title>Body</html>'
#   xml = Tidy.open(:show_warnings=>true) do |tidy|
#     tidy.options.output_xml = true
#     puts tidy.options.show_warnings
#     xml = tidy.clean(html)
#     puts tidy.errors
#     puts tidy.diagnostics
#     xml
#   end
#   puts xml
#
# Author::  Kevin Howe
# License:: Distributed under the same terms as Ruby
#
module Tidy

  # The DL bindings come first; the tidy/* files rely on them being loaded.
  require 'dl/import'
  require 'dl/struct'
  require 'tidy/tidybuf'
  require 'tidy/tidyerr'
  require 'tidy/tidylib'
  require 'tidy/tidyobj'
  require 'tidy/tidyopt'

  module_function

  # Build a Tidyobj, handing +options+ straight to its constructor.
  #
  def new(options=nil)
    Tidyobj.new(options)
  end

  # Without a block this behaves exactly like Tidy.new and returns the
  # new object. With a block, the object is yielded, the block's value is
  # returned, and release is called on the object afterwards even if the
  # block raises.
  #
  def open(options=nil)
    tidy = Tidy.new(options)
    return tidy unless block_given?

    begin
      yield tidy
    ensure
      tidy.release
    end
  end

  # Boolean conversion: 0, false and nil map to false; every other value
  # maps to true.
  #
  def to_b(value)
    ![0, false, nil].include?(value)
  end

end
@@ -0,0 +1,42 @@
1
# Buffer structure
#
# Wraps a DL struct laid out like tidy's TidyBuffer so it can be handed to
# the library as an output or error sink and read back from Ruby.
#
class Tidybuf

  extend DL::Importable

  # Access TidyBuffer instance (the raw DL struct passed to Tidylib calls)
  #
  attr_reader(:struct)

  # Mimic TidyBuffer
  #
  TidyBuffer = struct [
    "byte* bp",
    "uint size",
    "uint allocated",
    "uint next"
  ]

  # Allocate a fresh TidyBuffer struct.
  #
  def initialize
    @struct = TidyBuffer.malloc
  end

  # Free current contents and zero out (delegates to Tidylib.buf_free)
  #
  def free
    Tidylib.buf_free(@struct)
  end

  # Convert to array, one element per line.
  #
  # Lines are split on either CRLF or a bare LF. Splitting on "\r\n" alone
  # returns an LF-only buffer as a single element.
  #
  def to_a
    to_s.split(/\r?\n/)
  end

  # Convert to string (the contents behind the struct's bp pointer)
  #
  def to_s
    @struct.bp.to_s
  end

end
@@ -0,0 +1,30 @@
1
# Parameterized error message
#
# A String holding one line of tidy's error output. When the text starts
# with 'line' and has the expected number of whitespace-separated tokens,
# the pieces are also exposed through the readers below; otherwise the
# readers stay nil and the object is just the raw text.
#
class Tidyerr < String

  # Error parameters:
  # severity:: first character of the sixth token (W or E)
  # line::     second token, as an Integer
  # column::   fourth token, as an Integer
  # message::  everything after the sixth token (nil when absent)
  #
  attr_reader :severity, :line, :column, :message

  # Create new instance from anything responding to to_s
  #
  def initialize(error)
    super(error.to_s)
    parameterize
  end

  # Parse error message into parameters (where applicable)
  #
  def parameterize
    return unless to_str[0, 4] == 'line'

    tokens = to_str.split(' ', 7)
    # Text that merely starts with "line" may not carry all the fields;
    # leave the readers nil rather than raising NoMethodError on tokens[5].
    return if tokens.size < 6

    @severity = tokens[5][0, 1] # W or E
    @line     = tokens[1].to_i
    @column   = tokens[3].to_i
    @message  = tokens[6]
  end

  protected :parameterize

end
@@ -0,0 +1,115 @@
1
# Ruby wrapper for HTML Tidy Library Project (http://tidy.sf.net)
#
# Thin module-function layer over the C functions imported through DL.
# The shared library named by $TIDYLIB is loaded as soon as this file is
# required (see the Tidylib.load call at the bottom).
#
module Tidylib

  extend DL::Importable

  module_function

  # Load library (requires that $TIDYLIB be defined)
  #
  # Raises LoadError when $TIDYLIB is nil or the library cannot be loaded,
  # then declares the C functions used by the wrappers below.
  #
  def load
    raise LoadError, 'Tidy requires that $TIDYLIB be defined' if $TIDYLIB.nil?
    begin
      dlload($TIDYLIB)
    rescue
      raise LoadError, sprintf('Unable to load %s', $TIDYLIB)
    end
    extern "void *tidyCreate()"
    extern "void tidyBufFree(void*)"
    extern "int tidyCleanAndRepair(void*)"
    extern "int tidyLoadConfig(void*, char*)"
    extern "int tidyOptGetIdForName(char*)"
    extern "char tidyOptGetValue(void*, unsigned int)"
    extern "int tidyOptParseValue(void*, char*, char*)"
    extern "int tidyParseString(void*, char*)"
    extern "void tidyRelease(void*)"
    extern "char* tidyReleaseDate()"
    extern "int tidyRunDiagnostics(void*)"
    extern "int tidySaveBuffer(void*, void*)"
    extern "int tidySetErrorBuffer(void*, void*)"
  end

  # tidyBufFree
  #
  def buf_free(buf)
    tidyBufFree(buf)
  end

  # tidyCreate
  #
  def create
    tidyCreate()
  end

  # tidyCleanAndRepair
  #
  def clean_and_repair(doc)
    tidyCleanAndRepair(doc)
  end

  # tidyLoadConfig (file is converted with to_s)
  #
  def load_config(doc, file)
    tidyLoadConfig(doc, file.to_s)
  end

  # tidyOptParseValue (name goes through translate_name, value through to_s)
  #
  def opt_parse_value(doc, name, value)
    tidyOptParseValue(doc, translate_name(name), value.to_s)
  end

  # tidyOptGetValue (returns true/false instead of 1/0)
  #
  def opt_get_value(doc, name)
    value = tidyOptGetValue(doc, tidyOptGetIdForName(translate_name(name)))
    Tidy.to_b(value)
  end

  # tidyParseString (str is converted with to_s)
  #
  def parse_string(doc, str)
    tidyParseString(doc, str.to_s)
  end

  # tidyRelease
  #
  def release(doc)
    tidyRelease(doc)
  end

  # tidyReleaseDate
  #
  def release_date
    tidyReleaseDate()
  end

  # tidyRunDiagnostics
  #
  def run_diagnostics(doc)
    tidyRunDiagnostics(doc)
  end

  # tidySaveBuffer
  #
  def save_buffer(doc, buf)
    tidySaveBuffer(doc, buf)
  end

  # tidySetErrorBuffer
  #
  def set_error_buffer(doc, buf)
    tidySetErrorBuffer(doc, buf)
  end

  # Convert to string replacing underscores with dashes.
  # :output_xml becomes 'output-xml', :show_body_only becomes
  # 'show-body-only'.
  #
  # Every underscore is replaced; replacing only the first one would turn
  # a multi-word name into something like 'show-body_only'.
  #
  def translate_name(name)
    name.to_s.tr('_', '-')
  end

end

Tidylib.load
@@ -0,0 +1,103 @@
1
# Ruby interface to Tidylib
#
# Owns one tidy document handle plus the output and error buffers that go
# with it, and exposes cleaning, config loading and release.
#
class Tidyobj

  # Diagnostics Buffer (Array of String)
  #
  attr_reader(:diagnostics)

  # Access the tidy instance
  #
  attr_reader(:doc)

  # Error Buffer (Array of Tidyerr)
  #
  attr_reader(:errors)

  # Options interface (Tidyopt)
  #
  attr_reader(:options)

  # Construct a new instance.
  # Receives a hash of options to be set (nil sets none).
  # Raises if attaching the error buffer reports a negative status.
  #
  def initialize(options=nil)
    @diagnostics = []
    @doc = Tidylib.create
    @errors = []
    @errbuf = Tidybuf.new
    @outbuf = Tidybuf.new
    @options = Tidyopt.new(@doc)
    verify_severe(Tidylib.set_error_buffer(@doc, @errbuf.struct))
    return if options.nil?

    options.each do |name, value|
      Tidylib.opt_parse_value(@doc, name, value)
    end
  end

  # Clean and Repair
  #
  # Parses +str+, repairs it and returns the output buffer as a String.
  # Afterwards +errors+ holds the error buffer lines as Tidyerr objects and
  # +diagnostics+ holds the lines produced by the diagnostics run.
  # Raises when the document is invalid or tidy reports a negative status.
  #
  def clean(str)
    verify_doc

    # Each stage runs only if the previous one did not fail (status >= 0).
    status = Tidylib.parse_string(@doc, str)
    status = Tidylib.clean_and_repair(@doc) if status >= 0
    if status > 1
      # Status above 1: switch on force_output, or give up with -1.
      forced = Tidylib.opt_parse_value(@doc, :force_output, true)
      status = -1 unless forced == 1
    end
    status = Tidylib.save_buffer(@doc, @outbuf.struct) if status >= 0
    verify_severe(status)

    # Copy out the results before the buffers are freed.
    output = @outbuf.to_s
    @errors = @errbuf.to_a.map { |text| Tidyerr.new(text) }
    @outbuf.free
    @errbuf.free

    # The error buffer was just freed and is read again after the
    # diagnostics run.
    verify_severe(Tidylib.run_diagnostics(@doc))
    @diagnostics = @errbuf.to_a
    @errbuf.free

    output
  end

  # Load a tidy config file
  #
  # Returns the library's status code; raises LoadError for -1 (reported
  # as a missing file) and for 1 (reported as parse errors).
  #
  def load_config(file)
    verify_doc
    rc = Tidylib.load_config(@doc, file)
    raise LoadError, sprintf('%s does not exist', file) if rc == -1
    raise LoadError, sprintf('errors parsing %s', file) if rc == 1
    rc
  end

  # Clear the tidy instance; the handle is set to nil afterwards
  #
  def release
    verify_doc
    Tidylib.release(@doc)
    @doc = nil
  end

  # Raise an error if the tidy document is invalid
  # (that is, @doc is not exactly a DL::PtrData)
  #
  def verify_doc
    raise TypeError, 'Invalid Tidy document' if @doc.class != DL::PtrData
  end

  # Raise severe error based on tidy status value (any negative value)
  #
  def verify_severe(rc)
    return unless rc < 0

    raise sprintf("A severe error (%d) occurred.\n", rc)
  end

  protected :verify_doc, :verify_severe

end
@@ -0,0 +1,31 @@
1
# Ruby interface to Tidylib options
#
# Gives hash-style and method-style access to the options of one tidy
# document; every read and write is forwarded to Tidylib.
#
class Tidyopt

  # Construct a new instance bound to the given tidy document
  #
  def initialize(doc)
    @doc = doc
  end

  # Reader for options (Hash syntax)
  #
  def [](name)
    Tidylib.opt_get_value(@doc, name)
  end

  # Writer for options (Hash syntax)
  #
  def []=(name, value)
    Tidylib.opt_parse_value(@doc, name, value)
  end

  # Reader/Writer for options (Object syntax)
  #
  # opts.foo reads option "foo"; opts.foo = x writes it. The first '=' is
  # stripped from the method name, and a call without an argument (value
  # left at the :none sentinel) is treated as a read. Extra arguments are
  # ignored.
  #
  def method_missing(name, value=:none, *args)
    option = name.to_s.sub('=', '')
    if value == :none
      self[option]
    else
      self[option] = value
    end
  end

end
@@ -0,0 +1,13 @@
1
# Usage example: clean a small HTML fragment and print the result.
# $TIDYLIB must be set before the gem is required.
$TIDYLIB = '/usr/lib/tidylib.so'
require 'rubygems'
require_gem 'tidy'

html = '<html><title>title</title>Body</html>'

# The block's last expression becomes the value of Tidy.open.
xml = Tidy.open(:show_warnings => true) do |tidy|
  tidy.options.output_xml = true
  puts tidy.options.show_warnings
  cleaned = tidy.clean(html)
  puts tidy.errors
  puts tidy.diagnostics
  cleaned
end

puts xml
@@ -0,0 +1,13 @@
1
# Gem specification for tidy. The version is read from the VERSION file
# and the file list is everything under the current directory except
# paths containing "CVS".
require 'rubygems'

spec = Gem::Specification.new do |s|
  s.name         = "tidy"
  s.version      = File.read("VERSION").strip
  s.summary      = "Ruby interface to HTML Tidy Library Project"
  s.description  = "#{s.summary} (http://tidy.sf.net)"
  s.files        = Dir.glob("**/*").reject { |path| path.include?("CVS") }
  s.require_path = 'lib'
  s.autorequire  = 'tidy'
  s.has_rdoc     = true
  s.author       = "Kevin Howe"
  s.homepage     = "http://www.newclear.ca/ruby/tidy/"
end
metadata ADDED
@@ -0,0 +1,50 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.1
3
+ specification_version: 1
4
+ name: tidy
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2004-11-04
8
+ summary: Ruby interface to HTML Tidy Library Project
9
+ require_paths:
10
+ - lib
11
+ author: Kevin Howe
12
+ email:
13
+ homepage: http://www.newclear.ca/ruby/tidy/
14
+ rubyforge_project:
15
+ description: Ruby interface to HTML Tidy Library Project (http://tidy.sf.net)
16
+ autorequire: tidy
17
+ default_executable:
18
+ bindir: bin
19
+ has_rdoc: true
20
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
21
+ requirements:
22
+ -
23
+ - ">"
24
+ - !ruby/object:Gem::Version
25
+ version: 0.0.0
26
+ version:
27
+ platform: ruby
28
+ files:
29
+ - lib
30
+ - test
31
+ - CHANGES
32
+ - MANIFEST
33
+ - README.txt.en
34
+ - VERSION
35
+ - tidy.gemspec
36
+ - lib/tidy
37
+ - lib/tidy.rb
38
+ - lib/tidy/tidyopt.rb
39
+ - lib/tidy/tidyobj.rb
40
+ - lib/tidy/tidylib.rb
41
+ - lib/tidy/tidyerr.rb
42
+ - lib/tidy/tidybuf.rb
43
+ - test/usage.rb
44
+ test_files: []
45
+ rdoc_options: []
46
+ extra_rdoc_files: []
47
+ executables: []
48
+ extensions: []
49
+ requirements: []
50
+ dependencies: []