tidy-fork 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGES ADDED
@@ -0,0 +1,86 @@
1
+ V 1.1.4 2009/5/13
2
+ - Added a naive solution to having to do Tidy.path="/usr/lib/libtidy.so"
3
+ - Automatically deduces the loadpath
4
+
5
+ V 1.1.3 2008/10/20
6
+ - Introduced Tidy.fresh_tidy_version = bool flag to enable/disable support
7
+ for fresh Tidylib versions. Disabled by default, turn it on if you
8
+ experience seg faults.
9
+ - Segmentation fault when loading the TidyBuffer from the dynamic library.
10
+ Fix for 'tidybuf.rb:40: [BUG] Segmentation fault' error.
11
+ [Patch by Ben Walding (bwalding)]
12
+
13
+ - The line separator is hard-coded instead of using the $/ magic variable,
14
+ so it breaks on unix platforms.
15
+ [Patch by Damien Merenne <dam@cosinux.org>]
16
+
17
+ V 1.1.2 2005/03/14
18
+
19
+ - Tidybuf.to_s bugfix: added nil check
20
+ @struct.bp ? @struct.bp.to_s(@struct.size) : ''
21
+
22
+ V 1.1.1 2005/03/07
23
+
24
+ - Bugfix
25
+
26
+ Tidybuf.to_s assumes that TidyBuffer.bp is null-terminated. This is
27
+ only true if the length of the data is not evenly divisible by 256, but
28
+ if Tidy output is e.g. 512 bytes long the buffer is not null-terminated
29
+ and the code reads out of bounds. Result: garbage chars in output.
30
+
31
+ Fix: Code changed to read only TidyBuffer.size bytes from the buffer.
32
+ In TidyBuffer#to_s: @struct.bp.to_s => @struct.bp.to_s(@struct.size)
33
+ '.'*345 = 512 char output (for testing)
34
+
35
+ V 1.1.0 2005/02/21
36
+
37
+ - $TIDYLIB global eliminated
38
+ Path to the library is now specified as follows:
39
+ Tidy.path = '/path/to/tidylib.so'
40
+
41
+ - minor code/docs/gemspec cleanup
42
+
43
+ V 1.0.1
44
+
45
+ .sub calls changed to .gsub
46
+
47
+ V 1.0.0
48
+
49
+ Oct/18/2004 - Declared stable, now packaged as a RubyGem
50
+
51
+ V.B2.5
52
+
53
+ - Auto-detection and ENV variables removed, adds too much complexity
54
+ Module location is now specified by defining $TIDYLIB before require 'tidy'
55
+ $TIDYLIB is the system path to the library (ex: /usr/lib/tidylib.so)
56
+ Raises an error if $TIDYLIB is not defined
57
+
58
+ V.B2.4
59
+
60
+ Now uses ENV['TIDY_LIB'] instead of $TIDY_LIB
61
+
62
+ V.B2.3
63
+
64
+ - Added library auto-detection
65
+ a) use $TIDY_LIB if defined
66
+ b) Search $: and ENV['PATH'] paths for (tidy|tidylib|libtidy|htmltidy).(dll|so)
67
+ c) Raise an error if not found, otherwise loads
68
+
69
+ V.B2.2
70
+
71
+ - Tidy.to_b added
72
+
73
+ V.B2.1
74
+
75
+ - load_config method added
76
+
77
+ V.B2
78
+
79
+ - load(path) method added, library must be loaded explicitly
80
+ this avoids hard coding the library name
81
+ require 'tidy'
82
+ Tidy.load('path/to/tidylib.so')
83
+
84
+ V.B1
85
+
86
+ First release
data/MANIFEST ADDED
@@ -0,0 +1,12 @@
1
+ lib/tidy.rb
2
+ lib/tidy/tidybuf.rb
3
+ lib/tidy/tidyerr.rb
4
+ lib/tidy/tidylib.rb
5
+ lib/tidy/tidyobj.rb
6
+ lib/tidy/tidyopt.rb
7
+ test/usage.rb
8
+ CHANGES
9
+ MANIFEST
10
+ README.txt.en
11
+ tidy.gemspec
12
+ VERSION
data/README.txt.en ADDED
@@ -0,0 +1,36 @@
1
+ Tidy README
2
+ ============
3
+
4
+ Ruby interface to HTML Tidy Library Project (http://tidy.sf.net)
5
+
6
+ Requirements
7
+ ------------
8
+
9
+ * Recent version of Ruby
10
+ * RubyGems 1.8+ (http://rubygems.rubyforge.org)
11
+ * HTML Tidy Library (compiled)
12
+
13
+ Install
14
+ -------
15
+
16
+ - Download library from http://tidy.sf.net (pre-compiled versions available).
17
+
18
+ - Install files using one of the following:
19
+
20
+ $ gem install tidy-x-x-x.gem
21
+ $ ruby install.rb
22
+
23
+ - Open test/usage.rb, change Tidy.path to point to your compiled Tidy library, run.
24
+
25
+ Usage
26
+ -----
27
+
28
+ See API docs
29
+
30
+ License
31
+ -------
32
+
33
+ Distributed under the same terms as Ruby
34
+ http://www.ruby-lang.org/en/LICENSE.txt
35
+
36
+ Kevin Howe <kh at newclear.ca>
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.1.3
data/install.rb ADDED
@@ -0,0 +1,11 @@
1
# Copies every regular file under lib/ into the running interpreter's
# site library directory, recreating the directory layout as needed.
#
# Run from the package root:  ruby install.rb
#
require 'rbconfig'
require 'fileutils'

# Install lib
# RbConfig is the supported name of the build-configuration module;
# the bare Config constant is only a legacy alias for it.
dst_dir = RbConfig::CONFIG['sitelibdir']

Dir.chdir('lib') do
  # Keep regular files only, and skip any path that ends in (or contains a
  # directory ending in) .cvs, .gem or .svn.
  sources = Dir['**/*'].select do |f|
    File.file?(f) && f !~ /\.(cvs|gem|svn)($|\/)/i
  end

  sources.each do |file|
    # FileUtils replaces the ftools File.mkpath / File.install helpers;
    # :verbose mirrors the trailing +true+ argument they were given.
    FileUtils.mkdir_p(File.join(dst_dir, File.dirname(file)), :verbose => true)
    FileUtils.install(file, File.join(dst_dir, file), :mode => 0644, :verbose => true)
  end
end
@@ -0,0 +1,50 @@
1
# Wrapper around a TidyBuffer struct allocated through DL.
#
class Tidybuf

  extend DL::Importable

  # The underlying TidyBuffer struct instance.
  #
  attr_reader :struct

  # Allocate the struct described by construct_tidy_buffer and pass it to
  # Tidylib.buf_init.
  #
  def initialize
    layout = construct_tidy_buffer
    @struct = self.class.struct(layout).malloc
    Tidylib.buf_init(@struct)
  end

  # Hand the buffer to Tidylib.buf_free.
  #
  def free
    Tidylib.buf_free(@struct)
  end

  # Buffer contents split on the $/ record separator.
  #
  def to_a
    to_s.split($/)
  end

  # Buffer contents as a String.
  # Returns '' when the data pointer is unset; otherwise reads exactly
  # +size+ bytes from it rather than relying on a terminator.
  #
  def to_s
    data = @struct.bp
    return '' unless data
    data.to_s(@struct.size)
  end

  protected

  # Member declarations for the TidyBuffer struct, in layout order.
  # An extra leading pointer member is prepended when
  # Tidy.fresh_tidy_version is enabled.
  #
  # NOTE(review): with the flag enabled the layout holds two members named
  # "allocator" -- confirm this matches the TidyBuffer layout of the
  # library version being loaded.
  #
  def construct_tidy_buffer
    members = [
      "TidyAllocator* allocator",
      "byte* bp",
      "uint size",
      "uint allocated",
      "uint next"
    ]
    members.unshift("int* allocator") if Tidy.fresh_tidy_version
    members
  end

end
@@ -0,0 +1,30 @@
1
# Parameterized error message.
#
# Behaves as the original message String. When the message has the form
#
#   line <n> column <n> - <Severity>: <text>
#
# its parts are also exposed through the readers below. For any other
# message the readers return nil.
#
class Tidyerr < String

  # Error parameters: first letter of the severity word (e.g. "W" or "E"),
  # line number, column number and the remaining message text.
  #
  attr_reader :severity, :line, :column, :message

  # Create new instance from +error+ (converted with to_s).
  #
  def initialize(error)
    super(error.to_s)
    parameterize
  end

  # Parse error message into parameters (where applicable).
  #
  def parameterize
    return unless to_str[0, 4] == 'line'

    # The limit of 7 keeps the free-form message text together in tokens[6].
    tokens = to_str.split(' ', 7)

    # A message that merely starts with "line" but lacks the
    # "line N column N - Severity:" prefix is left unparsed instead of
    # failing on a missing token.
    return if tokens.length < 6

    @severity = tokens[5][0, 1] # W or E
    @line     = tokens[1].to_i
    @column   = tokens[3].to_i
    @message  = tokens[6]
  end

  protected :parameterize

end
@@ -0,0 +1,117 @@
1
# Ruby wrapper for HTML Tidy Library Project (http://tidy.sf.net).
#
# Each public method below is a thin pass-through to the C function named
# in its comment, declared via DL when the library is loaded.
#
module Tidylib

  extend DL::Importable

  module_function

  # Load the library at +path+ and declare the C functions used by this
  # wrapper. Raises LoadError if dlload fails.
  #
  def load(path)
    begin
      dlload(path)
    rescue
      raise LoadError, "Unable to load #{path}"
    end

    # NOTE(review): tidyOptGetValue is declared here as returning char --
    # confirm against the C prototype in the Tidy headers.
    signatures = [
      "void *tidyCreate()",
      "void tidyBufFree(void*)",
      "void tidyBufInit(void*)",
      "int tidyCleanAndRepair(void*)",
      "int tidyLoadConfig(void*, char*)",
      "int tidyOptGetIdForName(char*)",
      "char tidyOptGetValue(void*, unsigned int)",
      "int tidyOptParseValue(void*, char*, char*)",
      "int tidyParseString(void*, char*)",
      "void tidyRelease(void*)",
      "char* tidyReleaseDate()",
      "int tidyRunDiagnostics(void*)",
      "int tidySaveBuffer(void*, void*)",
      "int tidySetErrorBuffer(void*, void*)"
    ]
    # Declared in list order; the value of the final declaration is returned.
    signatures.map { |signature| extern(signature) }.last
  end

  # tidyBufInit
  #
  def buf_init(buf)
    tidyBufInit(buf)
  end

  # tidyBufFree
  #
  def buf_free(buf)
    tidyBufFree(buf)
  end

  # tidyCreate
  #
  def create
    tidyCreate()
  end

  # tidyCleanAndRepair
  #
  def clean_and_repair(doc)
    tidyCleanAndRepair(doc)
  end

  # tidyLoadConfig (+file+ is converted with to_s)
  #
  def load_config(doc, file)
    config_path = file.to_s
    tidyLoadConfig(doc, config_path)
  end

  # tidyOptParseValue (+name+ goes through translate_name, +value+ through to_s)
  #
  def opt_parse_value(doc, name, value)
    option = translate_name(name)
    tidyOptParseValue(doc, option, value.to_s)
  end

  # tidyOptGetValue (returns true/false instead of 1/0)
  #
  def opt_get_value(doc, name)
    option_id = tidyOptGetIdForName(translate_name(name))
    raw = tidyOptGetValue(doc, option_id)
    Tidy.to_b(raw)
  end

  # tidyParseString (+str+ is converted with to_s)
  #
  def parse_string(doc, str)
    markup = str.to_s
    tidyParseString(doc, markup)
  end

  # tidyRelease
  #
  def release(doc)
    tidyRelease(doc)
  end

  # tidyReleaseDate
  #
  def release_date
    tidyReleaseDate()
  end

  # tidyRunDiagnostics
  #
  def run_diagnostics(doc)
    tidyRunDiagnostics(doc)
  end

  # tidySaveBuffer
  #
  def save_buffer(doc, buf)
    tidySaveBuffer(doc, buf)
  end

  # tidySetErrorBuffer
  #
  def set_error_buffer(doc, buf)
    tidySetErrorBuffer(doc, buf)
  end

  # Option name as a String with underscores turned into dashes
  # (:output_xml => 'output-xml').
  #
  def translate_name(name)
    text = name.to_s
    text.gsub('_', '-')
  end

end
@@ -0,0 +1,103 @@
1
# Ruby interface to Tidylib.
#
# Owns one tidy document plus an output buffer and an error buffer, and
# exposes the results of the last clean call through its readers.
#
class Tidyobj

  # Diagnostics Buffer (Array of String).
  #
  attr_reader(:diagnostics)

  # Access the tidy instance.
  #
  attr_reader(:doc)

  # Error Buffer (Array of Tidyerr).
  #
  attr_reader(:errors)

  # Options interface (Tidyopt).
  #
  attr_reader(:options)

  # Construct a new instance.
  # Receives a hash of options to be set (option name => value); each pair
  # is passed to Tidylib.opt_parse_value.
  # Raises RuntimeError if attaching the error buffer reports a negative
  # status.
  #
  def initialize(options=nil)
    @diagnostics = Array.new
    @doc = Tidylib.create
    @errors = Array.new
    @errbuf = Tidybuf.new
    @outbuf = Tidybuf.new
    @options = Tidyopt.new(@doc)
    rc = Tidylib.set_error_buffer(@doc, @errbuf.struct)
    verify_severe(rc)
    unless options.nil?
      options.each { |name, value| Tidylib.opt_parse_value(@doc, name, value) }
    end
  end

  # Clean and Repair.
  #
  # Parses +str+, repairs it and returns the resulting markup as a String.
  # Afterwards +errors+ holds the messages produced while cleaning and
  # +diagnostics+ the output of the diagnostics run.
  #
  # Raises TypeError if the document has been released, and RuntimeError
  # when a step reports a negative status. The buffers are freed in either
  # case, so a failed call leaves no stale content behind for the next one.
  #
  def clean(str)
    verify_doc
    output = nil

    # Clean and repair the string.
    #
    begin
      rc = Tidylib.parse_string(@doc, str)                                               # Parse the input
      rc = Tidylib.clean_and_repair(@doc) if rc >= 0                                     # Tidy it up!
      rc = (Tidylib.opt_parse_value(@doc, :force_output, true) == 1 ? rc : -1) if rc > 1 # If error, force output
      rc = Tidylib.save_buffer(@doc, @outbuf.struct) if rc >= 0                          # Pretty Print
      verify_severe(rc)

      # Save output/errors before the buffers are cleared.
      #
      output = @outbuf.to_s
      @errors = @errbuf.to_a.collect { |e| Tidyerr.new(e) }
    ensure
      # Always clear both buffers, even when a step above raised.
      @outbuf.free
      @errbuf.free
    end

    # Save diagnostics (written to the same error buffer).
    #
    begin
      rc = Tidylib.run_diagnostics(@doc)
      verify_severe(rc)
      @diagnostics = @errbuf.to_a
    ensure
      @errbuf.free
    end

    output
  end

  # Load a tidy config file.
  # Returns the status code; raises LoadError when it is -1 (reported as a
  # missing file) or 1 (reported as parse errors).
  #
  def load_config(file)
    verify_doc
    rc = Tidylib.load_config(@doc, file)
    case rc
    when -1 then raise LoadError, "#{file} does not exist"
    when 1 then raise LoadError, "errors parsing #{file}"
    end
    rc
  end

  # Clear the tidy instance.
  # After this call the object can no longer clean or load configs.
  #
  def release
    verify_doc
    Tidylib.release(@doc)
    @doc = nil
  end

  # Raise an error if the tidy document is invalid.
  #
  def verify_doc
    raise TypeError, 'Invalid Tidy document' unless @doc.class == DL::PtrData
  end

  # Raise severe error based on tidy status value (negative means severe).
  #
  def verify_severe(rc)
    raise "A severe error (#{rc}) occurred.\n" if rc < 0
  end

  protected :verify_doc, :verify_severe

end
@@ -0,0 +1,31 @@
1
# Ruby interface to Tidylib options.
#
# Options can be read and written with Hash syntax (opts[:name]) or with
# Object syntax (opts.name / opts.name = value).
#
class Tidyopt

  # Construct a new instance bound to the tidy document +doc+.
  #
  def initialize(doc)
    @doc = doc
  end

  # Reader for options (Hash syntax); delegates to Tidylib.opt_get_value.
  #
  def [](name)
    Tidylib.opt_get_value(@doc, name)
  end

  # Writer for options (Hash syntax); delegates to Tidylib.opt_parse_value.
  #
  def []=(name, value)
    Tidylib.opt_parse_value(@doc, name, value)
  end

  # Reader/Writer for options (Object syntax).
  # Any '=' in the method name is dropped to obtain the option name. Called
  # without a value the option is read; with a value it is written.
  #
  def method_missing(name, value=:none, *args)
    option = name.to_s.delete('=')
    if value == :none
      self[option]
    else
      self[option] = value
    end
  end

end
data/lib/tidy.rb ADDED
@@ -0,0 +1,83 @@
1
# Ruby interface to HTML Tidy Library Project (http://tidy.sf.net).
#
# =Usage
#
#   require 'tidy'
#   Tidy.path = '/usr/lib/tidylib.so'
#   html = '<html><title>title</title>Body</html>'
#   xml = Tidy.open(:show_warnings=>true) do |tidy|
#     tidy.options.output_xml = true
#     puts tidy.options.show_warnings
#     xml = tidy.clean(html)
#     puts tidy.errors
#     puts tidy.diagnostics
#     xml
#   end
#   puts xml
#
# Author:: Kevin Howe
# License:: Distributed under the same terms as Ruby
#
module Tidy

  require 'dl/import'
  require 'dl/struct'
  require 'tidy/tidybuf'
  require 'tidy/tidyerr'
  require 'tidy/tidylib'
  require 'tidy/tidyobj'
  require 'tidy/tidyopt'

  module_function

  # Return a Tidyobj instance, passing +options+ through to it.
  #
  def new(options=nil)
    Tidyobj.new(options)
  end

  # Path to Tidylib (nil until one has been set).
  #
  def path
    @path
  end

  # Set the path to Tidylib (automatically loads the library).
  # The path is only remembered once Tidylib.load has returned.
  #
  def path=(path)
    Tidylib.load(path)
    @path = path
  end

  # Enable or disable the alternative buffer layout used by Tidybuf.
  #
  def fresh_tidy_version=(bool)
    @fresh_tidy_version = bool
  end

  # Whether the alternative buffer layout is enabled (always true or false).
  #
  def fresh_tidy_version
    @fresh_tidy_version ? true : false
  end

  # With no block, open is a synonym for Tidy.new.
  # If a block is present, the new Tidyobj is passed to it, the block's
  # value is returned, and release is ensured at the end of the block.
  # Raises RuntimeError when no library path has been set.
  #
  def open(options=nil)
    raise "Tidy.path was not specified." unless @path
    tidy = Tidy.new(options)
    return tidy unless block_given?

    begin
      yield tidy
    ensure
      tidy.release
    end
  end

  # Convert to boolean.
  # 0, false and nil return false, anything else true.
  #
  def to_b(value)
    ![0, false, nil].include?(value)
  end

end
data/test/usage.rb ADDED
@@ -0,0 +1,13 @@
1
# Usage example: clean a small HTML fragment and print the warnings flag,
# the errors, the diagnostics and finally the cleaned output.
#
$LOAD_PATH.unshift('../lib')
require 'tidy'

# Change this to point to your compiled Tidy library.
Tidy.path = '/usr/lib/tidylib.so'

html = '<html><title>title</title>Body</html>'

# The block's value (the cleaned markup) becomes the result of Tidy.open.
xml = Tidy.open(:show_warnings=>true) { |tidy|
  tidy.options.output_xml = true
  puts tidy.options.show_warnings
  cleaned = tidy.clean(html)
  puts tidy.errors
  puts tidy.diagnostics
  cleaned
}

puts xml
data/tidy-fork.gemspec ADDED
@@ -0,0 +1,33 @@
1
# Gem specification for tidy-fork.

# Paths shipped with the gem (assigned to the spec's file list below).
MANIFEST = %w(
  CHANGES
  install.rb
  lib
  lib/tidy
  lib/tidy/tidybuf.rb
  lib/tidy/tidyerr.rb
  lib/tidy/tidylib.rb
  lib/tidy/tidyobj.rb
  lib/tidy/tidyopt.rb
  lib/tidy.rb
  MANIFEST
  README.txt.en
  test
  test/usage.rb
  tidy-fork.gemspec
  VERSION
)

spec = Gem::Specification.new do |s|
  s.name = 'tidy-fork'
  s.version = "1.1.3"
  s.authors = ['Kevin Howe', 'Dima Sabanin']
  s.email = 'kh@newclear.ca'
  s.homepage = 'http://github.com/railsmonk/tidy-fork'
  s.platform = Gem::Platform::RUBY
  s.summary = 'Forked Ruby interface to HTML Tidy Library Project with important bugfixes'
  s.files = MANIFEST
  s.require_path = 'lib'
  s.rdoc_options << '--all' << '--inline-source' << '--main' << 'lib/tidy.rb'
  # has_rdoc= is deprecated in RubyGems and may be absent in newer releases;
  # set it only where the setter exists so the spec keeps loading everywhere.
  s.has_rdoc = true if s.respond_to?(:has_rdoc=)
  # s.rubyforge_project = 'tidy'
end
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tidy-fork
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.3
5
+ platform: ruby
6
+ authors:
7
+ - Kevin Howe
8
+ - Dima Sabanin
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-11-18 00:00:00 +07:00
14
+ default_executable:
15
+ dependencies: []
16
+
17
+ description:
18
+ email: kh@newclear.ca
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files: []
24
+
25
+ files:
26
+ - CHANGES
27
+ - install.rb
28
+ - lib/tidy/tidybuf.rb
29
+ - lib/tidy/tidyerr.rb
30
+ - lib/tidy/tidylib.rb
31
+ - lib/tidy/tidyobj.rb
32
+ - lib/tidy/tidyopt.rb
33
+ - lib/tidy.rb
34
+ - MANIFEST
35
+ - README.txt.en
36
+ - test/usage.rb
37
+ - tidy-fork.gemspec
38
+ - VERSION
39
+ has_rdoc: true
40
+ homepage: http://github.com/railsmonk/tidy-fork
41
+ licenses: []
42
+
43
+ post_install_message:
44
+ rdoc_options:
45
+ - --all
46
+ - --inline-source
47
+ - --main
48
+ - lib/tidy.rb
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ version:
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: "0"
62
+ version:
63
+ requirements: []
64
+
65
+ rubyforge_project:
66
+ rubygems_version: 1.3.5
67
+ signing_key:
68
+ specification_version: 3
69
+ summary: Forked Ruby interface to HTML Tidy Library Project with important bugfixes
70
+ test_files: []
71
+