tidy-ext 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/LICENSE +50 -0
- data/README +12 -0
- data/Rakefile +60 -0
- data/VERSION +1 -0
- data/ext/tidy/access.c +3310 -0
- data/ext/tidy/access.h +279 -0
- data/ext/tidy/alloc.c +107 -0
- data/ext/tidy/attrask.c +209 -0
- data/ext/tidy/attrdict.c +2398 -0
- data/ext/tidy/attrdict.h +122 -0
- data/ext/tidy/attrget.c +213 -0
- data/ext/tidy/attrs.c +1911 -0
- data/ext/tidy/attrs.h +374 -0
- data/ext/tidy/buffio.c +232 -0
- data/ext/tidy/buffio.h +118 -0
- data/ext/tidy/charsets.c +1032 -0
- data/ext/tidy/charsets.h +14 -0
- data/ext/tidy/clean.c +2674 -0
- data/ext/tidy/clean.h +87 -0
- data/ext/tidy/config.c +1746 -0
- data/ext/tidy/config.h +153 -0
- data/ext/tidy/entities.c +419 -0
- data/ext/tidy/entities.h +24 -0
- data/ext/tidy/extconf.rb +5 -0
- data/ext/tidy/fileio.c +106 -0
- data/ext/tidy/fileio.h +46 -0
- data/ext/tidy/forward.h +69 -0
- data/ext/tidy/iconvtc.c +105 -0
- data/ext/tidy/iconvtc.h +15 -0
- data/ext/tidy/istack.c +373 -0
- data/ext/tidy/lexer.c +3825 -0
- data/ext/tidy/lexer.h +617 -0
- data/ext/tidy/localize.c +1882 -0
- data/ext/tidy/mappedio.c +329 -0
- data/ext/tidy/mappedio.h +16 -0
- data/ext/tidy/message.h +207 -0
- data/ext/tidy/parser.c +4408 -0
- data/ext/tidy/parser.h +76 -0
- data/ext/tidy/platform.h +636 -0
- data/ext/tidy/pprint.c +2276 -0
- data/ext/tidy/pprint.h +93 -0
- data/ext/tidy/ruby-tidy.c +195 -0
- data/ext/tidy/streamio.c +1407 -0
- data/ext/tidy/streamio.h +222 -0
- data/ext/tidy/tagask.c +286 -0
- data/ext/tidy/tags.c +955 -0
- data/ext/tidy/tags.h +235 -0
- data/ext/tidy/tidy-int.h +129 -0
- data/ext/tidy/tidy.h +1097 -0
- data/ext/tidy/tidyenum.h +622 -0
- data/ext/tidy/tidylib.c +1751 -0
- data/ext/tidy/tmbstr.c +306 -0
- data/ext/tidy/tmbstr.h +92 -0
- data/ext/tidy/utf8.c +539 -0
- data/ext/tidy/utf8.h +52 -0
- data/ext/tidy/version.h +14 -0
- data/ext/tidy/win32tc.c +795 -0
- data/ext/tidy/win32tc.h +19 -0
- data/spec/spec_helper.rb +5 -0
- data/spec/tidy/compat_spec.rb +44 -0
- data/spec/tidy/remote_uri_spec.rb +14 -0
- data/spec/tidy/test1.html +5 -0
- data/spec/tidy/tidy_spec.rb +34 -0
- metadata +125 -0
data/ext/tidy/win32tc.h
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
#ifndef __WIN32TC_H__
|
2
|
+
#define __WIN32TC_H__
|
3
|
+
#ifdef TIDY_WIN32_MLANG_SUPPORT
|
4
|
+
|
5
|
+
/* win32tc.h -- Interface to Win32 transcoding routines
|
6
|
+
|
7
|
+
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
8
|
+
See tidy.h for the copyright notice.
|
9
|
+
|
10
|
+
$Id: win32tc.h,v 1.3 2006/12/29 16:31:09 arnaud02 Exp $
|
11
|
+
*/
|
12
|
+
|
13
|
+
uint TY_(Win32MLangGetCPFromName)(TidyAllocator *allocator,ctmbstr encoding);
|
14
|
+
Bool TY_(Win32MLangInitInputTranscoder)(StreamIn * in, uint wincp);
|
15
|
+
void TY_(Win32MLangUninitInputTranscoder)(StreamIn * in);
|
16
|
+
int TY_(Win32MLangGetChar)(byte firstByte, StreamIn * in, uint * bytesRead);
|
17
|
+
|
18
|
+
#endif /* TIDY_WIN32_MLANG_SUPPORT */
|
19
|
+
#endif /* __WIN32TC_H__ */
|
data/spec/tidy/compat_spec.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "..", "spec_helper")
|
2
|
+
|
3
|
+
describe "tidy compatibility methods" do
|
4
|
+
|
5
|
+
subject { Tidy.new }
|
6
|
+
|
7
|
+
it "should be defined" do
|
8
|
+
defined?(Tidy).should be_true
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should have an open method" do
|
12
|
+
Tidy.should respond_to :open
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should have a clean method" do
|
16
|
+
subject.should respond_to :clean
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should have an errors attribute" do
|
20
|
+
subject.should respond_to :errors
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should have a path attribute" do
|
24
|
+
Tidy.should respond_to :path
|
25
|
+
Tidy.path = "hello world"
|
26
|
+
Tidy.path.should == "hello world"
|
27
|
+
end
|
28
|
+
|
29
|
+
it "should parse a string" do
|
30
|
+
tidy = Tidy.open({}) do |tidy|
|
31
|
+
xml = tidy.clean("<html><body>String</body></html>")
|
32
|
+
xml.should_not be_empty
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
it "should be able to parse more than one string consecutively" do
|
37
|
+
tidy = Tidy.open({}) do |tidy|
|
38
|
+
errors1, html1 = tidy.clean("<html><body>String</body></html>")
|
39
|
+
errors2, html2 = tidy.clean("<html><head><title>hello</title></head><body>String</body></html>")
|
40
|
+
errors1.should_not == errors2
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
data/spec/tidy/remote_uri_spec.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "..", "spec_helper")
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
describe "tidy class methods" do
|
5
|
+
|
6
|
+
subject { Tidy.new }
|
7
|
+
|
8
|
+
it "should parse the google.com uri" do
|
9
|
+
uri = open("http://www.google.com")
|
10
|
+
page = uri.read
|
11
|
+
errors, html = subject.parse(page)
|
12
|
+
end
|
13
|
+
|
14
|
+
end
|
data/spec/tidy/tidy_spec.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "..", "spec_helper")
|
2
|
+
|
3
|
+
describe "tidy class methods" do
|
4
|
+
|
5
|
+
subject { Tidy.new }
|
6
|
+
|
7
|
+
it "should have a new method" do
|
8
|
+
Tidy.should respond_to :new
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should parse a string" do
|
12
|
+
errors, html = subject.parse("<html><body>String</body></html>")
|
13
|
+
errors.should_not be_nil
|
14
|
+
html.should_not be_nil
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should parse a file" do
|
18
|
+
file = File.new(File.join(File.dirname(__FILE__),'test1.html'))
|
19
|
+
errors, html = subject.parse(file)
|
20
|
+
errors.should_not be_nil
|
21
|
+
html.should_not be_nil
|
22
|
+
end
|
23
|
+
|
24
|
+
it "should respond to access" do
|
25
|
+
subject.should respond_to(:access)
|
26
|
+
end
|
27
|
+
|
28
|
+
it "should be able to parse more than one string consecutively" do
|
29
|
+
errors1, html1 = subject.parse("<html><body>String</body></html>")
|
30
|
+
errors2, html2 = subject.parse("<html><head><title>hello</title></head><body>String</body></html>")
|
31
|
+
errors1.should_not == errors2
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
metadata
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tidy-ext
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
- 7
|
9
|
+
version: 0.1.7
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Carl Douglas
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-06-10 00:00:00 +10:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: " Tidies up web pages.\n"
|
22
|
+
email: carl.douglas@gmail.com
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions:
|
26
|
+
- ext/tidy/extconf.rb
|
27
|
+
extra_rdoc_files:
|
28
|
+
- LICENSE
|
29
|
+
files:
|
30
|
+
- .gitignore
|
31
|
+
- LICENSE
|
32
|
+
- README
|
33
|
+
- Rakefile
|
34
|
+
- VERSION
|
35
|
+
- ext/tidy/access.c
|
36
|
+
- ext/tidy/access.h
|
37
|
+
- ext/tidy/alloc.c
|
38
|
+
- ext/tidy/attrask.c
|
39
|
+
- ext/tidy/attrdict.c
|
40
|
+
- ext/tidy/attrdict.h
|
41
|
+
- ext/tidy/attrget.c
|
42
|
+
- ext/tidy/attrs.c
|
43
|
+
- ext/tidy/attrs.h
|
44
|
+
- ext/tidy/buffio.c
|
45
|
+
- ext/tidy/buffio.h
|
46
|
+
- ext/tidy/charsets.c
|
47
|
+
- ext/tidy/charsets.h
|
48
|
+
- ext/tidy/clean.c
|
49
|
+
- ext/tidy/clean.h
|
50
|
+
- ext/tidy/config.c
|
51
|
+
- ext/tidy/config.h
|
52
|
+
- ext/tidy/entities.c
|
53
|
+
- ext/tidy/entities.h
|
54
|
+
- ext/tidy/extconf.rb
|
55
|
+
- ext/tidy/fileio.c
|
56
|
+
- ext/tidy/fileio.h
|
57
|
+
- ext/tidy/forward.h
|
58
|
+
- ext/tidy/iconvtc.c
|
59
|
+
- ext/tidy/iconvtc.h
|
60
|
+
- ext/tidy/istack.c
|
61
|
+
- ext/tidy/lexer.c
|
62
|
+
- ext/tidy/lexer.h
|
63
|
+
- ext/tidy/localize.c
|
64
|
+
- ext/tidy/mappedio.c
|
65
|
+
- ext/tidy/mappedio.h
|
66
|
+
- ext/tidy/message.h
|
67
|
+
- ext/tidy/parser.c
|
68
|
+
- ext/tidy/parser.h
|
69
|
+
- ext/tidy/platform.h
|
70
|
+
- ext/tidy/pprint.c
|
71
|
+
- ext/tidy/pprint.h
|
72
|
+
- ext/tidy/ruby-tidy.c
|
73
|
+
- ext/tidy/streamio.c
|
74
|
+
- ext/tidy/streamio.h
|
75
|
+
- ext/tidy/tagask.c
|
76
|
+
- ext/tidy/tags.c
|
77
|
+
- ext/tidy/tags.h
|
78
|
+
- ext/tidy/tidy-int.h
|
79
|
+
- ext/tidy/tidy.h
|
80
|
+
- ext/tidy/tidyenum.h
|
81
|
+
- ext/tidy/tidylib.c
|
82
|
+
- ext/tidy/tmbstr.c
|
83
|
+
- ext/tidy/tmbstr.h
|
84
|
+
- ext/tidy/utf8.c
|
85
|
+
- ext/tidy/utf8.h
|
86
|
+
- ext/tidy/version.h
|
87
|
+
- ext/tidy/win32tc.c
|
88
|
+
- ext/tidy/win32tc.h
|
89
|
+
- spec/spec_helper.rb
|
90
|
+
- spec/tidy/compat_spec.rb
|
91
|
+
- spec/tidy/remote_uri_spec.rb
|
92
|
+
- spec/tidy/test1.html
|
93
|
+
- spec/tidy/tidy_spec.rb
|
94
|
+
has_rdoc: true
|
95
|
+
homepage: http://github.com/carld/tidy
|
96
|
+
licenses: []
|
97
|
+
|
98
|
+
post_install_message:
|
99
|
+
rdoc_options:
|
100
|
+
- --charset=UTF-8
|
101
|
+
require_paths:
|
102
|
+
- lib
|
103
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
segments:
|
108
|
+
- 0
|
109
|
+
version: "0"
|
110
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
segments:
|
115
|
+
- 0
|
116
|
+
version: "0"
|
117
|
+
requirements: []
|
118
|
+
|
119
|
+
rubyforge_project:
|
120
|
+
rubygems_version: 1.3.6
|
121
|
+
signing_key:
|
122
|
+
specification_version: 3
|
123
|
+
summary: HTML Tidy library implemented as a Ruby extension.
|
124
|
+
test_files: []
|
125
|
+
|