htree 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. data.tar.gz.sig +4 -0
  2. data/Makefile +20 -0
  3. data/Manifest +58 -0
  4. data/README +61 -0
  5. data/Rakefile +37 -0
  6. data/htree.gemspec +32 -0
  7. data/init.rb +1 -0
  8. data/install.rb +112 -0
  9. data/lib/htree.rb +97 -0
  10. data/lib/htree/container.rb +8 -0
  11. data/lib/htree/context.rb +69 -0
  12. data/lib/htree/display.rb +46 -0
  13. data/lib/htree/doc.rb +149 -0
  14. data/lib/htree/elem.rb +262 -0
  15. data/lib/htree/encoder.rb +217 -0
  16. data/lib/htree/equality.rb +219 -0
  17. data/lib/htree/extract_text.rb +37 -0
  18. data/lib/htree/fstr.rb +32 -0
  19. data/lib/htree/gencode.rb +193 -0
  20. data/lib/htree/htmlinfo.rb +672 -0
  21. data/lib/htree/inspect.rb +108 -0
  22. data/lib/htree/leaf.rb +92 -0
  23. data/lib/htree/loc.rb +369 -0
  24. data/lib/htree/modules.rb +49 -0
  25. data/lib/htree/name.rb +122 -0
  26. data/lib/htree/output.rb +212 -0
  27. data/lib/htree/parse.rb +410 -0
  28. data/lib/htree/raw_string.rb +127 -0
  29. data/lib/htree/regexp-util.rb +19 -0
  30. data/lib/htree/rexml.rb +131 -0
  31. data/lib/htree/scan.rb +176 -0
  32. data/lib/htree/tag.rb +113 -0
  33. data/lib/htree/template.rb +961 -0
  34. data/lib/htree/text.rb +115 -0
  35. data/lib/htree/traverse.rb +497 -0
  36. data/test-all.rb +5 -0
  37. data/test/assign.html +1 -0
  38. data/test/template.html +4 -0
  39. data/test/test-attr.rb +67 -0
  40. data/test/test-charset.rb +79 -0
  41. data/test/test-context.rb +29 -0
  42. data/test/test-display_xml.rb +45 -0
  43. data/test/test-elem-new.rb +101 -0
  44. data/test/test-encoder.rb +53 -0
  45. data/test/test-equality.rb +55 -0
  46. data/test/test-extract_text.rb +18 -0
  47. data/test/test-gencode.rb +27 -0
  48. data/test/test-leaf.rb +25 -0
  49. data/test/test-loc.rb +60 -0
  50. data/test/test-namespace.rb +147 -0
  51. data/test/test-output.rb +133 -0
  52. data/test/test-parse.rb +115 -0
  53. data/test/test-raw_string.rb +17 -0
  54. data/test/test-rexml.rb +70 -0
  55. data/test/test-scan.rb +153 -0
  56. data/test/test-security.rb +37 -0
  57. data/test/test-subnode.rb +142 -0
  58. data/test/test-template.rb +313 -0
  59. data/test/test-text.rb +43 -0
  60. data/test/test-traverse.rb +69 -0
  61. metadata +166 -0
  62. metadata.gz.sig +1 -0
@@ -0,0 +1,4 @@
1
+ gS����B��ߦGgLAn���۱�� ���R�����nq�9繯�d�%���mZ��uo�ݷd�b��=j��4�6H��� �>�\��ٺ��9s���'�~�R�fh� ����R%�\;�k
2
+ ��[����
3
+ f��Z�<���lQ4XD�,�y�桾��ʛ���5��������g�Y8�^�<�_l�GknmL�-c�!�=�θ��4���:b4%u�L�N�8G�<0���c�
4
+ ������w�� kXB�
@@ -0,0 +1,20 @@
1
+ RUBY=ruby
2
+
3
+ all: README rdoc/index.html
4
+
5
+ README: misc/README.erb
6
+ erb misc/README.erb > README
7
+
8
+ check test:
9
+ $(RUBY) -I. test-all.rb
10
+
11
+ install:
12
+ $(RUBY) install.rb
13
+
14
+ .PHONY: check test all install
15
+
16
+ RB = htree.rb htree/modules.rb $(wildcard htree/[a-l]*.rb) $(wildcard htree/[n-z]*.rb)
17
+ rdoc/index.html: $(RB)
18
+ rm -rf doc
19
+ rdoc --op rdoc $(RB)
20
+
@@ -0,0 +1,58 @@
1
+ Makefile
2
+ README
3
+ Rakefile
4
+ init.rb
5
+ install.rb
6
+ lib/htree.rb
7
+ lib/htree/container.rb
8
+ lib/htree/context.rb
9
+ lib/htree/display.rb
10
+ lib/htree/doc.rb
11
+ lib/htree/elem.rb
12
+ lib/htree/encoder.rb
13
+ lib/htree/equality.rb
14
+ lib/htree/extract_text.rb
15
+ lib/htree/fstr.rb
16
+ lib/htree/gencode.rb
17
+ lib/htree/htmlinfo.rb
18
+ lib/htree/inspect.rb
19
+ lib/htree/leaf.rb
20
+ lib/htree/loc.rb
21
+ lib/htree/modules.rb
22
+ lib/htree/name.rb
23
+ lib/htree/output.rb
24
+ lib/htree/parse.rb
25
+ lib/htree/raw_string.rb
26
+ lib/htree/regexp-util.rb
27
+ lib/htree/rexml.rb
28
+ lib/htree/scan.rb
29
+ lib/htree/tag.rb
30
+ lib/htree/template.rb
31
+ lib/htree/text.rb
32
+ lib/htree/traverse.rb
33
+ test-all.rb
34
+ test/assign.html
35
+ test/template.html
36
+ test/test-attr.rb
37
+ test/test-charset.rb
38
+ test/test-context.rb
39
+ test/test-display_xml.rb
40
+ test/test-elem-new.rb
41
+ test/test-encoder.rb
42
+ test/test-equality.rb
43
+ test/test-extract_text.rb
44
+ test/test-gencode.rb
45
+ test/test-leaf.rb
46
+ test/test-loc.rb
47
+ test/test-namespace.rb
48
+ test/test-output.rb
49
+ test/test-parse.rb
50
+ test/test-raw_string.rb
51
+ test/test-rexml.rb
52
+ test/test-scan.rb
53
+ test/test-security.rb
54
+ test/test-subnode.rb
55
+ test/test-template.rb
56
+ test/test-text.rb
57
+ test/test-traverse.rb
58
+ Manifest
data/README ADDED
@@ -0,0 +1,61 @@
1
+ = htree - HTML/XML tree library
2
+
3
+ htree provides a tree data structure which represents HTML and XML data.
4
+
5
+ == Feature
6
+
7
+ * Permissive unified HTML/XML parser
8
+ * byte-to-byte round-tripping unparser
9
+ * XML namespace support
10
+ * Dedicated class for escaped strings. This eases sanitization.
11
+ * HTML/XHTML/XML generator
12
+ * template engine
13
+ * recursive template expansion
14
+ * converter to REXML document
15
+
16
+ == Home Page
17
+
18
+ http://www.a-k-r.org/htree/
19
+
20
+ == Requirements
21
+
22
+ * ruby : http://www.ruby-lang.org/
23
+
24
+ == Download
25
+
26
+ * latest release: http://www.a-k-r.org/htree/htree-0.7.tar.gz
27
+
28
+ * development version in CVS repository:
29
+
30
+ http://cvs.m17n.org/viewcvs/ruby/htree/ or checkout with:
31
+
32
+ % cvs -d :pserver:anonymous@cvs.m17n.org:/cvs/ruby co htree
33
+
34
+ == Install
35
+
36
+ % ruby install.rb
37
+
38
+ == Reference Manual
39
+
40
+ See rdoc/index.html or
41
+ http://www.a-k-r.org/htree/rdoc/
42
+
43
+ == Usage
44
+
45
+ The following two-line script converts HTML to XHTML.
46
+
47
+ require 'htree'
48
+ HTree(STDIN).display_xml
49
+
50
+ The conversion method to REXML is provided as to_rexml.
51
+
52
+ HTree(...).to_rexml
53
+
54
+ == License
55
+
56
+ Ruby's
57
+
58
+ == Author
59
+ Tanaka Akira <akr@fsij.org>
60
+
61
+ [packaged as gem- andrew packer <agp@ppolitics.org>]
@@ -0,0 +1,37 @@
1
require 'rubygems'
require 'rake'
require 'rake/testtask'
require 'echoe'

# Rake::RDocTask (rake/rdoctask) was deprecated in Rake 0.9 and later removed
# in favour of RDoc::Task, which ships with RDoc itself.  Prefer the current
# task class and fall back to the legacy one only where rdoc/task is missing.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

# Packaging tasks (gem build, manifest, release, ...) are generated by Echoe.
Echoe.new('htree', '0.7.0') do |p|
  p.summary = "HTML/XML tree library"
  p.description = "Htree provides a tree data structure which represent HTML and XML data"
  p.url = "http://www.a-k-r.org/htree/"
  p.author = "Tanaka Akira"
  p.email = "akr@fsij.org"
  p.ignore_pattern = ["tmp/*", "script/*"]
  p.development_dependencies = []
end

# Load any additional task definitions that live next to this Rakefile.
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }

desc 'Default: run unit tests.'
task :default => :test

# Runs every test/test-*.rb file with lib/ on the load path.
desc 'Test'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/test-*.rb'
  t.verbose = true
end

# Generates the API documentation into rdoc/ from README and lib/.
desc 'RDoc'
rdoc_task_class.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'HTree'
  rdoc.options << '-NS'
  rdoc.rdoc_files.include('README')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for htree 0.7.0.
#
# The generated version-check scaffolding at the end of the original spec had
# only empty branches; it has been dropped because it did nothing and its
# condition referenced Gem::RubyGemsVersion, a constant newer RubyGems
# releases no longer define.
Gem::Specification.new do |s|
  # Library sources: packaged in the gem and also handed to RDoc.
  lib_files = %w[
    lib/htree.rb
    lib/htree/container.rb
    lib/htree/context.rb
    lib/htree/display.rb
    lib/htree/doc.rb
    lib/htree/elem.rb
    lib/htree/encoder.rb
    lib/htree/equality.rb
    lib/htree/extract_text.rb
    lib/htree/fstr.rb
    lib/htree/gencode.rb
    lib/htree/htmlinfo.rb
    lib/htree/inspect.rb
    lib/htree/leaf.rb
    lib/htree/loc.rb
    lib/htree/modules.rb
    lib/htree/name.rb
    lib/htree/output.rb
    lib/htree/parse.rb
    lib/htree/raw_string.rb
    lib/htree/regexp-util.rb
    lib/htree/rexml.rb
    lib/htree/scan.rb
    lib/htree/tag.rb
    lib/htree/template.rb
    lib/htree/text.rb
    lib/htree/traverse.rb
  ]

  # Test driver, fixtures and test cases shipped with the gem.
  test_files = %w[
    test-all.rb
    test/assign.html
    test/template.html
    test/test-attr.rb
    test/test-charset.rb
    test/test-context.rb
    test/test-display_xml.rb
    test/test-elem-new.rb
    test/test-encoder.rb
    test/test-equality.rb
    test/test-extract_text.rb
    test/test-gencode.rb
    test/test-leaf.rb
    test/test-loc.rb
    test/test-namespace.rb
    test/test-output.rb
    test/test-parse.rb
    test/test-raw_string.rb
    test/test-rexml.rb
    test/test-scan.rb
    test/test-security.rb
    test/test-subnode.rb
    test/test-template.rb
    test/test-text.rb
    test/test-traverse.rb
  ]

  s.name = %q{htree}
  s.version = "0.7.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Tanaka Akira"]
  s.date = %q{2010-01-27}
  s.description = %q{Htree provides a tree data structure which represent HTML and XML data}
  s.email = %q{akr@fsij.org}
  s.extra_rdoc_files = ["README"] + lib_files
  s.files = %w[Makefile README Rakefile init.rb install.rb] + lib_files + test_files + %w[Manifest htree.gemspec]
  s.homepage = %q{http://www.a-k-r.org/htree/}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Htree", "--main", "README"]
  s.require_paths = ["lib"]
  # Guarded like required_rubygems_version= above, so the spec still loads
  # on a RubyGems that does not provide this setter.
  s.rubyforge_project = %q{htree} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{HTML/XML tree library}
  # NOTE(review): the signing key is an absolute path on the packager's
  # machine; building a signed gem elsewhere needs this adjusted.
  s.signing_key = '/Users/agp/.gem/gem-private_key.pem'
  s.cert_chain = ['gem-public_cert.pem']

  s.specification_version = 3 if s.respond_to? :specification_version
end
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'htree'
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # usage: ruby install.rb [-n] [--destdir=DESTDIR]
4
+ # options:
5
+ # -n : don't install
6
+ # --destdir=DESTDIR
7
+ #
8
+ # Author: Tanaka Akira <akr@m17n.org>
9
+
10
+ require 'optparse'
11
+ require 'fileutils'
12
+
13
+ def target_directory
14
+ $:.each {|loc|
15
+ if %r{/site_ruby/[\d.]+\z} =~ loc
16
+ return loc
17
+ end
18
+ }
19
+ raise "could not find target install directory"
20
+ end
21
+
22
+ CVS_FILES = {}
23
+ def cvs_files(dir)
24
+ return CVS_FILES[dir] if CVS_FILES.include? dir
25
+ if File.directory? "#{dir}/CVS"
26
+ result = {}
27
+ File.foreach("#{dir}/CVS/Entries") {|line|
28
+ case line
29
+ when %r{\A/([^/]+)/} then result[$1] = true
30
+ when %r{\AD/([^/]+)/} then result[$1] = true
31
+ end
32
+ }
33
+ else
34
+ result = nil
35
+ end
36
+ CVS_FILES[dir] = result
37
+ result
38
+ end
39
+
40
+ def each_target(&block)
41
+ target_set = {}
42
+ cvs = cvs_files('.')
43
+ Dir.glob("*.rb") {|filename|
44
+ next if /\Atest-/ =~ filename
45
+ next if /\Ainstall/ =~ filename
46
+ next if cvs && !cvs.include?(filename)
47
+ target_set[filename] = true
48
+ yield filename
49
+ each_require(filename, target_set, &block)
50
+ }
51
+ end
52
+
53
+ def each_require(file, target_set, &block)
54
+ File.foreach(file) {|line|
55
+ next if /\A\s*require\s+['"]([^'"]+)['"]/ !~ line
56
+ feature = $1
57
+ filename = "#{feature}.rb"
58
+ next if target_set.include? filename
59
+ next if !File.exist?(filename)
60
+ target_set[filename] = true
61
+ yield filename
62
+ each_require(filename, target_set, &block)
63
+ }
64
+ end
65
+
66
+ def collect_target
67
+ result = []
68
+ each_target {|filename| result << filename }
69
+ result.sort!
70
+ result
71
+ end
72
+
73
+ def install_file(src, dst)
74
+ ignore_exc(Errno::ENOENT) { return if FileUtils.compare_file src, dst }
75
+ # check shadow
76
+ ignore_exc(Errno::ENOENT) { File.unlink dst }
77
+ FileUtils.mkdir_p(File.dirname(dst), :mode=>0755)
78
+ FileUtils.cp(src, dst, :verbose => true)
79
+ File.chmod(0644, dst)
80
+ end
81
+
82
+ def ignore_exc(exc)
83
+ begin
84
+ yield
85
+ rescue exc
86
+ end
87
+ end
88
+
89
+ $opt_n = false
90
+ $opt_destdir = ""
91
+ ARGV.options {|q|
92
+ q.banner = 'ruby install.rb [opts]'
93
+ q.def_option('--help', 'show this message') {puts q; exit(0)}
94
+ q.def_option('-n', "don't install") { $opt_n = true }
95
+ q.def_option('--destdir=DESTDIR', "specify DESTDIR") {|destdir| $opt_destdir = destdir }
96
+ q.parse!
97
+ }
98
+
99
+ if $opt_n
100
+ dir = target_directory
101
+ collect_target.each {|filename|
102
+ puts "-> #{$opt_destdir}#{dir}/#{filename}"
103
+ }
104
+ exit
105
+ else
106
+ File.umask 022
107
+ dir = target_directory
108
+ collect_target.each {|filename|
109
+ install_file filename, "#{$opt_destdir}#{dir}/#{filename}"
110
+ }
111
+ end
112
+
@@ -0,0 +1,97 @@
1
+ #
2
+ # = htree.rb
3
+ #
4
+ # HTML/XML document tree
5
+ #
6
+ # Author:: Tanaka Akira <akr@fsij.org>
7
+ #
8
+ # == Features
9
+ #
10
+ # - Permissive unified HTML/XML parser
11
+ # - byte-to-byte round-tripping unparser
12
+ # - XML namespace support
13
+ # - Dedicated class for escaped strings. This eases sanitization.
14
+ # - XHTML/XML generator
15
+ # - template engine: link:files/htree/template_rb.html
16
+ # - recursive template expansion
17
+ # - REXML tree generator: link:files/htree/rexml_rb.html
18
+ #
19
+ # == Example
20
+ #
21
+ # The following one-liner prints the parsed tree object.
22
+ #
23
+ # % ruby -rhtree -e 'pp HTree(ARGF)' html-file
24
+ #
25
+ # The following two-line script converts HTML to XHTML.
26
+ #
27
+ # require 'htree'
28
+ # HTree(STDIN).display_xml
29
+ #
30
+ # The conversion method to REXML is provided as to_rexml.
31
+ #
32
+ # HTree(...).to_rexml
33
+ #
34
+ # == Module/Class Hierarchy
35
+ #
36
+ # * HTree
37
+ # * HTree::Name
38
+ # * HTree::Context
39
+ # * HTree::Location
40
+ # * HTree::Node
41
+ # * HTree::Doc
42
+ # * HTree::Elem
43
+ # * HTree::Text
44
+ # * HTree::XMLDecl
45
+ # * HTree::DocType
46
+ # * HTree::ProcIns
47
+ # * HTree::Comment
48
+ # * HTree::BogusETag
49
+ # * HTree::Error
50
+ #
51
+ # == Method Summary
52
+ #
53
+ # HTree provides the following methods.
54
+ #
55
+ # - Parsing Methods
56
+ # - HTree(<i>html_string</i>) -> HTree::Doc
57
+ # - HTree.parse(<i>input</i>) -> HTree::Doc
58
+ #
59
+ # - Generation Methods
60
+ # - HTree::Node#display_xml -> STDOUT
61
+ # - HTree::Node#display_xml(<i>out</i>) -> <i>out</i>
62
+ # - HTree::Node#display_xml(<i>out</i>, <i>encoding</i>) -> <i>out</i>
63
+ # - HTree::Text#to_s -> String
64
+ #
65
+ # - Template Methods
66
+ # - HTree.expand_template{<i>template_string</i>} -> STDOUT
67
+ # - HTree.expand_template(<i>out</i>){<i>template_string</i>} -> <i>out</i>
68
+ # - HTree.expand_template(<i>out</i>, <i>encoding</i>){<i>template_string</i>} -> <i>out</i>
69
+ # - HTree.compile_template(<i>template_string</i>) -> Module
70
+ # - HTree{<i>template_string</i>} -> HTree::Doc
71
+ #
72
+ # - Traverse Methods
73
+ # - HTree::Elem#attributes -> Hash[HTree::Name -> HTree::Text]
74
+ # - HTree::Elem::Location#attributes -> Hash[HTree::Name -> HTree::Location]
75
+ #
76
+ # - Predicate Methods
77
+ # - HTree::Traverse#doc? -> true or false
78
+ # - HTree::Traverse#elem? -> true or false
79
+ # - HTree::Traverse#text? -> true or false
80
+ # - HTree::Traverse#xmldecl? -> true or false
81
+ # - HTree::Traverse#doctype? -> true or false
82
+ # - HTree::Traverse#procins? -> true or false
83
+ # - HTree::Traverse#comment? -> true or false
84
+ # - HTree::Traverse#bogusetag? -> true or false
85
+ #
86
+ # - REXML Tree Generator
87
+ # - HTree::Node#to_rexml -> REXML::Child
88
+
89
+ require 'htree/parse'
90
+ require 'htree/extract_text'
91
+ require 'htree/equality'
92
+ require 'htree/inspect'
93
+ require 'htree/display'
94
+ require 'htree/loc'
95
+ require 'htree/traverse'
96
+ require 'htree/template'
97
+ require 'htree/rexml'
@@ -0,0 +1,8 @@
1
+ require 'htree/modules'
2
+
3
+ module HTree::Container
4
+ # +children+ returns children nodes as an array. The result is a shallow copy (+dup+) of @children, so adding to or removing from the returned object does not change this container's own child list.
5
+ def children
6
+ @children.dup
7
+ end
8
+ end
@@ -0,0 +1,69 @@
1
module HTree
  # A Context holds a frozen table that maps XML namespace prefixes to
  # namespace URIs.
  class Context
    # :stopdoc:
    DefaultNamespaces = {'xml'=>'http://www.w3.org/XML/1998/namespace'}
    DefaultNamespaces.default = ""
    DefaultNamespaces.freeze
    # :startdoc:

    # The frozen prefix => URI hash backing this context.
    attr_reader :namespaces

    # Builds a context from _namespaces_, which should be a hash or nil.
    # HTree::Context::DefaultNamespaces is used when nil is given.
    #
    # Each key must be nil (meaning the default namespace) or a non-empty
    # string prefix.  Each value must be a string; the empty string ""
    # stands for an unbound namespace.  ArgumentError is raised for any
    # key or value that breaks these rules.
    #
    # The table is kept frozen.  A hash that is not frozen yet is copied
    # before freezing, so the caller's hash itself is left unfrozen.
    def initialize(namespaces=nil)
      table = namespaces || DefaultNamespaces
      table.each_pair do |prefix, uri|
        check_namespace_prefix(prefix)
        check_namespace_uri(uri)
      end
      @namespaces = table.frozen? ? table : table.dup.freeze
    end

    # Looks up the namespace URI bound to _prefix_.
    # For a prefix that is not defined the result is the table's default
    # value: nil for a plain hash, "" for tables copied from
    # DefaultNamespaces.
    def namespace_uri(prefix)
      @namespaces[prefix]
    end

    # Returns a Context whose bindings are this context's bindings
    # overridden by the hash _declared_namespaces_.  When nothing actually
    # changes, the receiver itself is returned.  Keys and values are
    # validated as in Context.new.
    def subst_namespaces(declared_namespaces)
      merged = @namespaces.dup
      declared_namespaces.each do |prefix, uri|
        check_namespace_prefix(prefix)
        check_namespace_uri(uri)
        merged[prefix] = uri
      end
      return self if merged == @namespaces
      Context.new(merged)
    end

    private

    # A prefix is acceptable when it is nil or a non-empty String.
    def check_namespace_prefix(k)
      acceptable =
        case k
        when nil then true
        when String then !k.empty?
        else false
        end
      raise ArgumentError, "invalid namespace prefix: #{k.inspect}" unless acceptable
    end

    # A namespace URI is acceptable when it is a String.
    def check_namespace_uri(v)
      return if String === v
      raise ArgumentError, "invalid namespace URI: #{v.inspect}"
    end
  end

  # :stopdoc:
  DefaultContext = Context.new
  HTMLContext = DefaultContext.subst_namespaces(nil=>"http://www.w3.org/1999/xhtml")
  # :startdoc:
end