htree 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. data.tar.gz.sig +4 -0
  2. data/Makefile +20 -0
  3. data/Manifest +58 -0
  4. data/README +61 -0
  5. data/Rakefile +37 -0
  6. data/htree.gemspec +32 -0
  7. data/init.rb +1 -0
  8. data/install.rb +112 -0
  9. data/lib/htree.rb +97 -0
  10. data/lib/htree/container.rb +8 -0
  11. data/lib/htree/context.rb +69 -0
  12. data/lib/htree/display.rb +46 -0
  13. data/lib/htree/doc.rb +149 -0
  14. data/lib/htree/elem.rb +262 -0
  15. data/lib/htree/encoder.rb +217 -0
  16. data/lib/htree/equality.rb +219 -0
  17. data/lib/htree/extract_text.rb +37 -0
  18. data/lib/htree/fstr.rb +32 -0
  19. data/lib/htree/gencode.rb +193 -0
  20. data/lib/htree/htmlinfo.rb +672 -0
  21. data/lib/htree/inspect.rb +108 -0
  22. data/lib/htree/leaf.rb +92 -0
  23. data/lib/htree/loc.rb +369 -0
  24. data/lib/htree/modules.rb +49 -0
  25. data/lib/htree/name.rb +122 -0
  26. data/lib/htree/output.rb +212 -0
  27. data/lib/htree/parse.rb +410 -0
  28. data/lib/htree/raw_string.rb +127 -0
  29. data/lib/htree/regexp-util.rb +19 -0
  30. data/lib/htree/rexml.rb +131 -0
  31. data/lib/htree/scan.rb +176 -0
  32. data/lib/htree/tag.rb +113 -0
  33. data/lib/htree/template.rb +961 -0
  34. data/lib/htree/text.rb +115 -0
  35. data/lib/htree/traverse.rb +497 -0
  36. data/test-all.rb +5 -0
  37. data/test/assign.html +1 -0
  38. data/test/template.html +4 -0
  39. data/test/test-attr.rb +67 -0
  40. data/test/test-charset.rb +79 -0
  41. data/test/test-context.rb +29 -0
  42. data/test/test-display_xml.rb +45 -0
  43. data/test/test-elem-new.rb +101 -0
  44. data/test/test-encoder.rb +53 -0
  45. data/test/test-equality.rb +55 -0
  46. data/test/test-extract_text.rb +18 -0
  47. data/test/test-gencode.rb +27 -0
  48. data/test/test-leaf.rb +25 -0
  49. data/test/test-loc.rb +60 -0
  50. data/test/test-namespace.rb +147 -0
  51. data/test/test-output.rb +133 -0
  52. data/test/test-parse.rb +115 -0
  53. data/test/test-raw_string.rb +17 -0
  54. data/test/test-rexml.rb +70 -0
  55. data/test/test-scan.rb +153 -0
  56. data/test/test-security.rb +37 -0
  57. data/test/test-subnode.rb +142 -0
  58. data/test/test-template.rb +313 -0
  59. data/test/test-text.rb +43 -0
  60. data/test/test-traverse.rb +69 -0
  61. metadata +166 -0
  62. metadata.gz.sig +1 -0
@@ -0,0 +1,4 @@
1
+ gS����B��ߦGgLAn���۱�� ���R�����nq�9繯�d�%���mZ��uo�ݷd�b��=j��4�6H��� �>�\��ٺ��9s���'�~�R�fh� ����R%�\;�k
2
+ ��[����
3
+ f��Z�<���lQ4XD�,�y�桾��ʛ���5��������g�Y8�^�<�_l�GknmL�-c�!�=�θ��4���:b4%u�L�N�8G�<0���c�
4
+ ������w�� kXB�
@@ -0,0 +1,20 @@
1
+ RUBY=ruby
2
+
3
+ all: README rdoc/index.html
4
+
5
+ README: misc/README.erb
6
+ erb misc/README.erb > README
7
+
8
+ check test:
9
+ $(RUBY) -I. test-all.rb
10
+
11
+ install:
12
+ $(RUBY) install.rb
13
+
14
+ .PHONY: check test all install
15
+
16
+ RB = htree.rb htree/modules.rb $(wildcard htree/[a-l]*.rb) $(wildcard htree/[n-z]*.rb)
17
# Rebuild the RDoc HTML for the library sources.
# The previous output directory is removed first so pages for deleted or
# renamed files do not linger; it must be the same directory passed to --op.
rdoc/index.html: $(RB)
	rm -rf rdoc
	rdoc --op rdoc $(RB)
20
+
@@ -0,0 +1,58 @@
1
+ Makefile
2
+ README
3
+ Rakefile
4
+ init.rb
5
+ install.rb
6
+ lib/htree.rb
7
+ lib/htree/container.rb
8
+ lib/htree/context.rb
9
+ lib/htree/display.rb
10
+ lib/htree/doc.rb
11
+ lib/htree/elem.rb
12
+ lib/htree/encoder.rb
13
+ lib/htree/equality.rb
14
+ lib/htree/extract_text.rb
15
+ lib/htree/fstr.rb
16
+ lib/htree/gencode.rb
17
+ lib/htree/htmlinfo.rb
18
+ lib/htree/inspect.rb
19
+ lib/htree/leaf.rb
20
+ lib/htree/loc.rb
21
+ lib/htree/modules.rb
22
+ lib/htree/name.rb
23
+ lib/htree/output.rb
24
+ lib/htree/parse.rb
25
+ lib/htree/raw_string.rb
26
+ lib/htree/regexp-util.rb
27
+ lib/htree/rexml.rb
28
+ lib/htree/scan.rb
29
+ lib/htree/tag.rb
30
+ lib/htree/template.rb
31
+ lib/htree/text.rb
32
+ lib/htree/traverse.rb
33
+ test-all.rb
34
+ test/assign.html
35
+ test/template.html
36
+ test/test-attr.rb
37
+ test/test-charset.rb
38
+ test/test-context.rb
39
+ test/test-display_xml.rb
40
+ test/test-elem-new.rb
41
+ test/test-encoder.rb
42
+ test/test-equality.rb
43
+ test/test-extract_text.rb
44
+ test/test-gencode.rb
45
+ test/test-leaf.rb
46
+ test/test-loc.rb
47
+ test/test-namespace.rb
48
+ test/test-output.rb
49
+ test/test-parse.rb
50
+ test/test-raw_string.rb
51
+ test/test-rexml.rb
52
+ test/test-scan.rb
53
+ test/test-security.rb
54
+ test/test-subnode.rb
55
+ test/test-template.rb
56
+ test/test-text.rb
57
+ test/test-traverse.rb
58
+ Manifest
data/README ADDED
@@ -0,0 +1,61 @@
1
+ = htree - HTML/XML tree library
2
+
3
+ htree provides a tree data structure which represents HTML and XML data.
4
+
5
+ == Feature
6
+
7
+ * Permissive unified HTML/XML parser
8
+ * byte-to-byte round-tripping unparser
9
+ * XML namespace support
10
+ * Dedicated class for escaped strings. This eases sanitization.
11
+ * HTML/XHTML/XML generator
12
+ * template engine
13
+ * recursive template expansion
14
+ * converter to REXML document
15
+
16
+ == Home Page
17
+
18
+ http://www.a-k-r.org/htree/
19
+
20
+ == Requirements
21
+
22
+ * ruby : http://www.ruby-lang.org/
23
+
24
+ == Download
25
+
26
+ * latest release: http://www.a-k-r.org/htree/htree-0.7.tar.gz
27
+
28
+ * development version in CVS repository:
29
+
30
+ http://cvs.m17n.org/viewcvs/ruby/htree/ or checkout with:
31
+
32
+ % cvs -d :pserver:anonymous@cvs.m17n.org:/cvs/ruby co htree
33
+
34
+ == Install
35
+
36
+ % ruby install.rb
37
+
38
+ == Reference Manual
39
+
40
+ See rdoc/index.html or
41
+ http://www.a-k-r.org/htree/rdoc/
42
+
43
+ == Usage
44
+
45
+ The following two-line script converts HTML to XHTML.
46
+
47
+ require 'htree'
48
+ HTree(STDIN).display_xml
49
+
50
+ The conversion method to REXML is provided as to_rexml.
51
+
52
+ HTree(...).to_rexml
53
+
54
+ == License
55
+
56
+ Ruby's
57
+
58
+ == Author
59
+ Tanaka Akira <akr@fsij.org>
60
+
61
+ [packaged as gem- andrew packer <agp@ppolitics.org>]
@@ -0,0 +1,37 @@
1
# Rakefile for the htree gem: packaging via Echoe, plus test and RDoc tasks.
require 'rubygems'
require 'rake'
require 'rake/testtask'

# 'rake/rdoctask' (Rake::RDocTask) is not shipped by current Rake releases;
# RDoc provides the replacement task class.  Prefer it, and fall back to the
# old name so the file still loads on the Rake versions it was written for.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

require 'echoe'

# Gem packaging metadata.
Echoe.new('htree', '0.7.0') do |p|
  p.summary = "HTML/XML tree library"
  p.description = "Htree provides a tree data structure which represent HTML and XML data"
  p.url = "http://www.a-k-r.org/htree/"
  p.author = "Tanaka Akira"
  p.email = "akr@fsij.org"
  p.ignore_pattern = ["tmp/*", "script/*"]
  p.development_dependencies = []
end

# Load any additional task definitions that sit next to this file.
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }

desc 'Default: run unit tests.'
task :default => :test

desc 'Test'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/test-*.rb'
  t.verbose = true
end

desc 'RDoc'
rdoc_task_class.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'HTree'
  rdoc.options << '-NS'
  rdoc.rdoc_files.include('README')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for htree 0.7.0.
#
# Setters that do not exist in every RubyGems release are guarded with
# respond_to? so the file loads on old and new RubyGems alike.
Gem::Specification.new do |s|
  s.name = %q{htree}
  s.version = "0.7.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Tanaka Akira"]
  s.date = %q{2010-01-27}
  s.description = %q{Htree provides a tree data structure which represent HTML and XML data}
  s.email = %q{akr@fsij.org}
  s.extra_rdoc_files = ["README", "lib/htree.rb", "lib/htree/container.rb", "lib/htree/context.rb", "lib/htree/display.rb", "lib/htree/doc.rb", "lib/htree/elem.rb", "lib/htree/encoder.rb", "lib/htree/equality.rb", "lib/htree/extract_text.rb", "lib/htree/fstr.rb", "lib/htree/gencode.rb", "lib/htree/htmlinfo.rb", "lib/htree/inspect.rb", "lib/htree/leaf.rb", "lib/htree/loc.rb", "lib/htree/modules.rb", "lib/htree/name.rb", "lib/htree/output.rb", "lib/htree/parse.rb", "lib/htree/raw_string.rb", "lib/htree/regexp-util.rb", "lib/htree/rexml.rb", "lib/htree/scan.rb", "lib/htree/tag.rb", "lib/htree/template.rb", "lib/htree/text.rb", "lib/htree/traverse.rb"]
  s.files = ["Makefile", "README", "Rakefile", "init.rb", "install.rb", "lib/htree.rb", "lib/htree/container.rb", "lib/htree/context.rb", "lib/htree/display.rb", "lib/htree/doc.rb", "lib/htree/elem.rb", "lib/htree/encoder.rb", "lib/htree/equality.rb", "lib/htree/extract_text.rb", "lib/htree/fstr.rb", "lib/htree/gencode.rb", "lib/htree/htmlinfo.rb", "lib/htree/inspect.rb", "lib/htree/leaf.rb", "lib/htree/loc.rb", "lib/htree/modules.rb", "lib/htree/name.rb", "lib/htree/output.rb", "lib/htree/parse.rb", "lib/htree/raw_string.rb", "lib/htree/regexp-util.rb", "lib/htree/rexml.rb", "lib/htree/scan.rb", "lib/htree/tag.rb", "lib/htree/template.rb", "lib/htree/text.rb", "lib/htree/traverse.rb", "test-all.rb", "test/assign.html", "test/template.html", "test/test-attr.rb", "test/test-charset.rb", "test/test-context.rb", "test/test-display_xml.rb", "test/test-elem-new.rb", "test/test-encoder.rb", "test/test-equality.rb", "test/test-extract_text.rb", "test/test-gencode.rb", "test/test-leaf.rb", "test/test-loc.rb", "test/test-namespace.rb", "test/test-output.rb", "test/test-parse.rb", "test/test-raw_string.rb", "test/test-rexml.rb", "test/test-scan.rb", "test/test-security.rb", "test/test-subnode.rb", "test/test-template.rb", "test/test-text.rb", "test/test-traverse.rb", "Manifest", "htree.gemspec"]
  s.homepage = %q{http://www.a-k-r.org/htree/}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Htree", "--main", "README"]
  s.require_paths = ["lib"]
  # rubyforge_project= is deprecated in newer RubyGems; set it only where supported.
  s.rubyforge_project = %q{htree} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{HTML/XML tree library}

  # The signing key lives on the packager's machine.  Configure signing only
  # when that key is actually present, so the spec still loads and builds
  # (unsigned) everywhere else.
  signing_key = '/Users/agp/.gem/gem-private_key.pem'
  if File.exist?(signing_key)
    s.signing_key = signing_key
    s.cert_chain = ['gem-public_cert.pem']
  end

  # The generated version check that used to follow had empty branches and
  # referenced Gem::RubyGemsVersion, which modern RubyGems no longer defines
  # (NameError at load time), so it has been dropped.
  s.specification_version = 3 if s.respond_to? :specification_version=
end
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'htree'
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # usage: ruby install.rb [-n] [--destdir=DESTDIR]
4
+ # options:
5
+ # -n : don't install
6
+ # --destdir=DESTDIR
7
+ #
8
+ # Author: Tanaka Akira <akr@m17n.org>
9
+
10
+ require 'optparse'
11
+ require 'fileutils'
12
+
13
# Returns the first entry of the load path ($:) that ends in
# "/site_ruby/<version>", which is used as the installation directory.
# Raises a RuntimeError when no load-path entry has that shape.
def target_directory
  site_dir = $:.find { |path| %r{/site_ruby/[\d.]+\z} =~ path }
  raise "could not find target install directory" unless site_dir
  site_dir
end
21
+
22
# Cache of directory => result of cvs_files (a hash, or nil when the
# directory has no CVS subdirectory).
CVS_FILES = {}

# Returns a hash whose keys are the file and directory names listed in
# "<dir>/CVS/Entries" (each mapped to true), or nil if "<dir>/CVS" is not a
# directory.  Results, including nil, are memoized in CVS_FILES.
def cvs_files(dir)
  return CVS_FILES[dir] if CVS_FILES.key?(dir)

  entries = nil
  if File.directory?("#{dir}/CVS")
    entries = {}
    File.foreach("#{dir}/CVS/Entries") do |line|
      # "/name/..." is a file entry, "D/name/..." a directory entry.
      name = line[%r{\AD?/([^/]+)/}, 1]
      entries[name] = true if name
    end
  end
  CVS_FILES[dir] = entries
end
39
+
40
# Yields each installable Ruby file: every "*.rb" in the current directory
# whose name does not start with "test-" or "install" and, when the
# directory is under CVS control, is listed in CVS/Entries; each such file is
# followed by the files it requires (see each_require).
def each_target(&block)
  seen = {}
  tracked = cvs_files('.')
  Dir.glob("*.rb") do |filename|
    next if /\A(?:test-|install)/ =~ filename
    next if tracked && !tracked.include?(filename)
    seen[filename] = true
    yield filename
    each_require(filename, seen, &block)
  end
end
52
+
53
# Scans +file+ for lines of the form  require 'feature'  and, for every
# "feature.rb" that exists relative to the current directory and is not yet
# recorded in +target_set+, records it, yields its name and recurses into it.
def each_require(file, target_set, &block)
  File.foreach(file) do |line|
    feature = line[/\A\s*require\s+['"]([^'"]+)['"]/, 1]
    next unless feature

    required = "#{feature}.rb"
    next if target_set.include?(required) || !File.exist?(required)

    target_set[required] = true
    yield required
    each_require(required, target_set, &block)
  end
end
65
+
66
# Returns the names yielded by each_target as a sorted array.
def collect_target
  names = []
  each_target { |name| names.push(name) }
  names.sort
end
72
+
73
# Copies +src+ to +dst+ unless both already have identical contents.
# An existing +dst+ is unlinked first, the destination directory is created
# as needed (mode 0755), and the installed file is set to mode 0644.
def install_file(src, dst)
  # A missing dst makes the comparison raise ENOENT; treat that as "differs".
  up_to_date = false
  ignore_exc(Errno::ENOENT) { up_to_date = FileUtils.compare_file(src, dst) }
  return if up_to_date

  # check shadow
  ignore_exc(Errno::ENOENT) { File.unlink(dst) }
  FileUtils.mkdir_p(File.dirname(dst), :mode => 0755)
  FileUtils.cp(src, dst, :verbose => true)
  File.chmod(0644, dst)
end
81
+
82
# Runs the given block and silently swallows exceptions matching +exc+.
# Returns the block's value, or nil when such an exception was raised.
def ignore_exc(exc)
  yield
rescue exc
end
88
+
89
# Command-line handling and driver for the installer.
#   -n                 only print where each file would be installed
#   --destdir=DESTDIR  prefix prepended to the target directory
$opt_n = false
$opt_destdir = ""
ARGV.options do |opts|
  opts.banner = 'ruby install.rb [opts]'
  opts.def_option('--help', 'show this message') { puts opts; exit(0) }
  opts.def_option('-n', "don't install") { $opt_n = true }
  opts.def_option('--destdir=DESTDIR', "specify DESTDIR") { |destdir| $opt_destdir = destdir }
  opts.parse!
end

# Only a real installation needs the umask adjusted.
File.umask 022 unless $opt_n
dir = target_directory
collect_target.each do |filename|
  destination = "#{$opt_destdir}#{dir}/#{filename}"
  if $opt_n
    puts "-> #{destination}"
  else
    install_file filename, destination
  end
end
exit if $opt_n
112
+
@@ -0,0 +1,97 @@
1
+ #
2
+ # = htree.rb
3
+ #
4
+ # HTML/XML document tree
5
+ #
6
+ # Author:: Tanaka Akira <akr@fsij.org>
7
+ #
8
+ # == Features
9
+ #
10
+ # - Permissive unified HTML/XML parser
11
+ # - byte-to-byte round-tripping unparser
12
+ # - XML namespace support
13
+ # - Dedicated class for escaped strings. This eases sanitization.
14
+ # - XHTML/XML generator
15
+ # - template engine: link:files/htree/template_rb.html
16
+ # - recursive template expansion
17
+ # - REXML tree generator: link:files/htree/rexml_rb.html
18
+ #
19
+ # == Example
20
+ #
21
+ # The following one-liner prints parsed tree object.
22
+ #
23
+ # % ruby -rhtree -e 'pp HTree(ARGF)' html-file
24
+ #
25
+ # The following two-line script converts HTML to XHTML.
26
+ #
27
+ # require 'htree'
28
+ # HTree(STDIN).display_xml
29
+ #
30
+ # The conversion method to REXML is provided as to_rexml.
31
+ #
32
+ # HTree(...).to_rexml
33
+ #
34
+ # == Module/Class Hierarchy
35
+ #
36
+ # * HTree
37
+ # * HTree::Name
38
+ # * HTree::Context
39
+ # * HTree::Location
40
+ # * HTree::Node
41
+ # * HTree::Doc
42
+ # * HTree::Elem
43
+ # * HTree::Text
44
+ # * HTree::XMLDecl
45
+ # * HTree::DocType
46
+ # * HTree::ProcIns
47
+ # * HTree::Comment
48
+ # * HTree::BogusETag
49
+ # * HTree::Error
50
+ #
51
+ # == Method Summary
52
+ #
53
+ # HTree provides the following methods.
54
+ #
55
+ # - Parsing Methods
56
+ # - HTree(<i>html_string</i>) -> HTree::Doc
57
+ # - HTree.parse(<i>input</i>) -> HTree::Doc
58
+ #
59
+ # - Generation Methods
60
+ # - HTree::Node#display_xml -> STDOUT
61
+ # - HTree::Node#display_xml(<i>out</i>) -> <i>out</i>
62
+ # - HTree::Node#display_xml(<i>out</i>, <i>encoding</i>) -> <i>out</i>
63
+ # - HTree::Text#to_s -> String
64
+ #
65
+ # - Template Methods
66
+ # - HTree.expand_template{<i>template_string</i>} -> STDOUT
67
+ # - HTree.expand_template(<i>out</i>){<i>template_string</i>} -> <i>out</i>
68
+ # - HTree.expand_template(<i>out</i>, <i>encoding</i>){<i>template_string</i>} -> <i>out</i>
69
+ # - HTree.compile_template(<i>template_string</i>) -> Module
70
+ # - HTree{<i>template_string</i>} -> HTree::Doc
71
+ #
72
+ # - Traverse Methods
73
+ # - HTree::Elem#attributes -> Hash[HTree::Name -> HTree::Text]
74
+ # - HTree::Elem::Location#attributes -> Hash[HTree::Name -> HTree::Location]
75
+ #
76
+ # - Predicate Methods
77
+ # - HTree::Traverse#doc? -> true or false
78
+ # - HTree::Traverse#elem? -> true or false
79
+ # - HTree::Traverse#text? -> true or false
80
+ # - HTree::Traverse#xmldecl? -> true or false
81
+ # - HTree::Traverse#doctype? -> true or false
82
+ # - HTree::Traverse#procins? -> true or false
83
+ # - HTree::Traverse#comment? -> true or false
84
+ # - HTree::Traverse#bogusetag? -> true or false
85
+ #
86
+ # - REXML Tree Generator
87
+ # - HTree::Node#to_rexml -> REXML::Child
88
+
89
+ require 'htree/parse'
90
+ require 'htree/extract_text'
91
+ require 'htree/equality'
92
+ require 'htree/inspect'
93
+ require 'htree/display'
94
+ require 'htree/loc'
95
+ require 'htree/traverse'
96
+ require 'htree/template'
97
+ require 'htree/rexml'
@@ -0,0 +1,8 @@
1
+ require 'htree/modules'
2
+
3
module HTree::Container
  # Returns the child nodes as an array.  The array is a shallow copy of the
  # internal @children, so changing it does not change this node's child list.
  def children
    copy = @children.dup
    copy
  end
end
@@ -0,0 +1,69 @@
1
module HTree
  # A Context holds an immutable mapping from namespace prefixes to
  # namespace URIs.
  class Context
    # :stopdoc:
    DefaultNamespaces = {'xml'=>'http://www.w3.org/XML/1998/namespace'}
    DefaultNamespaces.default = ""
    DefaultNamespaces.freeze
    # :startdoc:

    # The frozen prefix => URI hash of this context.
    attr_reader :namespaces

    # The optional argument _namespaces_ should be a hash or nil.
    # HTree::Context::DefaultNamespaces is used if nil is given.
    #
    # Each key must be nil (the default namespace) or a non-empty string
    # (a prefix).  Each value must be a string; the empty string ""
    # means an unbound namespace.
    #
    # Raises ArgumentError for an invalid key or value.  An unfrozen hash is
    # copied and the copy frozen; a frozen hash is stored as is.
    def initialize(namespaces=nil)
      namespaces ||= DefaultNamespaces
      namespaces.each_pair do |prefix, uri|
        check_namespace_prefix(prefix)
        check_namespace_uri(uri)
      end
      @namespaces = namespaces.frozen? ? namespaces : namespaces.dup.freeze
    end

    # Returns the namespace URI bound to _prefix_.
    # For an unknown prefix the result is whatever the underlying hash
    # yields for a missing key: nil for a plain hash, or "" for hashes
    # derived from DefaultNamespaces (whose default value is "").
    def namespace_uri(prefix)
      @namespaces[prefix]
    end

    # Returns a Context whose bindings are this context's bindings overridden
    # by the hash _declared_namespaces_.  Returns self when the declarations
    # change nothing.  Raises ArgumentError for an invalid prefix or URI.
    def subst_namespaces(declared_namespaces)
      merged = @namespaces.dup
      declared_namespaces.each do |prefix, uri|
        check_namespace_prefix(prefix)
        check_namespace_uri(uri)
        merged[prefix] = uri
      end
      return self if merged == @namespaces
      Context.new(merged)
    end

    private

    # A prefix is valid when it is nil or a non-empty String.
    def check_namespace_prefix(k)
      valid = (String === k && !k.empty?) || k == nil
      raise ArgumentError, "invalid namespace prefix: #{k.inspect}" unless valid
    end

    # A namespace URI must be a String.
    def check_namespace_uri(v)
      return if String === v
      raise ArgumentError, "invalid namespace URI: #{v.inspect}"
    end
  end

  # :stopdoc:
  DefaultContext = Context.new
  HTMLContext = DefaultContext.subst_namespaces(nil=>"http://www.w3.org/1999/xhtml")
  # :startdoc:
end