htree 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. data.tar.gz.sig +4 -0
  2. data/Makefile +20 -0
  3. data/Manifest +58 -0
  4. data/README +61 -0
  5. data/Rakefile +37 -0
  6. data/htree.gemspec +32 -0
  7. data/init.rb +1 -0
  8. data/install.rb +112 -0
  9. data/lib/htree.rb +97 -0
  10. data/lib/htree/container.rb +8 -0
  11. data/lib/htree/context.rb +69 -0
  12. data/lib/htree/display.rb +46 -0
  13. data/lib/htree/doc.rb +149 -0
  14. data/lib/htree/elem.rb +262 -0
  15. data/lib/htree/encoder.rb +217 -0
  16. data/lib/htree/equality.rb +219 -0
  17. data/lib/htree/extract_text.rb +37 -0
  18. data/lib/htree/fstr.rb +32 -0
  19. data/lib/htree/gencode.rb +193 -0
  20. data/lib/htree/htmlinfo.rb +672 -0
  21. data/lib/htree/inspect.rb +108 -0
  22. data/lib/htree/leaf.rb +92 -0
  23. data/lib/htree/loc.rb +369 -0
  24. data/lib/htree/modules.rb +49 -0
  25. data/lib/htree/name.rb +122 -0
  26. data/lib/htree/output.rb +212 -0
  27. data/lib/htree/parse.rb +410 -0
  28. data/lib/htree/raw_string.rb +127 -0
  29. data/lib/htree/regexp-util.rb +19 -0
  30. data/lib/htree/rexml.rb +131 -0
  31. data/lib/htree/scan.rb +176 -0
  32. data/lib/htree/tag.rb +113 -0
  33. data/lib/htree/template.rb +961 -0
  34. data/lib/htree/text.rb +115 -0
  35. data/lib/htree/traverse.rb +497 -0
  36. data/test-all.rb +5 -0
  37. data/test/assign.html +1 -0
  38. data/test/template.html +4 -0
  39. data/test/test-attr.rb +67 -0
  40. data/test/test-charset.rb +79 -0
  41. data/test/test-context.rb +29 -0
  42. data/test/test-display_xml.rb +45 -0
  43. data/test/test-elem-new.rb +101 -0
  44. data/test/test-encoder.rb +53 -0
  45. data/test/test-equality.rb +55 -0
  46. data/test/test-extract_text.rb +18 -0
  47. data/test/test-gencode.rb +27 -0
  48. data/test/test-leaf.rb +25 -0
  49. data/test/test-loc.rb +60 -0
  50. data/test/test-namespace.rb +147 -0
  51. data/test/test-output.rb +133 -0
  52. data/test/test-parse.rb +115 -0
  53. data/test/test-raw_string.rb +17 -0
  54. data/test/test-rexml.rb +70 -0
  55. data/test/test-scan.rb +153 -0
  56. data/test/test-security.rb +37 -0
  57. data/test/test-subnode.rb +142 -0
  58. data/test/test-template.rb +313 -0
  59. data/test/test-text.rb +43 -0
  60. data/test/test-traverse.rb +69 -0
  61. metadata +166 -0
  62. metadata.gz.sig +1 -0
@@ -0,0 +1,5 @@
# Loads every test/test-*.rb file with Ruby warnings enabled.
$VERBOSE = true

# Anchor the pattern to the directory that holds this script so the test files
# are found regardless of the current working directory, and sort the matches
# because Dir.glob does not guarantee an ordering on older Rubies.
# NOTE(review): individual test files may still open fixtures through paths
# relative to the project root -- confirm before running from elsewhere.
test_pattern = File.join(File.dirname(File.expand_path(__FILE__)), 'test', 'test-*.rb')

Dir.glob(test_pattern).sort.each do |filename|
  load filename
end
@@ -0,0 +1 @@
1
+ <span _text="htree_test_toplevel_local_variable = :modified" />
@@ -0,0 +1,4 @@
1
+ <?xml version="1.0"?>
2
+ <html>
3
+ <title _text="self">dummy_title</title>
4
+ </html>
@@ -0,0 +1,67 @@
require 'test/unit'
require 'htree/tag'
require 'htree/elem'
require 'htree/traverse'

# Tests for the attribute accessors of HTree::Elem and of the location object
# returned by make_loc.
class TestAttr < Test::Unit::TestCase
  # The *_attribute iterators must yield HTree::Name / HTree::Text pairs.
  def test_each_attribute
    yielded = 0
    prefixed_attr_elem.each_attribute do |n, v|
      yielded += 1
      assert_instance_of(HTree::Name, n)
      assert_instance_of(HTree::Text, v)
      assert_equal('{u}n', n.universal_name)
      assert_equal('a&amp;b', v.rcdata)
    end
    # Without this check the assertions above would pass vacuously if the
    # iterator never invoked the block.
    assert(yielded > 0, 'each_attribute did not yield any attribute')
  end

  # The *_attr iterators must yield plain String pairs.
  def test_each_attr
    yielded = 0
    prefixed_attr_elem.each_attr do |n, v|
      yielded += 1
      assert_instance_of(String, n)
      assert_instance_of(String, v)
      assert_equal('{u}n', n)
      assert_equal('a&b', v)
    end
    assert(yielded > 0, 'each_attr did not yield any attribute')
  end

  def test_fetch_attribute
    t = prefixed_attr_elem
    v = t.fetch_attribute('{u}n')
    assert_instance_of(HTree::Text, v)
    assert_equal('a&amp;b', v.rcdata)
    # A missing name returns the supplied default, or raises without one.
    assert_equal('y', t.fetch_attribute('x', 'y'))
    assert_raises(IndexError) { t.fetch_attribute('x') }
  end

  def test_get_attribute
    t = prefixed_attr_elem
    v = t.get_attribute('{u}n')
    assert_instance_of(HTree::Text, v)
    assert_equal('a&amp;b', v.rcdata)
    assert_nil(t.get_attribute('x'))
  end

  def test_get_attr
    t = prefixed_attr_elem
    v = t.get_attr('{u}n')
    assert_instance_of(String, v)
    assert_equal('a&b', v)
    assert_nil(t.get_attr('x'))
  end

  def test_loc_get_attr
    t = HTree::Elem.new('e', {'k'=>'v'})
    v = t.make_loc.get_attr('k')
    assert_instance_of(String, v)
    assert_equal('v', v)
    v = t.make_loc.fetch_attr('k')
    assert_instance_of(String, v)
    assert_equal('v', v)
  end

  private

  # Builds a fresh <ename> element carrying the single attribute p:n="a&b",
  # with the prefix "p" bound to the namespace URI "u".
  def prefixed_attr_elem
    context = HTree::DefaultContext.subst_namespaces({'p'=>'u'})
    HTree::Elem.new!(HTree::STag.new('ename', [['p:n', 'a&b']], context))
  end
end
@@ -0,0 +1,79 @@
require 'test/unit'
require 'htree/parse'

# Checks that HTree.parse output compares equal to the same character written
# in the charset selected by each test.
class TestCharset < Test::Unit::TestCase
  # $KCODE is only saved and restored on Rubies whose strings do not respond
  # to force_encoding.
  def setup
    @old_kcode = $KCODE unless "".respond_to? :force_encoding
  end

  def teardown
    $KCODE = @old_kcode unless "".respond_to? :force_encoding
  end

  # Gives +str+ a #read method returning itself and a #charset method
  # returning +charset+, tags its encoding when force_encoding is available,
  # and returns the same string object.
  def self.mark_string(str, charset)
    def str.read
      self
    end
    singleton = class << str; self end
    singleton.__send__(:define_method, :charset) { charset }
    str.force_encoding charset if str.respond_to? :force_encoding
    str
  end

  # HIRAGANA LETTER A in various charset
  UTF8 = mark_string("\343\201\202", 'UTF-8')
  EUCKR = mark_string("\252\242", 'EUC-KR')
  EUCJP = mark_string("\244\242", 'EUC-JP')
  SJIS = mark_string("\202\240", 'Shift_JIS')
  ISO2022JP = mark_string("\e$B$\"\e(B", 'ISO-2022-JP')

  # With force_encoding available, yields only when the encoder's internal
  # charset name starts with the upcased +kcode+. Otherwise yields with
  # $KCODE temporarily set to +kcode+.
  def with_kcode(kcode)
    if "".respond_to? :force_encoding
      yield if HTree::Encoder.internal_charset.start_with?(kcode.upcase)
    else
      saved = $KCODE
      begin
        $KCODE = kcode
        yield
      ensure
        $KCODE = saved
      end
    end
  end

  # Parses each sample in turn and compares the first child's text with
  # +expected+.
  def assert_all_parse_to(expected)
    [UTF8, EUCKR, EUCJP, SJIS, ISO2022JP].each do |source|
      assert_equal(expected, HTree.parse(source).children[0].to_s)
    end
  end

  def test_u
    with_kcode('u') { assert_all_parse_to(UTF8) }
  end

  def test_e
    with_kcode('e') { assert_all_parse_to(EUCJP) }
  end

  def test_s
    with_kcode('s') { assert_all_parse_to(SJIS) }
  end
end
@@ -0,0 +1,29 @@
require 'test/unit'
require 'htree/context'

# Tests for namespace handling in HTree::Context.
class TestContext < Test::Unit::TestCase
  def test_namespaces_validation
    # Each of these mappings must be rejected by the constructor.
    [{1=>'u'}, {''=>'u'}, {'p'=>nil}].each do |rejected|
      assert_raise(ArgumentError) { HTree::Context.new(rejected) }
    end
    # A nil prefix is accepted.
    assert_nothing_raised { HTree::Context.new({nil=>'u'}) }
  end

  def test_namespace_uri
    assert_equal('http://www.w3.org/XML/1998/namespace',
                 HTree::Context.new.namespace_uri('xml'))

    default_bound = HTree::Context.new({nil=>'u'})
    assert_equal('u', default_bound.namespace_uri(nil))

    assert_equal('u', HTree::Context.new({'p'=>'u'}).namespace_uri('p'))
    assert_equal(nil, HTree::Context.new({'p'=>'u'}).namespace_uri('q'))
  end

  def test_subst_namespaces
    base = HTree::Context.new({'p'=>'u'})
    extended = base.subst_namespaces({'q'=>'v'})

    # The receiver keeps its own bindings only.
    assert_equal('u', base.namespace_uri('p'))
    assert_equal(nil, base.namespace_uri('q'))

    # The derived context sees both the old and the new binding.
    assert_equal('u', extended.namespace_uri('p'))
    assert_equal('v', extended.namespace_uri('q'))
  end
end
@@ -0,0 +1,45 @@
require 'test/unit'
require 'htree/elem'
require 'htree/display'

# Tests for the xmlns declarations emitted by display_xml.
class TestXMLNS < Test::Unit::TestCase
  # Asserts that +node+ serializes to +expected+ when written to an empty
  # string with the US-ASCII charset.
  def assert_xml(expected, node)
    actual = node.display_xml('', 'US-ASCII')
    assert_equal(expected, actual)
  end

  # A fresh name with prefix "p", namespace URI "u" and local name "n".
  def p_u_n
    HTree::Name.new('p', 'u', 'n')
  end

  def test_update_xmlns_empty
    plain = HTree::Elem.new('n')
    assert_xml("<n\n/>", plain)
  end

  def test_reduce_xmlns
    single = HTree::Elem.new('p:n', {'xmlns:p'=>'u'})
    assert_xml("<p:n xmlns:p=\"u\"\n/>", single)

    same_binding = HTree::Elem.new('n', {'xmlns:p'=>'u'},
                                   HTree::Elem.new('p:n', {'xmlns:p'=>'u'}))
    assert_xml("<n xmlns:p=\"u\"\n><p:n\n/></n\n>", same_binding)

    rebound = HTree::Elem.new('n', {'xmlns:p'=>'u'},
                              HTree::Elem.new('p:n', {'xmlns:p'=>'v'}))
    assert_xml("<n xmlns:p=\"u\"\n><p:n xmlns:p=\"v\"\n/></n\n>", rebound)
  end

  def test_extra_xmlns
    bare = HTree::Elem.new(p_u_n)
    assert_xml("<p:n xmlns:p=\"u\"\n/>", bare)

    nested = HTree::Elem.new('nn', HTree::Elem.new(p_u_n))
    assert_xml("<nn\n><p:n xmlns:p=\"u\"\n/></nn\n>", nested)

    parent_declares = HTree::Elem.new('nn', {'xmlns:p'=>'u'}, HTree::Elem.new(p_u_n))
    assert_xml("<nn xmlns:p=\"u\"\n><p:n\n/></nn\n>", parent_declares)

    parent_conflicts = HTree::Elem.new('nn', {'xmlns:p'=>'v'}, HTree::Elem.new(p_u_n))
    assert_xml("<nn xmlns:p=\"v\"\n><p:n xmlns:p=\"u\"\n/></nn\n>", parent_conflicts)
  end
end
@@ -0,0 +1,101 @@
require 'test/unit'
require 'htree/doc'
require 'htree/elem'
require 'htree/equality'
require 'htree/traverse'

# Tests for how HTree::Elem.new interprets its arguments (attribute hashes,
# child nodes, strings, arrays and contexts).
class TestElemNew < Test::Unit::TestCase
  def test_empty
    e = HTree::Elem.new('a')
    assert_equal('a', e.qualified_name)
    assert_equal({}, e.attributes)
    assert_equal(HTree::DefaultContext, inherited_context_of(e))
    assert_equal([], e.children)
    assert_equal(true, e.empty_element?)
    assert_nil(end_tag_of(e))
  end

  # An explicit (empty) child array makes the element non-empty.
  def test_empty_array
    e = HTree::Elem.new('a', [])
    assert_equal('a', e.qualified_name)
    assert_equal({}, e.attributes)
    assert_equal(HTree::DefaultContext, inherited_context_of(e))
    assert_equal([], e.children)
    assert_equal(false, e.empty_element?)
    assert_nil(end_tag_of(e))
  end

  def test_empty_attr
    e = HTree::Elem.new('a', {'href'=>'xxx'})
    assert_equal('a', e.qualified_name)
    expected_attrs = {
      HTree::Name.parse_attribute_name('href', HTree::DefaultContext) => HTree::Text.new('xxx')
    }
    assert_equal(expected_attrs, e.attributes)
    assert_equal(HTree::DefaultContext, inherited_context_of(e))
    assert_equal([], e.children)
    assert_equal(true, e.empty_element?)
    assert_nil(end_tag_of(e))
  end

  def test_node
    t = HTree::Text.new('t')
    e = HTree::Elem.new('a', t)
    assert_equal({}, e.attributes)
    assert_equal([t], e.children)
  end

  def test_hash
    t = HTree::Text.new('t')
    e = HTree::Elem.new('a', {'b' => t})
    assert_equal([['b', t]], e.attributes.map {|n,v| [n.universal_name, v] })
    assert_equal([], e.children)
  end

  def test_string
    t = HTree::Text.new('s')
    e = HTree::Elem.new('a', "s")
    assert_equal({}, e.attributes)
    assert_equal([t], e.children)
  end

  # Hashes and nodes may be interleaved: hashes accumulate as attributes,
  # nodes accumulate as children.
  def test_interleave
    t = HTree::Text.new('t')
    e = HTree::Elem.new('a', t, {'b' => t}, t, {'c' => 'd'}, t)
    assert_equal([['b', t], ['c', HTree::Text.new('d')]],
                 e.attributes.map {|n,v| [n.universal_name, v] }.sort)
    assert_equal([t, t, t], e.children)
  end

  def test_nest
    t = HTree::Text.new('t')
    b = HTree::BogusETag.new('a')
    x = HTree::Elem.new('e', HTree::XMLDecl.new('1.0'))
    d = HTree::Elem.new('e', HTree::DocType.new('html'))
    e = HTree::Elem.new('a', [t, t, t, b, x, d])
    assert_equal({}, e.attributes)
    assert_equal([t, t, t, b, x, d], e.children)
  end

  def test_err
    assert_raises(TypeError) { HTree::Elem.new('e', HTree::STag.new('a')) }
    assert_raises(TypeError) { HTree::Elem.new('e', HTree::ETag.new('a')) }
  end

  def test_context
    context = HTree::DefaultContext.subst_namespaces({'p'=>'u'})
    elem = HTree::Elem.new('p:n', {'p:a'=>'t'}, context)
    assert_equal('{u}n', elem.name)
    assert_equal('t', elem.get_attr('{u}a'))

    assert_same(context, inherited_context_of(elem))
    # Supplying a context twice is an error.
    assert_raises(ArgumentError) { HTree::Elem.new('e', context, context) }
  end

  def test_hash_in_array
    attrs = [{'a'=>'1'}, {'a'=>'2'}]
    assert_raises(TypeError) { HTree::Elem.new('e', attrs) }
    attrs.pop
    assert_raises(TypeError) { HTree::Elem.new('e', attrs) }
    attrs.pop
    assert_equal([], attrs)
    assert_equal(false, HTree::Elem.new('e', attrs).empty_element?)
  end

  private

  # Reads the context stored on the element's start tag. These tests reach
  # into @stag because the value is only checked through that instance
  # variable here.
  def inherited_context_of(elem)
    elem.instance_variable_get(:@stag).inherited_context
  end

  # Reads the element's @etag instance variable.
  def end_tag_of(elem)
    elem.instance_variable_get(:@etag)
  end
end
@@ -0,0 +1,53 @@
require 'test/unit'
require 'htree/encoder'

# Tests for HTree::Encoder#minimal_charset and the bytes returned by #finish.
class TestEncoder < Test::Unit::TestCase
  EUC_JISX0212_CH = "\217\260\241" # cannot encode with Shift_JIS.
  EUC_JISX0208_CH = "\260\241"
  if EUC_JISX0212_CH.respond_to? :force_encoding
    [EUC_JISX0212_CH, EUC_JISX0208_CH].each {|sample| sample.force_encoding("EUC-JP") }
  end

  # Checks the charset reported before any output, then feeds each text
  # through output_text and checks the charset reported after it.
  def assert_text_steps(out, initial, steps)
    assert_equal(initial, out.minimal_charset)
    steps.each do |text, charset|
      out.output_text(text)
      assert_equal(charset, out.minimal_charset)
    end
  end

  def test_minimal_charset
    out = HTree::Encoder.new('Shift_JIS', 'EUC-JP')
    assert_text_steps(out, "US-ASCII", [
      ["a", "US-ASCII"],
      [EUC_JISX0212_CH, "US-ASCII"],
      ["b", "US-ASCII"],
    ])
    assert_equal("a&#19970;b", out.finish)
  end

  def test_minimal_charset_2
    out = HTree::Encoder.new('ISO-2022-JP-2', 'EUC-JP')
    assert_text_steps(out, "US-ASCII", [
      ["a", "US-ASCII"],
      [EUC_JISX0208_CH, "ISO-2022-JP"],
      ["b", "ISO-2022-JP"],
      [EUC_JISX0212_CH, "ISO-2022-JP-2"],
    ])
    assert_equal("a\e$B0!\e(Bb\e$(D0!\e(B", out.finish)
  end

  def test_minimal_charset_u
    out = HTree::Encoder.new('UTF-16BE', 'EUC-JP')
    assert_text_steps(out, "UTF-16BE", [
      ["a", "UTF-16BE"],
    ])
    assert_equal("\000a", out.finish)
  end

  # This test feeds the encoder through output_string rather than
  # output_text.
  def test_close
    out = HTree::Encoder.new('ISO-2022-JP', 'EUC-JP')
    out.output_string(EUC_JISX0208_CH)
    assert_equal("ISO-2022-JP", out.minimal_charset)
    assert_equal("\e$B0!\e(B", out.finish)
  end
end
@@ -0,0 +1,55 @@
require 'test/unit'
require 'htree/equality'

# Tests that tags and elements compare equal regardless of namespace prefix.
class TestEQQ < Test::Unit::TestCase
  # Passes when expected.exact_equal?(actual) is truthy.
  def assert_exact_equal(expected, actual, message=nil)
    # NOTE(review): the leading whitespace of the template lines is not
    # recoverable from this listing; column 0 is assumed -- confirm.
    template = <<EOT
<?> expected but was
<?>.
EOT
    full_message = build_message(message, template, expected, actual)
    assert_block(full_message) { expected.exact_equal? actual }
  end

  # Asserts assert_equal(a, b) for every ordered pair drawn from +items+,
  # including each item paired with itself.
  def assert_pairwise_equal(items)
    items.each do |left|
      items.each do |right|
        assert_equal(left, right)
      end
    end
  end

  def test_tag_name_prefix
    p1_context = HTree::DefaultContext.subst_namespaces({'p1'=>'u'})
    p2_context = HTree::DefaultContext.subst_namespaces({'p2'=>'u'})
    assert_pairwise_equal([
      HTree::STag.new('{u}n'),
      HTree::STag.new('p1{u}n'),
      HTree::STag.new('p2{u}n'),
      HTree::STag.new('p1:n', [], p1_context),
      HTree::STag.new('p2:n', [], p2_context),
    ])
  end

  def test_tag_attribute_name_prefix
    p1_context = HTree::DefaultContext.subst_namespaces({'p1'=>'u'})
    p2_context = HTree::DefaultContext.subst_namespaces({'p2'=>'u'})
    assert_pairwise_equal([
      HTree::STag.new('n', [['p1{u}a', 'v']]),
      HTree::STag.new('n', [['p2{u}a', 'v']]),
      HTree::STag.new('n', [['p1:a', 'v']], p1_context),
      HTree::STag.new('n', [['p2:a', 'v']], p2_context),
    ])
  end

  def test_element
    assert_equal(HTree::Elem.new('p1{u}n'), HTree::Elem.new('p2{u}n'))

    assert_equal(HTree::Elem.new('n', {'p1{u}a'=>'v'}),
                 HTree::Elem.new('n', {'p2{u}a'=>'v'}))

    # exact_equal? must still tell the two prefixes apart.
    with_p1 = HTree::Elem.new('n', {'p1{u}a'=>'v'})
    with_p2 = HTree::Elem.new('n', {'p2{u}a'=>'v'})
    assert(!with_p1.exact_equal?(with_p2))
  end

  def test_tag_namespaces
    assert_nothing_raised do
      context = HTree::DefaultContext.subst_namespaces({nil=>"u1", "p"=>"u2"})
      HTree::STag.new("n", [], context).make_exact_equal_object
    end
  end
end