hparser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/inline/collectable'
6
+ module HParser
7
+ module Inline
8
# Inline element representing a bare http/https URL found in text.
class Url
  include Collectable

  # The URL string that was matched.
  attr_reader :url

  # Try to consume a URL at the scanner's current position.
  #
  # +scanner+ must respond to +scan+ and +matched+.
  # Returns a new Url wrapping the matched text, or nil when the
  # pattern does not match.
  def self.parse(scanner)
    Url.new(scanner.matched) if scanner.scan(%r!https?://[A-Za-z0-9./]+!)
  end

  def initialize(url)
    @url = url
  end

  # Two nodes are equal when they have the same class and the same URL.
  #
  # The class comparison comes first so that objects without a +url+
  # reader (for example other inline nodes) compare as unequal instead
  # of raising NoMethodError.
  def ==(o)
    self.class == o.class && @url == o.url
  end
end
25
+ end
26
+ end
@@ -0,0 +1,70 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/util/parser'
6
+ require 'hparser/inline/parser'
7
+ require 'hparser/block/collectable'
8
+ require 'hparser/util/line_scanner'
9
+
10
+ module HParser
11
# Block level parser. +hparser+ splits the hatena format into 2 levels.
#
# The high level is block elements. These can be identified by the first char.
#
# For example:
#  * head1
#  This is a block element. (paragraph)
#
#  - list
#  - is also
#  -- block element
#
# The low level is inline elements. Please see HParser::Inline::Parser.
class Parser
  include HParser::Util
  include HParser::Block

  # Make a parser from block parsers and an inline parser.
  #
  # This parser can parse +blocks+, and parses block content
  # with +inlines+.
  #
  # If an argument is not given, the default is used
  # (default_parser for +blocks+, a new HParser::Inline::Parser for +inlines+).
  def initialize(blocks=HParser::Parser.default_parser,
                 inlines=HParser::Inline::Parser.new)
    # One or more repetitions of: any block parser, followed by an
    # optional Empty element that is skipped.
    @blocks = Many1.new(Concat.new(Or.new(*blocks),
                                   Skip.new(Empty)))
    @inlines = inlines
  end

  # Parse hatena format.
  #
  # Returns an array of block elements. When no block can be parsed at
  # all, an empty array is returned.
  def parse(str)
    parsed = @blocks.parse(LineScanner.new(str.split("\n")), @inlines)

    # Many1#parse answers nil (not []) when its parser never succeeded;
    # guard so callers always receive an array.
    return [] unless parsed

    # Each entry is the Concat result [block, skip_result]; keep the block.
    parsed.map { |pair| pair[0] }
  end

  # Return an array of all usable parsers.
  #
  # This method collects all classes which include
  # HParser::Block::Collectable and sorts them by <=>.
  def self.default_parser
    parser = []
    ObjectSpace.each_object(Class) { |klass|
      parser.push klass if klass.include?(HParser::Block::Collectable)
    }

    # sorting parser.
    # e.g. Parser P should be after any other parser.
    #
    # When a <=> b gives nil, fall back to the negated reverse
    # comparison; a nil there counts as 0.
    parser.sort { |a, b|
      (a <=> b) || -(b <=> a).to_i
    }
  end
end
69
+ end
70
+
@@ -0,0 +1,43 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+ module HParser
5
+ module Util
6
# StringScanner-like class that works on an array of lines: each
# successful operation consumes the whole first line.
class LineScanner
  # The line consumed by the most recent successful #scan.
  attr_reader :matched

  # +lines+ is the array to scan. Consumed lines are removed from this
  # very array (via +shift+), it is not copied.
  def initialize(lines)
    @lines = lines
  end

  # If the first line matches +exp+, remove it, remember it as #matched
  # and return it. Otherwise return nil and leave #matched untouched.
  def scan(exp)
    return nil unless match?(exp)

    @matched = @lines.shift
  end

  # Like #scan, but does not update #matched.
  def skip(exp)
    match?(exp) ? @lines.shift : nil
  end

  # True when there is a first line and it matches +exp+: by pattern
  # match when +exp+ is a Regexp, otherwise (or additionally) by ==.
  def match?(exp)
    return false if @lines == []

    head = @lines[0]
    return true if exp.class == Regexp && head =~ exp

    (head == exp) ? true : false
  end
end
41
+ end
42
+ end
43
+
@@ -0,0 +1,74 @@
1
+ module HParser
2
+ module Util
3
# Combinator: apply the wrapped parser repeatedly, one or more times.
class Many1
  def initialize(parser)
    @parser = parser
  end

  # Call the wrapped parser with +args+ until it returns nil or false.
  #
  # Returns the array of truthy results in order, or nil when the very
  # first attempt already failed.
  def parse(*args)
    collected = []
    step = @parser.parse(*args)
    while step
      collected << step
      step = @parser.parse(*args)
    end
    collected unless collected.empty?
  end
end
16
+
17
+ class Skip
18
+ def initialize(parser)
19
+ @parser = parser
20
+ end
21
+
22
+ def parse(*args)
23
+ @parser.parse(*args)
24
+ true
25
+ end
26
+ end
27
+
28
# Combinator: run several parsers in sequence; all of them must succeed.
class Concat
  def initialize(*parsers)
    @parsers = parsers
  end

  # Call each parser in order with +args+.
  #
  # Returns the array of their results, or nil as soon as one parser
  # returns nil/false (later parsers are then not called).
  def parse(*args)
    @parsers.map do |sub|
      piece = sub.parse(*args)
      return nil unless piece

      piece
    end
  end
end
45
+
46
# Combinator: try several parsers in order and take the first success.
class Or
  def initialize(*parsers)
    @parsers = parsers
  end

  # Call each parser with +args+ until one returns a truthy value and
  # return that value.
  #
  # When none succeeds, the (falsy) result of the last parser tried is
  # returned; with no parsers at all the result is nil.
  def parse(*args)
    outcome = nil
    @parsers.each do |candidate|
      outcome = candidate.parse(*args)
      return outcome if outcome
    end
    outcome
  end
end
62
+
63
# Adapter that lets a block be used wherever an object with a +parse+
# method is expected.
class ProcParser
  # The given block becomes the parsing routine.
  def initialize(&proc)
    @proc = proc
  end

  # Forward +args+ to the stored block and return its result.
  def parse(*args)
    @proc.(*args)
  end
end
72
+ end
73
+ end
74
+
data/lib/hparser.rb ADDED
@@ -0,0 +1,8 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/parser'
6
+ require 'hparser/block/all'
7
+ require 'hparser/inline/all'
8
+ require 'hparser/html'
@@ -0,0 +1,75 @@
1
+ require 'test/unit'
2
+ require 'hparser/parser'
3
+ require 'hparser/block/list'
4
+
5
# Tests for block-level parsing with a default-constructed HParser::Parser:
# unordered/ordered lists (flat, nested and mixed) and a blank line
# between two blocks.
+ class BlockTest < Test::Unit::TestCase
6
+ include HParser::Block
7
+ include HParser::Inline
8
+
9
# Build a parser with the default block and inline parsers.
+ def setup
10
+ @parser = HParser::Parser.new
11
+ end
12
+
13
# Shorthand: parse +str+ with the parser created in setup.
+ def parse str
14
+ @parser.parse str
15
+ end
16
+
17
# A blank line between a list/heading and a paragraph must not
# produce an extra element in the result.
+ def test_extra_empty
18
+ assert_equal [Ul.new(li('a')),P.new([Text.new('b')])],parse(<<-END)
19
+ -a
20
+
21
+ b
22
+ END
23
+
24
+ assert_equal [Head.new(1,[Text.new('a')]),P.new([Text.new('b')])],parse(<<-END)
25
+ *a
26
+
27
+ b
28
+ END
29
+ end
30
+
31
# "-" items form a Ul; "--" opens a nested Ul.
+ def test_ul
32
+ assert_equal [Ul.new(li('a'),li('b'),li('c'))],
33
+ parse(<<-END)
34
+ - a
35
+ - b
36
+ - c
37
+ END
38
+
39
+ assert_equal [Ul.new(li('a'),Ul.new(li('b')),li('c'))],
40
+ parse(<<-END)
41
+ - a
42
+ -- b
43
+ - c
44
+ END
45
+ end
46
+
47
# "+" items form an Ol; "++" opens a nested Ol.
+ def test_ol
48
+ assert_equal [Ol.new(li('a'),li('b'),li('c'))],
49
+ parse(<<-END)
50
+ + a
51
+ + b
52
+ + c
53
+ END
54
+
55
+ assert_equal [Ol.new(li('a'),Ol.new(li('b')),li('c'))],
56
+ parse(<<-END)
57
+ + a
58
+ ++ b
59
+ + c
60
+ END
61
+ end
62
+
63
# Nested lists of different kinds inside one outer Ul.
+ def test_list
64
+ assert_equal [Ul.new(li('a'),Ol.new(li('b')),Ul.new(li('c')))],
65
+ parse(<<-END)
66
+ - a
67
+ ++ b
68
+ -- c
69
+ END
70
+ end
71
+
72
# Helper: a list item containing a single Text node.
+ def li str
73
+ Li.new([Text.new(str)])
74
+ end
75
+ end
data/test/test_head.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'test/unit'
2
+ require 'hparser/parser'
3
+ require 'hparser/block/head'
4
+ require 'hparser/inline/text'
5
# Tests for parsing heading lines (leading "*" characters) into Head nodes,
# using a parser restricted to the Head block parser.
class HeadTest < Test::Unit::TestCase
  include HParser::Block
  include HParser::Inline

  def setup
    @parser = HParser::Parser.new([Head])
  end

  # Shorthand: parse +str+ with the parser created in setup.
  def parse(str)
    @parser.parse(str)
  end

  # The number of leading "*" gives the heading level.
  def test_head
    [[1, "*aaa"], [2, "**aaa"], [3, "***aaa"]].each do |level, source|
      assert_equal([head(level, "aaa")], parse(source))
    end
  end

  # Whitespace around the heading text is not part of the result.
  def test_strip_space
    expected = [head(1, "aaa")]
    assert_equal(expected, parse("* aaa "))
  end

  # Very deep heading levels are accepted as well.
  def test_long
    stars = '*' * 100
    assert_equal([head(100, "aaa")], parse("#{stars}aaa"))
  end

  # Helper: a Head of +level+ containing a single Text node.
  def head(level, str)
    Head.new(level, [Text.new(str)])
  end
end
data/test/test_html.rb ADDED
@@ -0,0 +1,54 @@
1
+ require 'test/unit'
2
+ require 'hparser/block/all'
3
+ require 'hparser/inline/all'
4
+ require 'hparser/html'
5
+
6
# Tests for the HTML rendering (+to_html+) of block-level nodes.
class HtmlTest < Test::Unit::TestCase
  include HParser::Block
  include HParser::Inline

  def setup
    @parser = HParser::Parser.new
  end

  # Assert that +node+ renders to exactly the +expect+ string.
  def assert_html(expect, node)
    assert_equal(expect, node.to_html)
  end

  def test_head
    assert_html('<h1>foo</h1>', Head.new(1, [Text.new('foo')]))
  end

  def test_p
    assert_html('<p>foobar</p>', P.new([Text.new('foobar')]))
    assert_html('<p><br /></p>', Empty.new)
  end

  def test_pre
    assert_html('<pre>foobar</pre>', Pre.new([Text.new('foobar')]))
    assert_html('<pre>foobar</pre>', SuperPre.new('foobar'))
  end

  def test_quote
    assert_html('<blockquote>foobar</blockquote>', Quote.new([Text.new('foobar')]))
  end

  def test_table
    header_row = [th('foo'), th('bar')]
    data_row = [td('baz'), td('xyzzy')]
    assert_html('<table><tr><th>foo</th><th>bar</th></tr><tr><td>baz</td><td>xyzzy</td></tr></table>',
                Table.new(header_row, data_row))
  end

  def test_list
    assert_html('<ul><li>aaa</li><li>bbb</li></ul>', Ul.new(li('aaa'), li('bbb')))
    assert_html('<ol><li>aaa</li><li>bbb</li></ol>', Ol.new(li('aaa'), li('bbb')))
    assert_html('<ol><ul><li>aaa</li></ul><li>bbb</li></ol>',
                Ol.new(Ul.new(li('aaa')), li('bbb')))
  end

  # Helper: a header cell containing a single Text node.
  def th(str)
    Th.new([Text.new(str)])
  end

  # Helper: a data cell containing a single Text node.
  def td(str)
    Td.new([Text.new(str)])
  end

  # Helper: a list item containing a single Text node.
  def li(str)
    Li.new([Text.new(str)])
  end
end
data/test/test_id.rb ADDED
@@ -0,0 +1,22 @@
1
+ require 'test/unit'
2
+ require 'hparser/inline/hatena_id'
3
+ require 'hparser/inline/parser'
4
+
5
# Tests for recognising "id:NAME" as a HatenaId inline node.
class IdTest < Test::Unit::TestCase
  include HParser::Inline

  def setup
    @parser = Parser.new(HatenaId)
  end

  # Shorthand: parse +str+ with the parser created in setup.
  def parse(str)
    @parser.parse(str)
  end

  def test_id
    expected = [HatenaId.new("mzp")]
    assert_equal(expected, parse("id:mzp"))
  end

  # Currently has no active assertion; the disabled one is kept as is.
  def test_jp
    # assert_equal [HatenaId.new("mzp"),Text.new("は")],parse("id:mzpは")
  end
end
@@ -0,0 +1,20 @@
1
+ require 'test/unit'
2
+ require 'hparser/inline/url'
3
+ require 'hparser/inline/text'
4
+ require 'hparser/inline/hatena_id'
5
+ require 'hparser/inline/parser'
6
+
7
# Smoke test for the inline parser with its default configuration.
class InlineTest < Test::Unit::TestCase
  include HParser::Inline

  def setup
    @inline = Parser.new
  end

  # Shorthand: parse +str+ with the parser created in setup.
  def parse(str)
    @inline.parse(str)
  end

  # Plain text comes back as a single Text node.
  def test_text
    expected = [Text.new("foo is bar")]
    assert_equal(expected, parse("foo is bar"))
  end
end
@@ -0,0 +1,34 @@
1
+ require 'test/unit'
2
+ require 'hparser/inline/all'
3
+ require 'hparser/html'
4
+
5
# Tests for the HTML rendering (+to_html+) of parsed inline elements.
class HtmlInlineTest < Test::Unit::TestCase
  include HParser::Inline

  def setup
    @parser = Parser.new
  end

  # Parse +str+, render every resulting node and compare with +expect+.
  # A single String expectation is treated as a one-element list.
  def assert_html(expect, str)
    expect = [expect] if expect.class == String

    rendered = @parser.parse(str).map { |node| node.to_html }
    assert_equal(expect, rendered)
  end

  # Assert that +str+ renders to itself, unchanged.
  def assert_same(str)
    assert_html([str], str)
  end

  def test_text
    assert_same('foo is bar')
    assert_same('<a href="http://mzp.sakura.ne.jp">link!</a>')
  end

  def test_id
    assert_html('<a href="http://d.hatena.ne.jp/mzp/">id:mzp</a>', 'id:mzp')
  end

  def test_url
    assert_html('<a href="http://mzp.sakura.ne.jp">http://mzp.sakura.ne.jp</a>',
                'http://mzp.sakura.ne.jp')
  end
end
data/test/test_p.rb ADDED
@@ -0,0 +1,54 @@
1
+ require 'test/unit'
2
+ require 'hparser/parser'
3
+ require 'hparser/block/p'
4
+
5
# Tests for paragraph parsing with a parser limited to the Empty and P
# block parsers: single lines, consecutive lines and blank lines.
+ class PTest < Test::Unit::TestCase
6
+ include HParser::Block
7
+ include HParser::Inline
8
+
9
# Build a parser that only knows the Empty and P block parsers.
+ def setup
10
+ @parser = HParser::Parser.new [Empty,P]
11
+ end
12
+
13
# Shorthand: parse +str+ with the parser created in setup.
+ def parse str
14
+ @parser.parse str
15
+ end
16
+
17
# A single line becomes a single paragraph.
+ def test_normal
18
+ assert_equal [p("aaa")], parse("aaa")
19
+ end
20
+
21
# A trailing blank line does not add an element.
+ def test_has_blank
22
+ assert_equal [p("aaa")], parse(<<-END)
23
+ aaa
24
+
25
+ END
26
+ end
27
+
28
# Each line is its own paragraph, with or without one blank line
# between them.
+ def test_multi_line
29
+ assert_equal [p('aaa'),p('bbb')], parse(<<-END)
30
+ aaa
31
+ bbb
32
+ END
33
+
34
+ assert_equal [p("aaa"),p('bbb')], parse(<<-END)
35
+ aaa
36
+
37
+ bbb
38
+ END
39
+ end
40
+
41
# Two blank lines in a row yield one Empty element between the paragraphs.
+ def test_empty
42
+ assert_equal [p("aaa"),Empty.new,p('bbb')],
43
+ parse(<<-END)
44
+ aaa
45
+
46
+
47
+ bbb
48
+ END
49
+ end
50
+
51
# Helper: a paragraph containing a single Text node.
# NOTE(review): this shadows Kernel#p inside the test case.
+ def p str
52
+ P.new([Text.new(str)])
53
+ end
54
+ end
data/test/test_pair.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'test/unit'
2
+ require 'hparser/parser'
3
+ require 'hparser/block/quote'
4
+ require 'hparser/block/pre'
5
+ require 'hparser/block/super_pre'
6
# Tests for blocks delimited by an opening and a closing marker:
# Quote (">>" "<<"), Pre (">|" "|<") and SuperPre (">||" "||<").
class QuoteTest < Test::Unit::TestCase
  include HParser::Block
  include HParser::Inline

  def setup
    @parser = HParser::Parser.new([SuperPre, Pre, Quote])
  end

  # Shorthand: parse +str+ with the parser created in setup.
  def parse(str)
    @parser.parse(str)
  end

  # Assert that "aaa" wrapped in +from+/+to+ parses to one +klass+ node
  # holding a single Text, both on one line and spread over three lines.
  def assert_pair(from, to, klass)
    ["#{from}aaa#{to}", "#{from}\naaa\n#{to}"].each do |source|
      assert_equal([klass.new([Text.new("aaa")])], parse(source))
    end
  end

  def test_quote
    assert_pair(">>", "<<", Quote)
  end

  def test_pre
    assert_pair(">|", "|<", Pre)
  end

  def test_spre
    assert_pair(">||", "||<", SuperPre)
  end
end
@@ -0,0 +1,40 @@
1
+ require 'test/unit'
2
+ require 'hparser/parser'
3
+ require 'hparser/block/table'
4
+ require 'hparser/block/p'
5
+
6
# Tests for table parsing with a parser limited to the Table and P
# block parsers.
+ class TableTest < Test::Unit::TestCase
7
+ include HParser::Block
8
+ include HParser::Inline
9
+
10
# Build a parser that only knows the Table and P block parsers.
+ def setup
11
+ @parser = HParser::Parser.new [Table,P]
12
+ end
13
+
14
# Shorthand: parse +str+ with the parser created in setup.
+ def parse str
15
+ @parser.parse str
16
+ end
17
+
18
# Rows are "|"-separated; a cell starting with "*" is a header cell.
+ def test_table
19
+ assert_equal [Table.new([th('name'),th('desc') ],
20
+ [td('foo') ,td('foo is ....')],
21
+ [td('bar') ,td('bar is ....')])],
22
+ parse(<<-END)
23
+ |*name|*desc |
24
+ |foo |foo is ....|
25
+ |bar |bar is ....|
26
+ END
27
+ end
28
+
29
# Helper: a header cell containing a single Text node.
+ def th str
30
+ Th.new [Text.new(str)]
31
+ end
32
+
33
# Helper: a data cell containing a single Text node.
+ def td str
34
+ Td.new [Text.new(str)]
35
+ end
36
+
37
# A "|" in the middle of a line does not make the line a table.
+ def test_p
38
+ assert_equal [P.new([Text.new("a|aa")])], parse("a|aa")
39
+ end
40
+ end
data/test/test_url.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'test/unit'
2
+ require 'hparser/inline/parser'
3
+ require 'hparser/inline/url'
4
+
5
# Tests for recognising bare URLs inside inline text, using an inline
# parser restricted to Url and Text.
class UrlTest < Test::Unit::TestCase
  include HParser::Inline

  def setup
    @parser = Parser.new([Url, Text])
  end

  # Shorthand: parse +str+ with the parser created in setup.
  def parse(str)
    @parser.parse(str)
  end

  def test_http
    assert_equal([Url.new("http://example.com")], parse("http://example.com"))
  end

  # URLs are split out of the surrounding text.
  def test_text
    wrapped = [Text.new("<em>"), Url.new("http://example.com"), Text.new("</em>")]
    assert_equal(wrapped, parse("<em>http://example.com</em>"))

    leading = [Url.new("http://foo.com"), Text.new(" is dummy")]
    assert_equal(leading, parse("http://foo.com is dummy"))
  end

  # A URL inside an <a> element stays part of the text.
  def test_a
    source = "<a>http://example.com</a>"
    assert_equal([Text.new("<a>http://example.com</a>")], parse(source))
  end

  def test_https
    assert_equal([Url.new("https://example.com")], parse("https://example.com"))
  end
end