hparser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +17 -0
- data/lib/hparser/block/all.rb +1 -0
- data/lib/hparser/block/collectable.rb +7 -0
- data/lib/hparser/block/head.rb +45 -0
- data/lib/hparser/block/list.rb +101 -0
- data/lib/hparser/block/p.rb +59 -0
- data/lib/hparser/block/pair.rb +52 -0
- data/lib/hparser/block/pre.rb +15 -0
- data/lib/hparser/block/quote.rb +11 -0
- data/lib/hparser/block/super_pre.rb +19 -0
- data/lib/hparser/block/table.rb +71 -0
- data/lib/hparser/html.rb +144 -0
- data/lib/hparser/inline/all.rb +1 -0
- data/lib/hparser/inline/collectable.rb +6 -0
- data/lib/hparser/inline/hatena_id.rb +30 -0
- data/lib/hparser/inline/parser.rb +48 -0
- data/lib/hparser/inline/text.rb +35 -0
- data/lib/hparser/inline/url.rb +26 -0
- data/lib/hparser/parser.rb +70 -0
- data/lib/hparser/util/line_scanner.rb +43 -0
- data/lib/hparser/util/parser.rb +74 -0
- data/lib/hparser.rb +8 -0
- data/test/test_block.rb +75 -0
- data/test/test_head.rb +34 -0
- data/test/test_html.rb +54 -0
- data/test/test_id.rb +22 -0
- data/test/test_inline.rb +20 -0
- data/test/test_inline_html.rb +34 -0
- data/test/test_p.rb +54 -0
- data/test/test_pair.rb +34 -0
- data/test/test_table.rb +40 -0
- data/test/test_url.rb +34 -0
- metadata +86 -0
data/README
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
=Hatena Format Parser
|
2
|
+
==Overview
|
3
|
+
+hparser+ is a parser for the Hatena format. This format is used at Hatena Diary (http://d.hatena.ne.jp/).
|
4
|
+
If you want to know more detail about hatena format, please see http://hatenadiary.g.hatena.ne.jp/keyword/%e3%81%af%e3%81%a6%e3%81%aa%e8%a8%98%e6%b3%95%e4%b8%80%e8%a6%a7
|
5
|
+
|
6
|
+
+hparser+ is built from several small parsers (e.g. header parser, list parser, and so on).
|
7
|
+
So new formats can be added to +hparser+, and unused formats can be removed.
|
8
|
+
|
9
|
+
==Basic usage
|
10
|
+
To parse hatena format,please use Hatena::Parser.
|
11
|
+
|
12
|
+
require 'hatena/parser'
|
13
|
+
require 'hatena/block/all'
|
14
|
+
require 'hatena/inline/all'
|
15
|
+
|
16
|
+
parser = Hatena::Parser.new
|
17
|
+
puts parser.parse(some_text)
|
@@ -0,0 +1 @@
|
|
1
|
+
# Require every Ruby file that sits in the same directory as this one.
Dir[File.dirname(__FILE__)+'/*.rb'].each do |path|
  require path
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/block/collectable'
|
6
|
+
module HParser
|
7
|
+
module Block
|
8
|
+
# Header parser.
#
# A header is a line that starts with '*'. The number of leading '*'
# characters gives the header level.
#
# For example:
#  * level1
#  ** level2
#  *** level3
class Head
  include Collectable

  attr_reader :level, :content

  # Returns a Head when +scanner+ accepts a line starting with '*',
  # otherwise nil. The text after the leading '*' run is stripped and
  # handed to +inlines+ for inline parsing.
  def self.parse(scanner, inlines)
    return nil unless scanner.scan(/\A\*/)

    line = scanner.matched
    # Length of the leading run of '*' characters.
    level = line[/\A\*+/].to_s.length
    Head.new(level, inlines.parse(line[level..-1].strip))
  end

  def initialize(level, content)
    @level = level
    @content = content
  end

  # Two headers are equal when class, level and content all match.
  def ==(o)
    o.class == self.class && o.level == level && o.content == content
  end
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
# This code should be rewritten.
|
5
|
+
# Ul and Ol depend on each other.
|
6
|
+
|
7
|
+
require 'hparser/block/collectable'
|
8
|
+
require 'hparser/util/parser'
|
9
|
+
module HParser
|
10
|
+
module Block
|
11
|
+
include HParser::Util
|
12
|
+
# Builds a parser for one nesting level of a list.
#
# level:: nesting depth this parser handles
# mark::  marker string passed on to Li.make_parser
#
# The given block receives the parsed entries and its result becomes the
# parser's result. When the inner parser yields nothing, the block is not
# called and the parser returns nil.
def self.make_list_parser(level, mark, &builder)
  ProcParser.new do |scanner, inlines|
    # The alternatives are built at parse time, not when the parser is made.
    # At level 3 only plain items are accepted; shallower levels also try
    # the nested unordered/ordered list parsers first.
    alternatives =
      if level == 3
        Li.make_parser(level, mark)
      else
        Or.new(UnorderList.make_parser(level + 1),
               OrderList.make_parser(level + 1),
               Li.make_parser(level, mark))
      end

    entries = Many1.new(alternatives).parse(scanner, inlines)
    builder.call(entries) if entries
  end
end
|
28
|
+
|
29
|
+
# Unordered list node. Its parsers are built with the '-' marker.
#
# The author marked this class as undocumented and likely to be rewritten.
class UnorderList
  include Collectable

  attr_reader :items

  # Parses a list starting at nesting level 1.
  def self.parse(scanner, inlines)
    top_level = Ul.make_parser(1)
    top_level.parse(scanner, inlines)
  end

  # Returns a parser for the given nesting level that wraps the parsed
  # entries in a Ul.
  def self.make_parser(level)
    Block.make_list_parser(level, '-') { |entries| Ul.new(*entries) }
  end

  def initialize(*items)
    @items = items
  end

  # Equal when the other object has the same class and the same items.
  def ==(o)
    o.class == self.class && o.items == items
  end
end
|
50
|
+
|
51
|
+
# Ordered list node. Its parsers are built with the '+' marker.
#
# The author marked this class as undocumented and likely to be rewritten.
class OrderList
  include Collectable

  attr_reader :items

  # Parses a list starting at nesting level 1.
  def self.parse(scanner, inlines)
    top_level = Ol.make_parser(1)
    top_level.parse(scanner, inlines)
  end

  # Returns a parser for the given nesting level that wraps the parsed
  # entries in an Ol.
  def self.make_parser(level)
    Block.make_list_parser(level, '+') { |entries| Ol.new(*entries) }
  end

  def initialize(*items)
    @items = items
  end

  # Equal when the other object has the same class and the same items.
  def ==(o)
    o.class == self.class && o.items == items
  end
end
|
72
|
+
|
73
|
+
# A single list entry.
#
# The author marked this class as undocumented and likely to be rewritten.
class ListItem
  attr_reader :content

  # Returns a parser that accepts a line beginning with +mark+ repeated
  # +level+ times and yields a ListItem holding the inline-parsed rest of
  # the line (stripped). The parser returns nil for any other line.
  def self.make_parser(level, mark)
    # Presumably mixed in here so the bare ProcParser constant resolves
    # from this class; kept inside the method as in the original.
    include HParser::Util

    # Build the prefix and its pattern once per parser, not on every scan.
    prefix = mark * level
    pattern = /\A#{Regexp.quote prefix}.*/

    ProcParser.new do |scanner, inlines|
      if scanner.scan(pattern)
        # Skip the whole prefix; slicing by +level+ alone would leave part
        # of a multi-character mark in the content.
        ListItem.new inlines.parse(scanner.matched[prefix.length..-1].strip)
      end
    end
  end

  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    o.class == self.class && o.content == content
  end
end
|
94
|
+
|
95
|
+
# Short aliases for the list classes; skipped when Ul is already defined.
unless defined?(Ul)
  Ul, Ol, Li = UnorderList, OrderList, ListItem
end
|
100
|
+
end
|
101
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/block/collectable'
|
6
|
+
module HParser
|
7
|
+
module Block
|
8
|
+
# Blank line parser. This parser should be used together with
# HParser::Block::P.
#
# For example:
#  aaaa
#  <blank>
#  <blank>
#
# According to the original author, the first and second lines are parsed
# by HParser::Block::P and the third line by HParser::Block::Empty.
class Empty
  include Collectable

  # Returns an Empty when the scanner accepts an empty line, otherwise nil.
  def self.parse(scanner, inlines)
    Empty.new if scanner.scan('')
  end

  # Every Empty equals every other Empty.
  def ==(o)
    o.class == self.class
  end
end
|
31
|
+
|
32
|
+
# Normal line parser.
#
# In the Hatena format, a line which is not parsed by any other parser is
# a paragraph.
class P
  include Collectable

  attr_reader :content

  # Returns a P holding the inline-parsed line when the scanner accepts a
  # non-empty line, otherwise nil.
  def self.parse(scanner, inlines)
    return nil unless scanner.scan(/./)

    P.new inlines.parse(scanner.matched)
  end

  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    self.class == o.class && self.content == o.content
  end

  # Ordering among parser classes: this parser should be last, so it
  # compares as greater than every other parser.
  #
  # The argument is a parser class, so the original test `o.class == P`
  # could never be true. Comparing against P itself now returns 0.
  def self.<=>(o)
    P.equal?(o) ? 0 : 1
  end
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/util/parser'
|
6
|
+
module HParser
|
7
|
+
module Block
|
8
|
+
# Some formats share a common structure.
#
# Quote is defined as
#  >>quoted string<<
#
# Pre is defined as
#  >|plain text|<
#
# In short, these formats differ only in their beginning/ending strings,
# so this class provides the shared structure for them.
class Pair
  attr_reader :content

  # Defines a class-level +parse(scanner, inlines)+ on the receiver for a
  # block delimited by +from+ and +to+.
  #
  # The generated parser handles two shapes:
  # * a single line that starts with +from+ and ends with +to+;
  # * a line starting with +from+, followed by further lines up to one
  #   that ends with +to+.
  # In both cases the text between the delimiters is stripped, inline
  # parsed and wrapped in a new instance of the receiver. For any other
  # line the parser returns nil.
  #
  # The parser is a closure rather than string-evaluated source, so
  # delimiters may contain characters such as '/' that would break a
  # generated regexp literal.
  def self.spliter(from, to)
    from_len = from.length
    to_len = to.length
    one_line = /\A#{Regexp.quote from}(.*)#{Regexp.quote to}\Z/
    opening = /\A#{Regexp.quote from}/
    closing = /#{Regexp.quote to}\Z/

    # Defined on the singleton class so that +parse+ is a class method of
    # whichever class called +spliter+.
    (class << self; self; end).send(:define_method, :parse) do |scanner, inlines|
      if scanner.scan(one_line)
        content = scanner.matched
        new inlines.parse(content[from_len...-to_len].strip)
      elsif scanner.scan(opening)
        content = scanner.matched[from_len..-1]
        # Collect lines until one ends with the closing delimiter.
        until scanner.scan(closing)
          content += "\n" + scanner.scan(/./)
        end
        content += "\n" + scanner.matched[0...-to_len]
        new inlines.parse(content.strip)
      end
    end
  end

  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    self.class == o.class && self.content == o.content
  end
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/block/collectable'
|
6
|
+
require 'hparser/block/pair'
|
7
|
+
module HParser
|
8
|
+
module Block
|
9
|
+
# Pre format: a block delimited by '>|' and '|<'.
class Pre < Pair
  include Collectable

  spliter('>|', '|<')
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'hparser/block/pair'
|
2
|
+
require 'hparser/block/collectable'
|
3
|
+
module HParser
|
4
|
+
module Block
|
5
|
+
# Super pre parser: a block delimited by '>||' and '||<'.
class SuperPre < Pair
  include Collectable

  spliter('>||', '||<')

  # Ordering among parser classes: 1 when compared with Pre (if Pre is
  # defined in Block), -1 against anything else.
  #
  # NOTE(review): 1 appears to place SuperPre after Pre, yet a '>||' line
  # also starts with '>|' -- confirm this is the ordering the block parser
  # expects.
  def self.<=>(o)
    return 1 if Block.const_defined?(:Pre) && o == Pre

    -1
  end
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/block/collectable'
|
6
|
+
module HParser
|
7
|
+
module Block
|
8
|
+
# Table parser.
|
9
|
+
class Table
|
10
|
+
attr_reader :rows
|
11
|
+
include Collectable
|
12
|
+
# Parses consecutive lines starting with '|' into a Table.
#
# Each line is split on '|' into cells. A cell whose text starts with '*'
# becomes a Th (with the '*' removed); any other cell becomes a Td. Cell
# text is stripped and inline parsed. Returns nil when no line matched.
def self.parse(scanner, inlines)
  rows = []
  while scanner.scan(/\A\|/)
    labels = scanner.matched[1..-1].split('|')
    rows.push(labels.map { |label|
      # label[0, 1] is '' for an empty cell, whereas label[0].chr raised
      # NoMethodError on nil for input such as "|a||b|".
      if label[0, 1] == '*'
        Th.new inlines.parse(label[1..-1].strip)
      else
        Td.new inlines.parse(label.strip)
      end
    })
  end
  rows.empty? ? nil : Table.new(*rows)
end
|
25
|
+
|
26
|
+
# Stores the given rows; each argument is one row.
def initialize(*rows)
  @rows = rows
end
|
29
|
+
|
30
|
+
# Equal when the other object has the same class and the same rows.
def ==(o)
  return false unless o.class == self.class

  o.rows == self.rows
end
|
33
|
+
|
34
|
+
# Maps the given block over the rows and returns the results.
def map_row(&block) # :yield: tr
  @rows.map(&block)
end
|
37
|
+
|
38
|
+
# Calls the given block once for each row.
#
# The rows live in @rows; the original read the unset @row and therefore
# always raised NoMethodError on nil.
def each_row(&f) # :yield: tr
  @rows.each(&f)
end
|
41
|
+
end
|
42
|
+
|
43
|
+
# A header cell of a table; holds the cell's content.
class TableHeader
  attr_reader :content

  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    return false unless o.class == self.class

    o.content == self.content
  end
end
|
53
|
+
|
54
|
+
# A data cell of a table; holds the cell's content.
class TableCell
  attr_reader :content

  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    return false unless o.class == self.class

    o.content == self.content
  end
end
|
64
|
+
|
65
|
+
# Short aliases for the table cell classes; skipped when Th is already
# defined.
unless defined?(Th)
  Th, Td = TableHeader, TableCell
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
data/lib/hparser/html.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
# This file defines +to_html+, which converts Hatena format to HTML.
|
5
|
+
#
|
6
|
+
|
7
|
+
module HParser
|
8
|
+
# This module provides the +to_html+ method, which renders a parsed node
# as an HTML element.
#
# A class including this module should implement two methods:
# +html_tag+, the name of the HTML tag to use, and +html_content+, the
# content placed inside that tag.
# If the content is an +Array+, each element is converted with its own
# +to_html+ and the results are joined. Otherwise it is used as is.
#
# For example, Head is implemented like this:
#  class HParser::Block::Head
#    include HParser::Html
#    def html_tag
#      "h#{level}"
#    end
#
#    alias_method :html_content, :content
#  end
#
module Html
  # Returns "<tag>content</tag>" built from +html_tag+ and +html_content+.
  def to_html
    inner = html_content
    inner = inner.map { |node| node.to_html }.join if inner.class == Array
    "<#{html_tag}>#{inner}</#{html_tag}>"
  end
end
|
42
|
+
|
43
|
+
# HTML rendering for the block-level nodes. Each class is reopened here
# to add its +to_html+ support.
module Block
  # Rendered as <hN>, where N is the header level.
  class Head
    include Html

    private

    def html_tag
      "h#{level}"
    end

    alias_method :html_content, :content
  end

  # Rendered as <p>.
  class P
    include Html

    private

    def html_tag
      'p'
    end

    alias_method :html_content, :content
  end

  # Rendered as a paragraph containing only a line break.
  class Empty
    def to_html
      '<p><br /></p>'
    end
  end

  # Rendered as <pre>.
  class Pre
    include Html

    private

    def html_tag
      'pre'
    end

    alias_method :html_content, :content
  end

  # Rendered as <pre>.
  class SuperPre
    include Html

    private

    def html_tag
      'pre'
    end

    alias_method :html_content, :content
  end

  # Rendered as <blockquote>.
  class Quote
    include Html

    private

    def html_tag
      'blockquote'
    end

    alias_method :html_content, :content
  end

  # Rendered as <table> with one <tr> per row. Th cells become <th>, all
  # other cells become <td>.
  class Table
    def to_html
      row_markup = map_row do |tr|
        cell_markup = tr.map do |cell|
          tag = cell.class == Th ? 'th' : 'td'
          inner = cell.content.map { |x| x.to_html }.join
          "<#{tag}>#{inner}</#{tag}>"
        end
        "<tr>#{cell_markup.join}</tr>"
      end
      "<table>#{row_markup.join}</table>"
    end
  end

  # Rendered as <ul> around its items.
  class UnorderList
    include Html

    private

    def html_tag() 'ul' end

    alias_method :html_content, :items
  end

  # Rendered as <ol> around its items.
  class OrderList
    include Html

    private

    def html_tag() 'ol' end

    alias_method :html_content, :items
  end

  # Rendered as <li>.
  class ListItem
    include Html

    private

    def html_tag() 'li' end

    alias_method :html_content, :content
  end
end
|
124
|
+
|
125
|
+
# HTML rendering for the inline nodes.
module Inline
  # Plain text is emitted unchanged.
  class Text
    def to_html
      text
    end
  end

  # A URL becomes a link whose label is the URL itself.
  class Url
    def to_html
      "<a href=\"#{url}\">#{url}</a>"
    end
  end

  # A Hatena id becomes a link to that user's page on d.hatena.ne.jp.
  class HatenaId
    def to_html
      "<a href=\"http://d.hatena.ne.jp/#{name}/\">id:#{name}</a>"
    end
  end
end
|
144
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# Require every Ruby file that sits in the same directory as this one.
Dir[File.dirname(__FILE__)+'/*.rb'].each do |path|
  require path
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
require 'hparser/inline/collectable'
|
5
|
+
|
6
|
+
module HParser
|
7
|
+
module Inline
|
8
|
+
# Hatena id parser.
#
# For example:
#  id:mzp
class HatenaId
  include Collectable

  attr_reader :name

  # Returns a HatenaId when the scanner matches "id:" followed by word
  # characters, otherwise nil. The name is the matched text without the
  # leading "id:".
  def self.parse(scanner)
    return nil unless scanner.scan(/id:\w+/)

    HatenaId.new scanner.matched[3..-1]
  end

  def initialize(name)
    @name = name
  end

  # Equal when the other object has the same class and the same name.
  def ==(o)
    self.class == o.class && @name == o.name
  end
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'strscan'
|
6
|
+
require 'hparser/inline/collectable'
|
7
|
+
require 'hparser/util/parser'
|
8
|
+
module HParser
|
9
|
+
module Inline
|
10
|
+
class Parser
|
11
|
+
include Util
|
12
|
+
# Builds the document parser: one or more repetitions (Many1) of any of
# the given inline parsers (Or). Defaults to Parser.default_parser.
def initialize(parsers=Parser.default_parser)
  alternatives = Or.new(*parsers)
  @document = Many1.new(alternatives)
end
|
15
|
+
|
16
|
+
# Parses +str+ into inline nodes and merges adjacent text nodes.
def parse(str)
  nodes = @document.parse(StringScanner.new(str))
  join_text(nodes)
end
|
20
|
+
|
21
|
+
# Collects every class that includes HParser::Inline::Collectable and
# returns them sorted by their class-level <=>.
def self.default_parser
  collected = ObjectSpace.each_object(Class).select do |klass|
    klass.include?(HParser::Inline::Collectable)
  end

  collected.sort do |a, b|
    # When a cannot compare itself with b, fall back to the negated
    # reverse comparison (0 if that is nil as well).
    direct = a <=> b
    direct || -(b <=> a).to_i
  end
end
|
32
|
+
|
33
|
+
private
|
34
|
+
# Merges runs of adjacent Text nodes into single Text nodes and returns
# the resulting array. Other nodes are kept as they are, in order.
#
# nil or empty input yields an empty array; the original recursive
# version raised NoMethodError on nil in both cases. The merge is done
# in a single pass, so long inputs do not deepen the call stack.
def join_text(nodes)
  (nodes || []).inject([]) do |merged, node|
    if merged.last.class == Text && node.class == Text
      merged[-1] = merged.last + node
    else
      merged << node
    end
    merged
  end
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
require 'hparser/inline/collectable'
|
5
|
+
|
6
|
+
module HParser
|
7
|
+
module Inline
|
8
|
+
# Plain text node of inline content.
class Text
  include Collectable

  attr_reader :text

  # Ordering among parser classes: always 1, whatever it is compared with.
  def self.<=>(o)
    1
  end

  # Returns a Text for either a whole "<a ... </a>" span or, failing
  # that, a single character. Returns nil when neither matches.
  def self.parse(scanner)
    return nil unless scanner.scan(%r!<a.*</a>!) || scanner.scan(/./)

    Text.new(scanner.matched)
  end

  def initialize(text)
    @text = text
  end

  # Returns a new Text holding both texts concatenated.
  def +(other)
    Text.new(text + other.text)
  end

  # Equal when the other object has the same class and the same text.
  def ==(o)
    o.class == self.class && @text == o.text
  end
end
|
34
|
+
end
|
35
|
+
end
|