hparser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +17 -0
- data/lib/hparser/block/all.rb +1 -0
- data/lib/hparser/block/collectable.rb +7 -0
- data/lib/hparser/block/head.rb +45 -0
- data/lib/hparser/block/list.rb +101 -0
- data/lib/hparser/block/p.rb +59 -0
- data/lib/hparser/block/pair.rb +52 -0
- data/lib/hparser/block/pre.rb +15 -0
- data/lib/hparser/block/quote.rb +11 -0
- data/lib/hparser/block/super_pre.rb +19 -0
- data/lib/hparser/block/table.rb +71 -0
- data/lib/hparser/html.rb +144 -0
- data/lib/hparser/inline/all.rb +1 -0
- data/lib/hparser/inline/collectable.rb +6 -0
- data/lib/hparser/inline/hatena_id.rb +30 -0
- data/lib/hparser/inline/parser.rb +48 -0
- data/lib/hparser/inline/text.rb +35 -0
- data/lib/hparser/inline/url.rb +26 -0
- data/lib/hparser/parser.rb +70 -0
- data/lib/hparser/util/line_scanner.rb +43 -0
- data/lib/hparser/util/parser.rb +74 -0
- data/lib/hparser.rb +8 -0
- data/test/test_block.rb +75 -0
- data/test/test_head.rb +34 -0
- data/test/test_html.rb +54 -0
- data/test/test_id.rb +22 -0
- data/test/test_inline.rb +20 -0
- data/test/test_inline_html.rb +34 -0
- data/test/test_p.rb +54 -0
- data/test/test_pair.rb +34 -0
- data/test/test_table.rb +40 -0
- data/test/test_url.rb +34 -0
- metadata +86 -0
data/README
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
=Hatena Format Parser
|
2
|
+
==Overview
|
3
|
+
+hparser+ is a parser for the Hatena format, the markup used by Hatena Diary (http://d.hatena.ne.jp/).
|
4
|
+
If you want to know more detail about hatena format, please see http://hatenadiary.g.hatena.ne.jp/keyword/%e3%81%af%e3%81%a6%e3%81%aa%e8%a8%98%e6%b3%95%e4%b8%80%e8%a6%a7
|
5
|
+
|
6
|
+
+hparser+ is built from several small parsers (e.g. a header parser, a list parser, and so on).
|
7
|
+
So new formats can be added to +hparser+, and unused formats can be removed.
|
8
|
+
|
9
|
+
==Basic usage
|
10
|
+
To parse the Hatena format, use HParser::Parser.
|
11
|
+
|
12
|
+
require 'hparser/parser'
|
13
|
+
require 'hparser/block/all'
|
14
|
+
require 'hparser/inline/all'
|
15
|
+
|
16
|
+
parser = HParser::Parser.new
|
17
|
+
puts parser.parse(some_text)
|
@@ -0,0 +1 @@
|
|
1
|
+
# Require every Ruby file that sits in the same directory as this file.
pattern = File.dirname(__FILE__) + '/*.rb'
Dir[pattern].each { |path| require path }
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/block/collectable'
|
6
|
+
module HParser
|
7
|
+
module Block
|
8
|
+
# Header parser.
|
9
|
+
#
|
10
|
+
# A header is defined as "a line which starts with '*'".
|
11
|
+
# The number of leading '*' characters gives its level.
|
12
|
+
#
|
13
|
+
# For example:
|
14
|
+
# * level1
|
15
|
+
# ** level2
|
16
|
+
# *** level3
|
17
|
+
class Head
  include Collectable

  attr_reader :level, :content

  # Parses one header line.
  #
  # +scanner+ must respond to +scan+ and +matched+, and +inlines+ must
  # respond to +parse+. Returns a Head when the current line starts with
  # '*', and nil otherwise.
  def self.parse(scanner, inlines)
    return nil unless scanner.scan(/\A\*/)

    line = scanner.matched
    # The level is the length of the leading run of '*' characters.
    depth = line[/\A\**/].length
    Head.new(depth, inlines.parse(line[depth..-1].strip))
  end

  # +level+ is the number of leading '*'; +content+ is whatever the inline
  # parser returned for the rest of the line.
  def initialize(level, content)
    @level = level
    @content = content
  end

  # Two headers are equal when class, level and content all match.
  def ==(o)
    o.class == self.class && o.level == level && o.content == content
  end
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
# This code should be rewritten.
|
5
|
+
# Ul and Ol depend on each other.
|
6
|
+
|
7
|
+
require 'hparser/block/collectable'
|
8
|
+
require 'hparser/util/parser'
|
9
|
+
module HParser
|
10
|
+
module Block
|
11
|
+
include HParser::Util
|
12
|
+
# Builds a parser for a list at nesting depth +level+ whose items start
# with +mark+ repeated +level+ times.
#
# The returned ProcParser collects one or more entries (Many1). Below
# depth 3 an entry may be a nested unordered list, a nested ordered list,
# or an item of this level, tried in that order; at depth 3 only items are
# accepted. When something was parsed, the entries are handed to the given
# block and its result is returned; otherwise the parser yields nil.
def self.make_list_parser(level, mark, &build)
  ProcParser.new do |scanner, inlines|
    entry =
      if level == 3
        Li.make_parser(level, mark)
      else
        Or.new(UnorderList.make_parser(level + 1),
               OrderList.make_parser(level + 1),
               Li.make_parser(level, mark))
      end

    entries = Many1.new(entry).parse(scanner, inlines)
    build.call(entries) if entries
  end
end
|
28
|
+
|
29
|
+
# This class undocumented.
|
30
|
+
# Maybe rewrite in near future.
|
31
|
+
class UnorderList
  include Collectable

  attr_reader :items

  # Parses an unordered list starting at nesting depth 1.
  def self.parse(scanner, inlines)
    top_level = Ul.make_parser(1)
    top_level.parse(scanner, inlines)
  end

  # Returns a parser for an unordered list ('-' marker) at depth +level+.
  def self.make_parser(level)
    Block.make_list_parser(level, '-') { |entries| Ul.new(*entries) }
  end

  # +items+ are the parsed entries (list items and nested lists).
  def initialize(*items)
    @items = items
  end

  # Equal when the other object has the same class and the same items.
  def ==(o)
    o.class == self.class && o.items == items
  end
end
|
50
|
+
|
51
|
+
# This class undocumented.
|
52
|
+
# Maybe rewrite in near future.
|
53
|
+
class OrderList
  include Collectable

  attr_reader :items

  # Parses an ordered list starting at nesting depth 1.
  def self.parse(scanner, inlines)
    top_level = Ol.make_parser(1)
    top_level.parse(scanner, inlines)
  end

  # Returns a parser for an ordered list ('+' marker) at depth +level+.
  def self.make_parser(level)
    Block.make_list_parser(level, '+') { |entries| Ol.new(*entries) }
  end

  # +items+ are the parsed entries (list items and nested lists).
  def initialize(*items)
    @items = items
  end

  # Equal when the other object has the same class and the same items.
  def ==(o)
    o.class == self.class && o.items == items
  end
end
|
72
|
+
|
73
|
+
# This class undocumented.
|
74
|
+
# Maybe rewrite in near future.
|
75
|
+
class ListItem
  attr_reader :content

  # Returns a ProcParser that accepts one line starting with +mark+
  # repeated +level+ times and turns it into a ListItem. The first +level+
  # characters of the line are dropped and the rest is stripped before
  # being handed to the inline parser.
  def self.make_parser(level, mark)
    # Mixed in here so that ProcParser resolves from inside this class.
    include HParser::Util

    item_line = /\A#{Regexp.quote mark*level}.*/
    ProcParser.new do |scanner, inlines|
      next nil unless scanner.scan(item_line)

      ListItem.new(inlines.parse(scanner.matched[level..-1].strip))
    end
  end

  # +content+ is whatever the inline parser returned for the item text.
  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    o.class == self.class && o.content == content
  end
end
|
94
|
+
|
95
|
+
# Short aliases for the list node classes. The guard skips the assignment
# when Ul is already defined, e.g. if this file is evaluated a second time.
Ul, Ol, Li = UnorderList, OrderList, ListItem unless defined?(Ul)
|
100
|
+
end
|
101
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/block/collectable'
|
6
|
+
module HParser
|
7
|
+
module Block
|
8
|
+
# Blank line parser. This parser should be used with HParser::Block::P.
|
9
|
+
#
|
10
|
+
# This parser can parse blank line.
|
11
|
+
#
|
12
|
+
# For example:
|
13
|
+
# aaaa
|
14
|
+
# <blank>
|
15
|
+
# <blank>
|
16
|
+
#
|
17
|
+
# The first and second lines are parsed with HParser::Block::P. And
|
18
|
+
# third line is parsed with HParser::Block::Empty.
|
19
|
+
class Empty
  include Collectable

  # Returns an Empty node when the scanner accepts an empty line (it is
  # asked to scan the empty string), and nil otherwise. +inlines+ is unused.
  def self.parse(scanner, inlines)
    Empty.new if scanner.scan('')
  end

  # Every Empty node equals every other Empty node.
  def ==(o)
    self.class == o.class
  end
end
|
31
|
+
|
32
|
+
# Normal line parser.
|
33
|
+
#
|
34
|
+
# In the Hatena format, a line which is not parsed by any other parser is
|
35
|
+
# a paragraph.
|
36
|
+
class P
  include Collectable

  attr_reader :content

  # Returns a P node for the current line when it contains at least one
  # character, and nil otherwise. The whole matched line is handed to the
  # inline parser.
  def self.parse(scanner, inlines)
    if scanner.scan(/./) then
      P.new inlines.parse(scanner.matched)
    end
  end

  # +content+ is whatever the inline parser returned for the line.
  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    self.class == o.class and self.content == o.content
  end

  # Ordering among parser classes: P compares greater than every other
  # parser so that it is tried last.
  #
  # +o+ is a parser class, so it must be compared directly. The previous
  # test, <tt>o.class == P</tt>, compared +Class+ with +P+ and was never
  # true, which made P compare greater than itself.
  def self.<=>(o)
    o == P ? 0 : 1
  end
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/util/parser'
|
6
|
+
module HParser
|
7
|
+
module Block
|
8
|
+
# Some formats have common structure.
|
9
|
+
#
|
10
|
+
# Quote is defined as
|
11
|
+
# >>quoted string<<
|
12
|
+
#
|
13
|
+
# Pre is defined as
|
14
|
+
# >|plain text|<
|
15
|
+
#
|
16
|
+
# In short, some formats differ only in their beginning/ending strings.
|
17
|
+
# So this class provides the basic structure for such formats.
|
18
|
+
class Pair
  attr_reader :content

  # Defines +parse(scanner, inlines)+ on the receiving class for a block
  # delimited by the strings +from+ and +to+.
  #
  # The generated +parse+ handles two shapes:
  # * a single line that starts with +from+ and ends with +to+;
  # * a line starting with +from+, followed by any number of lines, closed
  #   by the first line that ends with +to+.
  # In both cases the delimiters are removed, the text is stripped, passed
  # to <tt>inlines.parse</tt>, and wrapped in a new instance of the
  # receiving class. +parse+ returns nil when the current line does not
  # start with +from+.
  #
  # Body lines are read with /.*/ so that blank lines inside the block are
  # accepted, and an unterminated block ends at the end of input instead of
  # raising (the earlier /./ scan returned nil in both situations).
  # NOTE(review): this assumes +scanner.scan+ returns the consumed line and
  # returns nil/false once the input is exhausted -- confirm against the
  # line scanner implementation.
  def self.spliter(from, to)
    from_q = Regexp.quote from
    to_q = Regexp.quote to
    one_line = /\A#{from_q}(.*)#{to_q}\Z/
    opening = /\A#{from_q}/
    closing = /#{to_q}\Z/

    # Define the method on the singleton class so that +self+ inside the
    # body is the class +parse+ is called on (subclasses included).
    (class << self; self; end).send(:define_method, :parse) do |scanner, inlines|
      if scanner.scan(one_line)
        inner = scanner.matched[from.length...-to.length]
        self.new inlines.parse(inner.strip)
      elsif scanner.scan(opening)
        lines = [scanner.matched[from.length..-1]]
        loop do
          if scanner.scan(closing)
            lines.push scanner.matched[0...-to.length]
            break
          end
          line = scanner.scan(/.*/)
          break unless line # end of input: treat the block as closed
          lines.push line
        end
        self.new inlines.parse(lines.join("\n").strip)
      end
    end
  end

  # +content+ is whatever the inline parser returned for the block text.
  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    self.class == o.class and self.content == o.content
  end
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/block/collectable'
|
6
|
+
require 'hparser/block/pair'
|
7
|
+
module HParser
|
8
|
+
module Block
|
9
|
+
# Pre format.
|
10
|
+
class Pre < Pair
  include Collectable

  # A pre block opens with '>|' and closes with '|<'.
  spliter('>|', '|<')
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'hparser/block/pair'
|
2
|
+
require 'hparser/block/collectable'
|
3
|
+
module HParser
|
4
|
+
module Block
|
5
|
+
# Super pre parser.
|
6
|
+
class SuperPre < Pair
  include Collectable

  # A super-pre block opens with '>||' and closes with '||<'.
  spliter('>||', '||<')

  # Ordering among parser classes: 1 when compared with Pre (if Pre is
  # defined in Block), -1 against everything else.
  # NOTE(review): '>||' also begins with Pre's '>|' opener, so SuperPre
  # presumably has to be tried before Pre; whether returning 1 here
  # achieves that depends on how the block parser sorts -- confirm.
  def self.<=>(o)
    return 1 if Block.const_defined?(:Pre) && o == Pre

    -1
  end
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/block/collectable'
|
6
|
+
module HParser
|
7
|
+
module Block
|
8
|
+
# Table parser.
|
9
|
+
class Table
  include Collectable

  attr_reader :rows

  # Parses consecutive lines that start with '|' into a Table.
  #
  # For each such line the text after the leading '|' is split on '|'.
  # A cell whose text starts with '*' becomes a Th (the '*' is dropped);
  # any other cell becomes a Td. Cell text is stripped and passed to
  # <tt>inlines.parse</tt>. Returns nil when no table line was found.
  def self.parse(scanner, inlines)
    rows = []
    while scanner.scan(/\A\|/)
      cells = scanner.matched[1..-1].split('|')
      rows.push(cells.map { |label|
        # label[0, 1] is '' for an empty cell, where label[0].chr raised.
        if label[0, 1] == '*'
          Th.new inlines.parse(label[1..-1].strip)
        else
          Td.new inlines.parse(label.strip)
        end
      })
    end
    rows.empty? ? nil : Table.new(*rows)
  end

  # +rows+ is a list of rows, each row being an array of Th/Td cells.
  def initialize(*rows)
    @rows = rows
  end

  # Equal when the other object has the same class and the same rows.
  def ==(o)
    o.class == self.class and o.rows == self.rows
  end

  # Maps the given block over the rows and returns the results.
  def map_row(&f) # :yield: tr
    @rows.map(&f)
  end

  # Calls the given block once per row.
  # (Previously read the undefined @row, which is nil, and always raised.)
  def each_row(&f) # :yield: tr
    @rows.each(&f)
  end
end
|
42
|
+
|
43
|
+
# A header cell of a table.
class TableHeader
  attr_reader :content

  # +content+ is the parsed inline content of the cell.
  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    return false unless o.class == self.class

    o.content == content
  end
end
|
53
|
+
|
54
|
+
# A data cell of a table.
class TableCell
  attr_reader :content

  # +content+ is the parsed inline content of the cell.
  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    return false unless o.class == self.class

    o.content == content
  end
end
|
64
|
+
|
65
|
+
# Short aliases for the table cell classes. The guard skips the assignment
# when Th is already defined, e.g. if this file is evaluated a second time.
Th, Td = TableHeader, TableCell unless defined?(Th)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
data/lib/hparser/html.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
# This file defines +to_html+, which converts the Hatena format to HTML.
|
5
|
+
#
|
6
|
+
|
7
|
+
module HParser
|
8
|
+
# This module provides the +to_html+ method.
|
9
|
+
# This method is intended to convert hatena format to html format.
|
10
|
+
#
|
11
|
+
# For example:
|
12
|
+
# Hatena::Parser.parse('*foo').to_html # -> <h1>foo</h1>
|
13
|
+
# Hatena::Parser.parse('>|bar|<').to_html # -> <pre>bar</pre>
|
14
|
+
#
|
15
|
+
# A class including this module should implement two methods, +html_tag+ and
|
16
|
+
# +html_content+. +html_tag+ returns the name of the HTML tag to use.
|
17
|
+
# +html_content+ returns the content of that tag.
|
18
|
+
# If the content is an +Array+, each element is converted to HTML by
|
19
|
+
# +to_html+. Otherwise, the content is used as is.
|
20
|
+
#
|
21
|
+
# For example,Head implements is following:
|
22
|
+
# class HParser::Block::Head
|
23
|
+
# include HParser::Html
|
24
|
+
# def html_tag
|
25
|
+
# "h#{@level}"
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# def html_content
|
29
|
+
# @content
|
30
|
+
# end
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
module Html
  # Renders the including object as "<tag>content</tag>".
  #
  # The including class supplies +html_tag+ and +html_content+. When the
  # content is exactly an Array, every element is rendered with its own
  # +to_html+ and the results are concatenated; any other content is
  # interpolated as it is.
  def to_html
    body = html_content
    body = body.map(&:to_html).join if body.class == Array
    "<#{html_tag}>#{body}</#{html_tag}>"
  end
end
|
42
|
+
|
43
|
+
# HTML rendering for the block-level nodes. Most classes mix in Html and
# only name their tag and the reader that supplies their content.
module Block
  # Rendered as <hN>, where N is the header level.
  class Head
    include Html

    private

    def html_tag
      "h#{level}"
    end

    alias_method :html_content, :content
  end

  # Rendered as <p>.
  class P
    include Html

    private

    def html_tag
      'p'
    end

    alias_method :html_content, :content
  end

  # A blank line is rendered as a paragraph holding a single line break.
  class Empty
    def to_html
      '<p><br /></p>'
    end
  end

  # Rendered as <pre>.
  class Pre
    include Html

    private

    def html_tag
      'pre'
    end

    alias_method :html_content, :content
  end

  # Rendered as <pre>, like Pre.
  class SuperPre
    include Html

    private

    def html_tag
      'pre'
    end

    alias_method :html_content, :content
  end

  # Rendered as <blockquote>.
  class Quote
    include Html

    private

    def html_tag
      'blockquote'
    end

    alias_method :html_content, :content
  end

  # Rendered as <table> with one <tr> per row; Th cells become <th> and
  # every other cell becomes <td>.
  class Table
    def to_html
      rendered_rows = map_row { |tr|
        cells = tr.map { |cell|
          tag = cell.class == Th ? 'th' : 'td'
          "<#{tag}>#{cell.content.map { |x| x.to_html }.join}</#{tag}>"
        }
        '<tr>' + cells.join + '</tr>'
      }
      '<table>' + rendered_rows.join + '</table>'
    end
  end

  # Rendered as <ul> around its items.
  class UnorderList
    include Html

    private

    def html_tag() 'ul' end

    alias_method :html_content, :items
  end

  # Rendered as <ol> around its items.
  class OrderList
    include Html

    private

    def html_tag() 'ol' end

    alias_method :html_content, :items
  end

  # Rendered as <li>.
  class ListItem
    include Html

    private

    def html_tag() 'li' end

    alias_method :html_content, :content
  end
end
|
124
|
+
|
125
|
+
# HTML rendering for the inline nodes.
module Inline
  # Plain text is emitted unchanged (no escaping is applied here).
  class Text
    def to_html
      text
    end
  end

  # A URL becomes a link whose label is the URL itself.
  class Url
    def to_html
      %(<a href="#{url}">#{url}</a>)
    end
  end

  # A Hatena id becomes a link to that user's page on d.hatena.ne.jp.
  class HatenaId
    def to_html
      %(<a href="http://d.hatena.ne.jp/#{name}/">id:#{name}</a>)
    end
  end
end
|
144
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# Require every Ruby file that sits in the same directory as this file.
pattern = File.dirname(__FILE__) + '/*.rb'
Dir[pattern].each { |path| require path }
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
require 'hparser/inline/collectable'
|
5
|
+
|
6
|
+
module HParser
|
7
|
+
module Inline
|
8
|
+
# hatena id parser.
|
9
|
+
#
|
10
|
+
# For example:
|
11
|
+
# id:mzp
|
12
|
+
class HatenaId
  include Collectable

  attr_reader :name

  # Returns a HatenaId when the scanner matches "id:" followed by one or
  # more word characters at its current position, and nil otherwise.
  def self.parse(scanner)
    return nil unless scanner.scan(/id:\w+/)

    # Drop the three-character "id:" prefix to keep only the name.
    HatenaId.new(scanner.matched[3..-1])
  end

  # +name+ is the id without the "id:" prefix.
  def initialize(name)
    @name = name
  end

  # Equal when the other object has the same class and the same name.
  def ==(o)
    self.class == o.class && @name == o.name
  end
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'strscan'
|
6
|
+
require 'hparser/inline/collectable'
|
7
|
+
require 'hparser/util/parser'
|
8
|
+
module HParser
|
9
|
+
module Inline
|
10
|
+
class Parser
  include Util

  # +parsers+ is the list of inline parser classes to try at each position.
  # It defaults to every class that includes HParser::Inline::Collectable.
  def initialize(parsers=Parser.default_parser)
    @document = Many1.new(Or.new(*parsers))
  end

  # Parses +str+ and returns an array of inline nodes in which runs of
  # adjacent Text nodes have been merged into a single Text.
  def parse str
    scanner = StringScanner.new str
    join_text @document.parse(scanner)
  end

  # Collects every loaded class that includes
  # HParser::Inline::Collectable and sorts them with their class-level
  # <=>. When <tt>a <=> b</tt> is nil, the negated <tt>b <=> a</tt> is
  # used instead, with nil counting as 0.
  def self.default_parser
    parser = []
    ObjectSpace.each_object(Class){|klass|
      if klass.include?(HParser::Inline::Collectable) then
        parser.push klass
      end
    }
    parser.sort{|a,b|
      a <=> b or -(b <=> a).to_i
    }
  end

  private

  # Merges each run of adjacent Text nodes into one Text and returns the
  # resulting array; other nodes are kept in order.
  #
  # Returns [] for nil or empty input instead of raising.
  # NOTE(review): nil presumably comes from Many1 when nothing could be
  # parsed (e.g. an empty string) -- confirm against Util::Many1.
  # The merge is iterative: the previous recursive version used one stack
  # frame per node, and Text is produced one character at a time.
  def join_text(nodes)
    return [] if nodes.nil? or nodes.empty?

    merged = []
    nodes.each{|node|
      if node.class == Text and merged.last.class == Text then
        merged[-1] = merged.last + node
      else
        merged.push node
      end
    }
    merged
  end
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
require 'hparser/inline/collectable'
|
5
|
+
|
6
|
+
module HParser
|
7
|
+
module Inline
|
8
|
+
class Text
  include Collectable

  attr_reader :text

  # Ordering among parser classes: Text compares greater than everything
  # so that it is tried after the other inline parsers.
  def self.<=>(o)
    1
  end

  # Consumes either one complete <a ...>...</a> element or one single
  # character, and returns it as a Text. Returns nil at end of input.
  #
  # The anchor match is non-greedy so that it stops at the first </a>;
  # the greedy form ran to the last </a> and swallowed any markup that
  # sat between two anchors. The single-character match uses /m so that
  # newlines in multi-line content are consumed too.
  def self.parse(scanner)
    if scanner.scan(%r!<a.*?</a>!) or scanner.scan(/./m)
      Text.new(scanner.matched)
    end
  end

  # +text+ is the raw string this node stands for.
  def initialize(text)
    @text = text
  end

  # Returns a new Text holding this text followed by +other+'s text.
  def +(other)
    Text.new(self.text+other.text)
  end

  # Equal when the other object has the same class and the same text.
  def ==(o)
    o.class == self.class and @text == o.text
  end
end
|
34
|
+
end
|
35
|
+
end
|