hparser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,17 @@
1
+ =Hatena Format Parser
2
+ ==Overview
3
+ +hparser+ is a parser for the Hatena format, the markup used by Hatena Diary (http://d.hatena.ne.jp/).
4
+ For more details about the Hatena format, see http://hatenadiary.g.hatena.ne.jp/keyword/%e3%81%af%e3%81%a6%e3%81%aa%e8%a8%98%e6%b3%95%e4%b8%80%e8%a6%a7
5
+
6
+ +hparser+ is built from several small parsers (e.g. a header parser, a list parser, and so on).
7
+ So new formats can be added to +hparser+, and unused formats can be removed.
8
+
9
+ ==Basic usage
10
+ To parse the Hatena format, use Hatena::Parser.
11
+
12
+ require 'hatena/parser'
13
+ require 'hatena/block/all'
14
+ require 'hatena/inline/all'
15
+
16
+ parser = Hatena::Parser.new
17
+ puts parser.parse(some_text)
@@ -0,0 +1 @@
1
# Require every .rb file located in the same directory as this file.
pattern = File.dirname(__FILE__) + '/*.rb'
Dir[pattern].each { |path| require path }
@@ -0,0 +1,7 @@
1
module HParser
  module Block
    # Marker module without any methods of its own.
    # Block classes include it so that they can be picked up as parsers
    # (the original note names Hatena::Parser.default_parser as the consumer).
    module Collectable; end
  end
end
@@ -0,0 +1,45 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/block/collectable'
6
+ module HParser
7
+ module Block
8
# Header parser.
#
# A header is defined as "a line that starts with '*'".
# The number of leading '*' characters gives the header level.
#
# For example:
#   * level1
#   ** level2
#   *** level3
class Head
  include Collectable

  attr_reader :level, :content

  # Tries to read a header from +scanner+.
  #
  # Returns a Head whose content is the text after the leading '*'
  # characters (stripped and handed to +inlines.parse+), or nil when
  # the scanner does not match.
  def self.parse(scanner, inlines)
    return nil unless scanner.scan(/\A\*/)

    line = scanner.matched
    # Count the run of '*' at the start of the matched text.
    level = line[/\A\**/].length
    Head.new(level, inlines.parse(line[level..-1].strip))
  end

  def initialize(level, content)
    @level = level
    @content = content
  end

  # Two headers are equal when class, level and content all agree.
  def ==(o)
    o.class == self.class && o.level == level && o.content == content
  end
end
44
+ end
45
+ end
@@ -0,0 +1,101 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+ # This code should be rewritten.
5
+ # Ul and Ol depend on each other.
6
+
7
+ require 'hparser/block/collectable'
8
+ require 'hparser/util/parser'
9
+ module HParser
10
+ module Block
11
+ include HParser::Util
12
# Builds a parser for one nesting level of a list.
#
# level:: nesting depth of the list being parsed
# mark::  item marker character ('-' or '+' in this file)
# block:: receives the array of parsed elements and builds the list node
#
# Below level 3 an element may be a nested unordered list, a nested
# ordered list, or a plain item; at level 3 only plain items are accepted.
# The returned parser yields the block's result, or nil when nothing matched.
def self.make_list_parser(level, mark, &builder)
  ProcParser.new do |scanner, inlines|
    element =
      if level == 3
        Li.make_parser(level, mark)
      else
        Or.new(UnorderList.make_parser(level + 1),
               OrderList.make_parser(level + 1),
               Li.make_parser(level, mark))
      end

    elements = Many1.new(element).parse(scanner, inlines)
    builder.call(elements) if elements
  end
end
28
+
29
# Unordered list node; its items are marked with '-'.
# (The original author notes this class may be rewritten.)
class UnorderList
  include Collectable

  attr_reader :items

  # Parses a top-level (level 1) unordered list.
  def self.parse(scanner, inlines)
    Ul.make_parser(1).parse(scanner, inlines)
  end

  # Returns a parser for an unordered list at the given nesting +level+.
  def self.make_parser(level)
    Block.make_list_parser(level, '-') { |elements| Ul.new(*elements) }
  end

  def initialize(*items)
    @items = items
  end

  # Equal when the other object has the same class and the same items.
  def ==(o)
    o.class == self.class && o.items == items
  end
end
50
+
51
# Ordered list node; its items are marked with '+'.
# (The original author notes this class may be rewritten.)
class OrderList
  include Collectable

  attr_reader :items

  # Parses a top-level (level 1) ordered list.
  def self.parse(scanner, inlines)
    Ol.make_parser(1).parse(scanner, inlines)
  end

  # Returns a parser for an ordered list at the given nesting +level+.
  def self.make_parser(level)
    Block.make_list_parser(level, '+') { |elements| Ol.new(*elements) }
  end

  def initialize(*items)
    @items = items
  end

  # Equal when the other object has the same class and the same items.
  def ==(o)
    o.class == self.class && o.items == items
  end
end
72
+
73
# A single list item.
# (The original author notes this class may be rewritten.)
class ListItem
  attr_reader :content

  # Returns a parser that accepts a line starting with +mark+ repeated
  # +level+ times. The text after that prefix is stripped and passed to
  # +inlines.parse+; the parser yields nil when the line does not match.
  def self.make_parser(level, mark)
    include HParser::Util
    prefix = Regexp.quote(mark * level)
    ProcParser.new do |scanner, inlines|
      if scanner.scan(/\A#{prefix}.*/)
        body = scanner.matched[level..-1].strip
        ListItem.new(inlines.parse(body))
      end
    end
  end

  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    o.class == self.class && o.content == content
  end
end
94
+
95
# Short aliases for the list classes. All three are assigned together,
# and only when Ul has not been defined yet.
if defined?(Ul).nil?
  Ul = UnorderList
  Ol = OrderList
  Li = ListItem
end
100
+ end
101
+ end
@@ -0,0 +1,59 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/block/collectable'
6
+ module HParser
7
+ module Block
8
# Blank line parser. It is meant to be used together with HParser::Block::P.
#
# This parser accepts a blank line.
#
# For example:
#   aaaa
#   <blank>
#   <blank>
#
# The first and second lines are parsed with HParser::Block::P, and the
# third line is parsed with HParser::Block::Empty.
class Empty
  include Collectable

  # Returns an Empty node when +scanner+ accepts the empty string,
  # otherwise nil. +inlines+ is not used.
  def self.parse(scanner, inlines)
    Empty.new if scanner.scan('')
  end

  # All Empty nodes are equal to each other.
  def ==(o)
    self.class == o.class
  end
end
31
+
32
# Normal line parser.
#
# In the Hatena format, a line that no other parser accepts is a paragraph.
class P
  include Collectable

  attr_reader :content

  # Returns a P built from the matched text (parsed by +inlines+) when
  # +scanner+ accepts /./, otherwise nil.
  def self.parse(scanner, inlines)
    return nil unless scanner.scan(/./)

    P.new(inlines.parse(scanner.matched))
  end

  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    o.class == self.class && o.content == content
  end

  # Ordering hook: this parser should be tried last.
  # NOTE(review): if +o+ is a parser class, o.class is Class and never P,
  # so the nil branch looks unreachable; `o == P` may have been intended.
  # Confirm against the code that sorts the block parsers.
  def self.<=>(o)
    return nil if o.class == P

    1
  end
end
58
+ end
59
+ end
@@ -0,0 +1,52 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/util/parser'
6
+ module HParser
7
+ module Block
8
# Some formats share a common structure.
#
# Quote is defined as
#   >>quoted string<<
#
# Pre is defined as
#   >|plain text|<
#
# In short, these formats differ only in their beginning/ending strings,
# so this class provides the shared structure for them.
class Pair
  attr_reader :content

  # Defines a class-level +parse(scanner, inlines)+ on the calling class
  # for blocks opened by +from+ and closed by +to+.
  #
  # The generated parser handles two layouts:
  # * both markers on one line: the text between them is the content;
  # * an opening line followed by further lines up to a line ending in
  #   +to+: the lines are joined with "\n".
  # In both cases the content is stripped, passed to +inlines.parse+ and
  # wrapped in a new instance of the receiving class. The parser returns
  # nil when the current line does not start with +from+.
  #
  # Blank lines inside a block are kept. If the input ends before the
  # closing marker appears, everything read so far becomes the content
  # (the earlier implementation raised TypeError in both situations).
  def self.spliter(from, to)
    head = Regexp.quote(from)
    tail = Regexp.quote(to)
    one_line = /\A#{head}(.*)#{tail}\Z/
    opening  = /\A#{head}/
    closing  = /#{tail}\Z/
    any_line = // # also matches an empty line, unlike /./

    # A closure replaces the former string eval; the singleton class is
    # reached the long way so this also runs on old Ruby versions.
    metaclass = (class << self; self; end)
    metaclass.send(:define_method, :parse) do |scanner, inlines|
      if scanner.scan(one_line)
        line = scanner.matched
        new(inlines.parse(line[from.length...-to.length].strip))
      elsif scanner.scan(opening)
        body = scanner.matched[from.length..-1]
        loop do
          if scanner.scan(closing)
            body += "\n" + scanner.matched[0...-to.length]
            break
          end
          line = scanner.scan(any_line)
          break unless line # input ran out before the closing marker
          body += "\n" + line
        end
        new(inlines.parse(body.strip))
      end
    end
  end

  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    self.class == o.class && content == o.content
  end
end
50
+ end
51
+ end
52
+
@@ -0,0 +1,15 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/block/collectable'
6
+ require 'hparser/block/pair'
7
+ module HParser
8
+ module Block
9
# Pre format: a block opened with '>|' and closed with '|<'.
class Pre < Pair
  include Collectable

  spliter('>|', '|<')
end
14
+ end
15
+ end
@@ -0,0 +1,11 @@
1
+ require 'hparser/block/pair'
2
+ require 'hparser/block/collectable'
3
+ module HParser
4
+ module Block
5
# Quote parser: a block opened with '>>' and closed with '<<'.
class Quote < Pair
  include Collectable

  spliter('>>', '<<')
end
10
+ end
11
+ end
@@ -0,0 +1,19 @@
1
+ require 'hparser/block/pair'
2
+ require 'hparser/block/collectable'
3
+ module HParser
4
+ module Block
5
# Super pre parser: a block opened with '>||' and closed with '||<'.
class SuperPre < Pair
  include Collectable

  spliter('>||', '||<')

  # Ordering hook: 1 when compared with Pre (if Pre is defined in Block),
  # -1 for anything else.
  # NOTE(review): '>||' also starts with '>|', so SuperPre presumably has
  # to be tried before Pre; whether returning 1 here achieves that depends
  # on how the block parsers are sorted, which is not visible in this file.
  def self.<=>(o)
    Block.const_defined?(:Pre) && o == Pre ? 1 : -1
  end
end
18
+ end
19
+ end
@@ -0,0 +1,71 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/block/collectable'
6
+ module HParser
7
+ module Block
8
# Table parser.
#
# A table is a run of consecutive lines that start with '|'. Cells are
# separated by '|'; a cell whose text starts with '*' is a header cell.
class Table
  include Collectable

  attr_reader :rows

  # Reads as many table lines as +scanner+ offers.
  #
  # Each cell's text is stripped and handed to +inlines.parse+, then
  # wrapped in Th (leading '*', which is removed) or Td.
  # Returns a Table, or nil when no table line was found.
  def self.parse(scanner, inlines)
    rows = []
    while scanner.scan(/\A\|/)
      row = scanner.matched[1..-1].split('|').map do |label|
        # label[0, 1] is '' for an empty cell ("||"), whereas the former
        # label[0].chr raised NoMethodError on nil.
        if label[0, 1] == '*'
          Th.new(inlines.parse(label[1..-1].strip))
        else
          Td.new(inlines.parse(label.strip))
        end
      end
      rows.push(row)
    end
    rows.empty? ? nil : Table.new(*rows)
  end

  def initialize(*rows)
    @rows = rows
  end

  # Equal when the other object has the same class and the same rows.
  def ==(o)
    o.class == self.class && o.rows == rows
  end

  # Maps over the rows; each row is an array of cells.
  def map_row(&f) # :yield: tr
    @rows.map(&f)
  end

  # Iterates over the rows; each row is an array of cells.
  # (Previously read the undefined @row and always raised.)
  def each_row(&f) # :yield: tr
    @rows.each(&f)
  end
end
42
+
43
# Header cell of a table; +content+ holds whatever the parser stored for it.
class TableHeader
  attr_reader :content

  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    self.class == o.class && content == o.content
  end
end
53
+
54
# Ordinary cell of a table; +content+ holds whatever the parser stored for it.
class TableCell
  attr_reader :content

  def initialize(content)
    @content = content
  end

  # Equal when the other object has the same class and the same content.
  def ==(o)
    self.class == o.class && content == o.content
  end
end
64
+
65
# Short aliases for the cell classes. Both are assigned together,
# and only when Th has not been defined yet.
if defined?(Th).nil?
  Th = TableHeader
  Td = TableCell
end
69
+ end
70
+ end
71
+
@@ -0,0 +1,144 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+ # This file defines +to_html+, which converts the Hatena format to HTML.
5
+ #
6
+
7
+ module HParser
8
# This module provides the +to_html+ method, which renders a node as an
# HTML element.
#
# A class including this module should implement two methods:
# +html_tag+, returning the tag name, and +html_content+, returning the
# content. If the content is an +Array+, each element is converted with
# its own +to_html+ and the results are concatenated. Otherwise the
# content is used as it is.
#
# For example, a header could be implemented like this:
#   class Head
#     include Html
#     def html_tag
#       "h#{@level}"
#     end
#
#     def html_content
#       @content
#     end
#   end
#
module Html
  # Returns "<tag>content</tag>" built from +html_tag+ and +html_content+.
  def to_html
    body = html_content
    body = body.map { |node| node.to_html }.join if body.instance_of?(Array)
    "<#{html_tag}>#{body}</#{html_tag}>"
  end
end
42
+
43
+ module Block
44
# HTML rendering for headers: the tag is "h" followed by the level.
class Head
  include Html

  private

  # Tag name such as "h2" for a level-2 header.
  def html_tag
    "h#{level}"
  end

  # The rendered body is the header's content.
  alias_method :html_content, :content
end
53
+
54
# HTML rendering for paragraphs: <p>content</p>.
class P
  include Html

  private

  def html_tag
    'p'
  end

  # The rendered body is the paragraph's content.
  alias_method :html_content, :content
end
61
+
62
# HTML rendering for blank lines.
class Empty
  # A blank line is always rendered as a paragraph holding one line break.
  def to_html
    '<p><br /></p>'
  end
end
65
+
66
# HTML rendering for pre blocks: <pre>content</pre>.
class Pre
  include Html

  private

  def html_tag
    'pre'
  end

  # The rendered body is the block's content.
  alias_method :html_content, :content
end
72
+
73
# HTML rendering for super pre blocks: <pre>content</pre>.
class SuperPre
  include Html

  private

  def html_tag
    'pre'
  end

  # The rendered body is the block's content.
  alias_method :html_content, :content
end
79
+
80
# HTML rendering for quotes: <blockquote>content</blockquote>.
class Quote
  include Html

  private

  def html_tag
    'blockquote'
  end

  # The rendered body is the quote's content.
  alias_method :html_content, :content
end
86
+
87
# HTML rendering for tables.
class Table
  # Renders <table>, one <tr> per row, and one <th> or <td> per cell.
  # A cell is a <th> when its class is Th, otherwise a <td>; the cell body
  # is the concatenation of +to_html+ over the cell's content elements.
  def to_html
    rows_html = map_row do |tr|
      cells_html = tr.map do |cell|
        tag = cell.class == Th ? 'th' : 'td'
        inner = cell.content.map { |x| x.to_html }.join
        "<#{tag}>#{inner}</#{tag}>"
      end
      '<tr>' + cells_html.join + '</tr>'
    end
    '<table>' + rows_html.join + '</table>'
  end
end
95
+
96
# HTML rendering for unordered lists: <ul> wrapping the rendered items.
class UnorderList
  include Html

  private

  def html_tag
    'ul'
  end

  # The rendered body is built from the list's items.
  alias_method :html_content, :items
end
104
+
105
# HTML rendering for ordered lists: <ol> wrapping the rendered items.
class OrderList
  include Html

  private

  def html_tag
    'ol'
  end

  # The rendered body is built from the list's items.
  alias_method :html_content, :items
end
113
+
114
+
115
# HTML rendering for list items: <li>content</li>.
class ListItem
  include Html

  private

  def html_tag
    'li'
  end

  # The rendered body is the item's content.
  alias_method :html_content, :content
end
123
+ end
124
+
125
+ module Inline
126
# HTML rendering for plain text nodes.
class Text
  # The text is returned unchanged; no escaping is applied here.
  def to_html
    text
  end
end
131
+
132
# HTML rendering for URL nodes.
class Url
  # Renders a link whose target and label are both the URL itself.
  # The URL is inserted as-is; no escaping is applied here.
  def to_html
    format('<a href="%s">%s</a>', url, url)
  end
end
137
+
138
# HTML rendering for Hatena id nodes.
class HatenaId
  # Renders a link to the user's diary at d.hatena.ne.jp, labelled "id:NAME".
  def to_html
    format('<a href="http://d.hatena.ne.jp/%s/">id:%s</a>', name, name)
  end
end
143
+ end
144
+ end
@@ -0,0 +1 @@
1
# Require every .rb file located in the same directory as this file.
pattern = File.dirname(__FILE__) + '/*.rb'
Dir[pattern].each { |path| require path }
@@ -0,0 +1,6 @@
1
module HParser
  module Inline
    # Marker module without any methods of its own. Inline parser classes
    # include it to mark themselves as collectable parsers.
    module Collectable; end
  end
end
@@ -0,0 +1,30 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+ require 'hparser/inline/collectable'
5
+
6
+ module HParser
7
+ module Inline
8
# Hatena id parser.
#
# For example:
#   id:mzp
class HatenaId
  include Collectable

  attr_reader :name

  # Returns a HatenaId for text of the form "id:" followed by word
  # characters at the scanner position, or nil when that does not match.
  # The stored name excludes the "id:" prefix.
  def self.parse(scanner)
    return nil unless scanner.scan(/id:\w+/)

    HatenaId.new(scanner.matched[3..-1])
  end

  def initialize(name)
    @name = name
  end

  # Equal when the other object has the same class and the same name.
  def ==(o)
    o.class == self.class && @name == o.name
  end
end
29
+ end
30
+ end
@@ -0,0 +1,48 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'strscan'
6
+ require 'hparser/inline/collectable'
7
+ require 'hparser/util/parser'
8
+ module HParser
9
+ module Inline
10
# Inline parser: splits a string into inline nodes using a set of
# parser classes.
class Parser
  include Util

  # parsers:: parser classes to try at each position; defaults to every
  #           class that includes HParser::Inline::Collectable.
  def initialize(parsers=Parser.default_parser)
    @document = Many1.new(Or.new(*parsers))
  end

  # Parses +str+ and returns an array of nodes in which adjacent Text
  # nodes are merged into one. Returns an empty array when the underlying
  # parser produces no nodes.
  def parse str
    scanner = StringScanner.new str
    join_text @document.parse(scanner)
  end

  # Collects every class that includes HParser::Inline::Collectable and
  # sorts them with each class's own <=>. When a <=> b is nil, the negated
  # result of b <=> a is used (nil counts as 0).
  def self.default_parser
    parsers = []
    ObjectSpace.each_object(Class) do |klass|
      parsers << klass if klass.include?(HParser::Inline::Collectable)
    end
    parsers.sort { |a, b| (a <=> b) || -(b <=> a).to_i }
  end

  private

  # Merges runs of adjacent Text nodes into single Text nodes.
  #
  # Done iteratively: the earlier recursive version descended once per
  # node and re-sliced the array each time, and it raised on nil or
  # empty input.
  def join_text(nodes)
    return [] if nodes.nil? || nodes.empty?

    merged = []
    nodes.each do |node|
      if node.class == Text && merged.last.class == Text
        merged[-1] = merged.last + node
      else
        merged << node
      end
    end
    merged
  end
end
47
+ end
48
+ end
@@ -0,0 +1,35 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+ require 'hparser/inline/collectable'
5
+
6
+ module HParser
7
+ module Inline
8
# Plain text node. Its parser accepts anything, so it acts as the
# fallback among the inline parsers.
class Text
  include Collectable

  attr_reader :text

  # Ordering hook: always 1, regardless of the other parser.
  def self.<=>(o)
    1
  end

  # Consumes either a whole "<a ...</a>" element or a single character
  # and returns it as a Text node; returns nil at the end of input.
  #
  # The anchor pattern is non-greedy so that it stops at the first
  # "</a>"; the greedy form swallowed everything up to the last "</a>"
  # on the line, hiding any markup in between from the other parsers.
  # The single-character pattern uses the m flag so that a newline is
  # consumed too, instead of stopping the parse at the first line break.
  def self.parse(scanner)
    if scanner.scan(%r!<a.*?</a>!) or scanner.scan(/./m)
      Text.new(scanner.matched)
    end
  end

  def initialize(text)
    @text = text
  end

  # Concatenates two Text nodes into a new one.
  def +(other)
    Text.new(text + other.text)
  end

  # Equal when the other object has the same class and the same text.
  def ==(o)
    o.class == self.class && @text == o.text
  end
end
34
+ end
35
+ end