hparser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,17 @@
1
+ =Hatena Format Parser
2
+ ==Overview
3
+ +hparser+ is a parser for the Hatena format, which is used by Hatena Diary (http://d.hatena.ne.jp/).
4
+ For more details about the Hatena format, see http://hatenadiary.g.hatena.ne.jp/keyword/%e3%81%af%e3%81%a6%e3%81%aa%e8%a8%98%e6%b3%95%e4%b8%80%e8%a6%a7
5
+
6
+ +hparser+ is built from several small parsers (e.g. a header parser, a list parser, and so on).
7
+ As a result, new formats can be added to +hparser+ and unused formats can be removed.
8
+
9
+ ==Basic usage
10
+ To parse the Hatena format, use Hatena::Parser.
11
+
12
+ require 'hatena/parser'
13
+ require 'hatena/block/all'
14
+ require 'hatena/inline/all'
15
+
16
+ parser = Hatena::Parser.new
17
+ puts parser.parse(some_text)
@@ -0,0 +1 @@
1
+ Dir[File.dirname(__FILE__)+'/*.rb'].each{|x| require x}
@@ -0,0 +1,7 @@
1
+ module HParser
2
+ module Block
3
+     # This is a marker module for Hatena::Parser.default_parser.
4
+ module Collectable
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,45 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/block/collectable'
6
+ module HParser
7
+ module Block
8
+ # Header parser.
9
+ #
10
+     # A header is defined as "a line which starts with '*'".
11
+     # The number of '*' characters gives the header level.
12
+ #
13
+ # For example:
14
+ # * level1
15
+ # ** level2
16
+ # *** level3
17
+ class Head
18
+ include Collectable
19
+ def self.parse(scanner,inlines)
20
+ if scanner.scan(/\A\*/) then
21
+ level = 0
22
+ scanner.matched.each_byte{|c|
23
+ if c.chr == '*' then
24
+ level += 1
25
+ else
26
+ break
27
+ end
28
+ }
29
+ Head.new level,inlines.parse(scanner.matched[level..-1].strip)
30
+ end
31
+ end
32
+
33
+ attr_reader :level,:content
34
+ def initialize(level,content)
35
+ @level = level
36
+ @content = content
37
+ end
38
+
39
+ def ==(o)
40
+ o.class == self.class and o.level == self.level and
41
+ o.content == self.content
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,101 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+ # This code should be rewritten.
5
+ # Ul and Ol depend on each other.
6
+
7
+ require 'hparser/block/collectable'
8
+ require 'hparser/util/parser'
9
+ module HParser
10
+ module Block
11
+ include HParser::Util
12
+ def self.make_list_parser(level,mark,&proc)
13
+ ProcParser.new{|scanner,inlines|
14
+ if level == 3 then
15
+ parser = Many1.new(Li.make_parser(level,mark))
16
+ else
17
+ parser = Many1.new(Or.new(UnorderList.make_parser(level+1),
18
+ OrderList.make_parser(level+1),
19
+ Li.make_parser(level,mark)))
20
+ end
21
+ list = parser.parse(scanner,inlines)
22
+
23
+ if list then
24
+ proc.call list
25
+ end
26
+ }
27
+ end
28
+
29
+     # This class is undocumented.
30
+     # It may be rewritten in the near future.
31
+ class UnorderList
32
+ include Collectable
33
+ def self.parse(scanner,inlines)
34
+ Ul.make_parser(1).parse(scanner,inlines)
35
+ end
36
+
37
+ def self.make_parser(level)
38
+ Block.make_list_parser(level,'-'){|x| Ul.new(*x)}
39
+ end
40
+
41
+ attr_reader :items
42
+ def initialize(*items)
43
+ @items = items
44
+ end
45
+
46
+ def ==(o)
47
+ o.class == self.class and o.items == self.items
48
+ end
49
+ end
50
+
51
+     # This class is undocumented.
52
+     # It may be rewritten in the near future.
53
+ class OrderList
54
+ include Collectable
55
+ def self.parse(scanner,inlines)
56
+ Ol.make_parser(1).parse(scanner,inlines)
57
+ end
58
+
59
+ def self.make_parser(level)
60
+ Block.make_list_parser(level,'+'){|x| Ol.new(*x) }
61
+ end
62
+
63
+ attr_reader :items
64
+ def initialize(*items)
65
+ @items = items
66
+ end
67
+
68
+ def ==(o)
69
+ o.class == self.class and o.items == self.items
70
+ end
71
+ end
72
+
73
+     # This class is undocumented.
74
+     # It may be rewritten in the near future.
75
+ class ListItem
76
+ def self.make_parser(level,mark)
77
+ include HParser::Util
78
+ ProcParser.new{|scanner,inlines|
79
+ if scanner.scan(/\A#{Regexp.quote mark*level}.*/) then
80
+ ListItem.new inlines.parse(scanner.matched[level..-1].strip)
81
+ end
82
+ }
83
+ end
84
+
85
+ attr_reader :content
86
+ def initialize(content)
87
+ @content = content
88
+ end
89
+
90
+ def ==(o)
91
+ o.class==self.class and o.content == self.content
92
+ end
93
+ end
94
+
95
+ unless defined?(Ul)
96
+ Ul = UnorderList
97
+ Ol = OrderList
98
+ Li = ListItem
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,59 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/block/collectable'
6
+ module HParser
7
+ module Block
8
+     # Blank line parser. This parser should be used with HParser::Block::P.
9
+     #
10
+     # This parser parses a blank line.
11
+ #
12
+ # For example:
13
+ # aaaa
14
+ # <blank>
15
+     #  <blank>
16
+     #
17
+     # The first and second lines are parsed with HParser::Block::P, and the
18
+     # third line is parsed with HParser::Block::Empty.
19
+ class Empty
20
+ include Collectable
21
+ def self.parse(scanner,inlines)
22
+ if scanner.scan('') then
23
+ Empty.new
24
+ end
25
+ end
26
+
27
+ def ==(o)
28
+ o.class == self.class
29
+ end
30
+ end
31
+
32
+ # Normal line parser.
33
+ #
34
+     # In the Hatena format, a line which is not parsed by any other parser is
35
+     # a paragraph.
36
+ class P
37
+ include Collectable
38
+ attr_reader :content
39
+ def self.parse(scanner,inlines)
40
+ if scanner.scan(/./) then
41
+ P.new inlines.parse(scanner.matched)
42
+ end
43
+ end
44
+
45
+ def initialize(content)
46
+ @content = content
47
+ end
48
+
49
+ def ==(o)
50
+ self.class == o.class and self.content == o.content
51
+ end
52
+
53
+ def self.<=>(o)
54
+ # This parser should be last.
55
+ o.class == P ? nil : 1
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,52 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/util/parser'
6
+ module HParser
7
+ module Block
8
+ # Some formats have common structure.
9
+ #
10
+     # Quote is defined as
11
+ # >>quoted string<<
12
+ #
13
+     # Pre is defined as
14
+ # >|plain text|<
15
+ #
16
+     # In short, these formats differ only in their beginning/ending strings.
17
+     # So this class provides the basic structure shared by such formats.
18
+ class Pair
19
+       # Defines +parse+ for the given beginning and ending strings.
20
+ def self.spliter(from,to)
21
+ from_q = Regexp.quote from
22
+ to_q = Regexp.quote to
23
+
24
+ module_eval <<-"END"
25
+ def self.parse(scanner,inlines)
26
+ if scanner.scan(/\\A#{from_q}(.*)#{to_q}\\Z/) then
27
+ content = scanner.matched
28
+ self.new inlines.parse(content[#{from.length}...-#{to.length}].strip)
29
+ elsif scanner.scan(/\\A#{from_q}/)
30
+ content = scanner.matched[#{from.length}..-1]
31
+ until scanner.scan(/#{to_q}\\Z/) do
32
+ content += "\n"+ scanner.scan(/./)
33
+ end
34
+ content += "\n"+scanner.matched[0...-#{to.length}]
35
+ self.new inlines.parse(content.strip)
36
+ end
37
+ end
38
+ END
39
+ end
40
+
41
+ attr_reader :content
42
+ def initialize(content)
43
+ @content = content
44
+ end
45
+
46
+ def ==(o)
47
+ self.class == o.class and self.content == o.content
48
+ end
49
+ end
50
+ end
51
+ end
52
+
@@ -0,0 +1,15 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/block/collectable'
6
+ require 'hparser/block/pair'
7
+ module HParser
8
+ module Block
9
+ # Pre format.
10
+ class Pre < Pair
11
+ include Collectable
12
+ spliter '>|','|<'
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,11 @@
1
+ require 'hparser/block/pair'
2
+ require 'hparser/block/collectable'
3
+ module HParser
4
+ module Block
5
+ # Quote parser.
6
+ class Quote < Pair
7
+ include Collectable
8
+ spliter '>>','<<'
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,19 @@
1
+ require 'hparser/block/pair'
2
+ require 'hparser/block/collectable'
3
+ module HParser
4
+ module Block
5
+ # Super pre parser.
6
+ class SuperPre < Pair
7
+ include Collectable
8
+ spliter '>||','||<'
9
+
10
+ def self.<=>(o)
11
+ if Block.const_defined?(:Pre) and o == Pre then
12
+ 1
13
+ else
14
+ -1
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,71 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/block/collectable'
6
+ module HParser
7
+ module Block
8
+ # Table parser.
9
+ class Table
10
+ attr_reader :rows
11
+ include Collectable
12
+ def self.parse(scanner,inlines)
13
+ rows = []
14
+ while scanner.scan(/\A\|/)
15
+ rows.push scanner.matched[1..-1].split('|').map{|label|
16
+ if label[0].chr == '*' then
17
+ Th.new inlines.parse(label[1..-1].strip)
18
+ else
19
+ Td.new inlines.parse(label.strip)
20
+ end
21
+ }
22
+ end
23
+ rows == [] ? nil : Table.new(*rows)
24
+ end
25
+
26
+ def initialize(*rows)
27
+ @rows = rows
28
+ end
29
+
30
+ def ==(o)
31
+ o.class == self.class and o.rows == self.rows
32
+ end
33
+
34
+ def map_row(&f) # :yield: tr
35
+ @rows.map(&f)
36
+ end
37
+
38
+ def each_row(&f) # :yield: tr
39
+ @row.each(&f)
40
+ end
41
+ end
42
+
43
+ class TableHeader
44
+ attr_reader :content
45
+ def initialize(content)
46
+ @content = content
47
+ end
48
+
49
+ def ==(o)
50
+ o.class == self.class and o.content == self.content
51
+ end
52
+ end
53
+
54
+ class TableCell
55
+ attr_reader :content
56
+ def initialize(content)
57
+ @content = content
58
+ end
59
+
60
+ def ==(o)
61
+ o.class == self.class and o.content == self.content
62
+ end
63
+ end
64
+
65
+ unless defined?(Th)
66
+ Th = TableHeader
67
+ Td = TableCell
68
+ end
69
+ end
70
+ end
71
+
@@ -0,0 +1,144 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+ # This file defines +to_html+, which converts the Hatena format to HTML.
5
+ #
6
+
7
+ module HParser
8
+   # This module provides the +to_html+ method.
9
+   # This method is intended to convert the Hatena format to HTML.
10
+ #
11
+ # For example:
12
+ # Hatena::Parser.parse('*foo').to_html # -> <h1>foo</h1>
13
+ # Hatena::Parser.parse('>|bar|<').to_html # -> <pre>bar</pre>
14
+ #
15
+   # A class including this module should implement 2 methods, +html_tag+ and
16
+   # +html_content+. +html_tag+ provides the name of the HTML tag to use, and
17
+   # +html_content+ provides the content placed inside that tag.
18
+   # If the content is an +Array+, each element is converted to HTML with
19
+   # +to_html+. Otherwise, the content is used as it is.
20
+ #
21
+   # For example, Head is implemented as follows:
22
+   #  class HParser::Block::Head
23
+   #    include HParser::Html
24
+   #    def html_tag
25
+   #      "h#{self.level}"
26
+   #    end
27
+   #
28
+   #    def html_content
29
+   #      self.content
30
+   #    end
31
+   #  end
32
+ #
33
+ module Html
34
+ def to_html
35
+ content = html_content
36
+ if content.class == Array then
37
+ content = content.map{|x| x.to_html}.join
38
+ end
39
+ %(<#{html_tag}>#{content}</#{html_tag}>)
40
+ end
41
+ end
42
+
43
+ module Block
44
+ class Head
45
+ include Html
46
+ private
47
+ def html_tag
48
+ "h#{self.level}"
49
+ end
50
+
51
+ alias_method :html_content,:content
52
+ end
53
+
54
+ class P
55
+ include Html
56
+ private
57
+ def html_tag() 'p' end
58
+
59
+ alias_method :html_content,:content
60
+ end
61
+
62
+ class Empty
63
+ def to_html() '<p><br /></p>' end
64
+ end
65
+
66
+ class Pre
67
+ include Html
68
+ private
69
+ def html_tag() 'pre' end
70
+ alias_method :html_content,:content
71
+ end
72
+
73
+ class SuperPre
74
+ include Html
75
+ private
76
+ def html_tag() 'pre' end
77
+ alias_method :html_content,:content
78
+ end
79
+
80
+ class Quote
81
+ include Html
82
+ private
83
+ def html_tag() 'blockquote' end
84
+ alias_method :html_content,:content
85
+ end
86
+
87
+ class Table
88
+ def to_html
89
+ '<table>'+self.map_row{|tr|
90
+ '<tr>'+tr.map{|cell| tag = cell.class == Th ? 'th' : 'td'
91
+ "<#{tag}>#{cell.content.map{|x| x.to_html}.join}</#{tag}>"}.join+'</tr>'
92
+ }.join+'</table>'
93
+ end
94
+ end
95
+
96
+ class UnorderList
97
+ include Html
98
+ private
99
+ def html_tag
100
+ 'ul'
101
+ end
102
+ alias_method :html_content,:items
103
+ end
104
+
105
+ class OrderList
106
+ include Html
107
+ private
108
+ def html_tag
109
+ 'ol'
110
+ end
111
+ alias_method :html_content,:items
112
+ end
113
+
114
+
115
+ class ListItem
116
+ include Html
117
+ private
118
+ def html_tag
119
+ 'li'
120
+ end
121
+ alias_method :html_content,:content
122
+ end
123
+ end
124
+
125
+ module Inline
126
+ class Text
127
+ def to_html
128
+ self.text
129
+ end
130
+ end
131
+
132
+ class Url
133
+ def to_html
134
+ %(<a href="#{self.url}">#{self.url}</a>)
135
+ end
136
+ end
137
+
138
+ class HatenaId
139
+ def to_html
140
+ %(<a href="http://d.hatena.ne.jp/#{self.name}/">id:#{self.name}</a>)
141
+ end
142
+ end
143
+ end
144
+ end
@@ -0,0 +1 @@
1
+ Dir[File.dirname(__FILE__)+'/*.rb'].each{|x| require x}
@@ -0,0 +1,6 @@
1
+ module HParser
2
+ module Inline
3
+ module Collectable
4
+ end
5
+ end
6
+ end
@@ -0,0 +1,30 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+ require 'hparser/inline/collectable'
5
+
6
+ module HParser
7
+ module Inline
8
+ # hatena id parser.
9
+ #
10
+ # For example:
11
+ # id:mzp
12
+ class HatenaId
13
+ include Collectable
14
+ attr_reader :name
15
+ def initialize(name)
16
+ @name = name
17
+ end
18
+
19
+ def self.parse(scanner)
20
+ if scanner.scan(/id:\w+/) then
21
+ HatenaId.new scanner.matched[3..-1]
22
+ end
23
+ end
24
+
25
+ def ==(o)
26
+ self.class == o.class and @name == o.name
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,48 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'strscan'
6
+ require 'hparser/inline/collectable'
7
+ require 'hparser/util/parser'
8
+ module HParser
9
+ module Inline
10
+ class Parser
11
+ include Util
12
+ def initialize(parsers=Parser.default_parser)
13
+ @document = Many1.new(Or.new(*parsers))
14
+ end
15
+
16
+ def parse str
17
+ scanner = StringScanner.new str
18
+ join_text @document.parse(scanner)
19
+ end
20
+
21
+ def self.default_parser
22
+ parser = []
23
+ ObjectSpace.each_object(Class){|klass|
24
+ if klass.include?(HParser::Inline::Collectable) then
25
+ parser.push klass
26
+ end
27
+ }
28
+ parser.sort{|a,b|
29
+ a <=> b or -(b <=> a).to_i
30
+ }
31
+ end
32
+
33
+ private
34
+ def join_text(nodes)
35
+ if nodes.length == 1 then
36
+ nodes
37
+ else
38
+ rest = join_text nodes[1..-1]
39
+ if rest[0].class == Text and nodes[0].class == Text then
40
+ rest[1..-1].unshift(nodes[0]+rest[0])
41
+ else
42
+ rest.unshift nodes[0]
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,35 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+ require 'hparser/inline/collectable'
5
+
6
+ module HParser
7
+ module Inline
8
+ class Text
9
+ include Collectable
10
+ attr_reader :text
11
+
12
+ def self.<=>(o)
13
+ 1
14
+ end
15
+
16
+ def self.parse(scanner)
17
+ if scanner.scan(%r!<a.*</a>!) or scanner.scan(/./)
18
+ Text.new(scanner.matched)
19
+ end
20
+ end
21
+
22
+ def initialize(text)
23
+ @text = text
24
+ end
25
+
26
+ def +(other)
27
+ Text.new(self.text+other.text)
28
+ end
29
+
30
+ def ==(o)
31
+ o.class == self.class and @text == o.text
32
+ end
33
+ end
34
+ end
35
+ end