hparser 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/.travis.yml +11 -0
- data/ChangeLog +4 -0
- data/Gemfile +3 -0
- data/README.md +40 -0
- data/Rakefile +12 -0
- data/VERSION +1 -0
- data/hparser.gemspec +21 -0
- data/lib/hparser/block/dl.rb +4 -4
- data/lib/hparser/block/footnote_list.rb +19 -0
- data/lib/hparser/block/head.rb +2 -2
- data/lib/hparser/block/list.rb +8 -8
- data/lib/hparser/block/p.rb +4 -3
- data/lib/hparser/block/pair.rb +12 -7
- data/lib/hparser/block/quote.rb +32 -2
- data/lib/hparser/block/raw.rb +34 -0
- data/lib/hparser/block/see_more.rb +31 -0
- data/lib/hparser/block/super_pre.rb +21 -3
- data/lib/hparser/block/table.rb +4 -4
- data/lib/hparser/hatena.rb +3 -1
- data/lib/hparser/html.rb +181 -13
- data/lib/hparser/inline/comment.rb +27 -0
- data/lib/hparser/inline/footnote.rb +34 -0
- data/lib/hparser/inline/fotolife.rb +40 -0
- data/lib/hparser/inline/hatena_id.rb +7 -6
- data/lib/hparser/inline/parser.rb +3 -2
- data/lib/hparser/inline/tex.rb +27 -0
- data/lib/hparser/inline/text.rb +3 -2
- data/lib/hparser/inline/url.rb +20 -6
- data/lib/hparser/latex.rb +273 -0
- data/lib/hparser/parser.rb +17 -1
- data/lib/hparser/text.rb +42 -0
- data/lib/hparser/util/line_scanner.rb +3 -2
- data/lib/hparser.rb +1 -0
- data/test/integration_texts/error1.ok.hatena +23 -0
- data/test/test_block.rb +65 -2
- data/test/test_bruteforce.rb +48 -0
- data/test/test_dl.rb +13 -1
- data/test/test_footnote.rb +42 -0
- data/test/test_fotolife.rb +29 -0
- data/test/test_from_perl/01_module.t +559 -0
- data/test/test_from_perl/02_module_extend.t +36 -0
- data/test/test_from_perl/10_autolink.t +78 -0
- data/test/test_from_perl/11_autolink_extend.t +43 -0
- data/test/test_hatena.rb +2 -2
- data/test/test_head.rb +7 -1
- data/test/test_helper.rb +11 -0
- data/test/test_html.rb +39 -3
- data/test/test_id.rb +1 -1
- data/test/test_inline.rb +13 -1
- data/test/test_inline_html.rb +37 -2
- data/test/test_integration.rb +20 -0
- data/test/test_latex.rb +101 -0
- data/test/test_p.rb +23 -3
- data/test/test_pair.rb +22 -4
- data/test/test_quote.rb +69 -0
- data/test/test_see_more.rb +28 -0
- data/test/test_table.rb +1 -1
- data/test/test_tex.rb +24 -0
- data/test/test_text.rb +12 -2
- data/test/test_url.rb +39 -2
- metadata +141 -58
- data/README +0 -17
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/ChangeLog
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
=Hatena Format Parser
|
2
|
+
|
3
|
+
== Description
|
4
|
+
|
5
|
+
+hparser+ is a parser for the Hatena format. This format is used by Hatena Diary (http://d.hatena.ne.jp/).
|
6
|
+
If you want to know more detail about hatena format, please see http://hatenadiary.g.hatena.ne.jp/keyword/%e3%81%af%e3%81%a6%e3%81%aa%e8%a8%98%e6%b3%95%e4%b8%80%e8%a6%a7
|
7
|
+
|
8
|
+
+hparser+ is composed of several small parsers (e.g. a header parser, a list parser, and so on).
|
9
|
+
So new formats can be added to +hparser+, and unused formats can be removed.
|
10
|
+
|
11
|
+
== Installation
|
12
|
+
|
13
|
+
=== Archive Installation
|
14
|
+
|
15
|
+
rake install
|
16
|
+
|
17
|
+
=== Gem Installation
|
18
|
+
|
19
|
+
gem install hparser
|
20
|
+
|
21
|
+
|
22
|
+
== Features/Problems
|
23
|
+
|
24
|
+
|
25
|
+
== Synopsis
|
26
|
+
|
27
|
+
To parse the Hatena format, use HParser::Parser.
|
28
|
+
|
29
|
+
require 'hparser'
|
30
|
+
|
31
|
+
parser = HParser::Parser.new
|
32
|
+
puts parser.parse(hatena_syntax).map {|e| e.to_html }.join("\n")
|
33
|
+
|
34
|
+
|
35
|
+
== Copyright
|
36
|
+
|
37
|
+
Author:: HIROKI Mizuno(Original Author), Yuichi Tateno<hotchpotch@nospam@gmail.com>, Nitoyon
|
38
|
+
Copyright:: HIROKI Mizuno, Yuichi Tateno
|
39
|
+
License:: Ruby's
|
40
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
Bundler::GemHelper.install_tasks
|
3
|
+
|
4
|
+
require 'rake/testtask'
|
5
|
+
|
6
|
+
Rake::TestTask.new(:test) do |test|
|
7
|
+
test.libs << 'lib' << 'test'
|
8
|
+
test.test_files = FileList['test/*.rb'] + FileList['test/*/*.rb']
|
9
|
+
test.verbose = true
|
10
|
+
end
|
11
|
+
|
12
|
+
task :default => [:test]
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.4.0
|
data/hparser.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
$:.push File.expand_path('../lib', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |gem|
|
5
|
+
gem.name = "hparser"
|
6
|
+
gem.description = "Hatena Syntax parser for Ruby"
|
7
|
+
gem.homepage = "https://github.com/hotchpotch/hparser"
|
8
|
+
gem.summary = gem.description
|
9
|
+
gem.version = File.read("VERSION").strip
|
10
|
+
gem.authors = ["HIROKI Mizuno", "Yuichi Tateno", "Nitoyon"]
|
11
|
+
gem.email = ""
|
12
|
+
gem.has_rdoc = false
|
13
|
+
gem.files = `git ls-files`.split("\n")
|
14
|
+
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
15
|
+
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
16
|
+
gem.require_paths = ['lib']
|
17
|
+
|
18
|
+
gem.add_development_dependency "rake", ">= 0.9.2"
|
19
|
+
gem.add_development_dependency "pry"
|
20
|
+
end
|
21
|
+
|
data/lib/hparser/block/dl.rb
CHANGED
@@ -21,12 +21,12 @@ module HParser
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
-
def self.parse(scanner,inlines)
|
24
|
+
def self.parse(scanner,context,inlines)
|
25
25
|
items = []
|
26
|
-
while scanner.scan(/\A
|
26
|
+
while scanner.scan(/\A:((?:<[^>]+>|\[[^\]]+\]|[^:])+):(.+)/)
|
27
27
|
i = scanner.matched.index(':',1)
|
28
|
-
title = inlines.parse scanner.
|
29
|
-
description = inlines.parse scanner.
|
28
|
+
title = inlines.parse scanner.matched_pattern[1], context
|
29
|
+
description = inlines.parse scanner.matched_pattern[2], context
|
30
30
|
items.push Item.new(title,description)
|
31
31
|
end
|
32
32
|
items == [] ? nil : self.new(*items)
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributed under the same terms as Ruby
|
4
|
+
|
5
|
+
module HParser
|
6
|
+
module Block
|
7
|
+
class FootnoteList
|
8
|
+
attr_reader :footnotes
|
9
|
+
|
10
|
+
def initialize(footnotes)
|
11
|
+
@footnotes = footnotes
|
12
|
+
end
|
13
|
+
|
14
|
+
def ==(o)
|
15
|
+
self.class == o.class and self.footnotes == o.footnotes
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/hparser/block/head.rb
CHANGED
@@ -16,7 +16,7 @@ module HParser
|
|
16
16
|
# *** level3
|
17
17
|
class Head
|
18
18
|
include Collectable
|
19
|
-
def self.parse(scanner,inlines)
|
19
|
+
def self.parse(scanner,context,inlines)
|
20
20
|
if scanner.scan(/\A\*/) then
|
21
21
|
level = 0
|
22
22
|
scanner.matched.each_byte{|c|
|
@@ -26,7 +26,7 @@ module HParser
|
|
26
26
|
break
|
27
27
|
end
|
28
28
|
}
|
29
|
-
Head.new level,inlines.parse(scanner.matched[level..-1].strip)
|
29
|
+
Head.new level,inlines.parse(scanner.matched[level..-1].strip, context)
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
data/lib/hparser/block/list.rb
CHANGED
@@ -10,7 +10,7 @@ module HParser
|
|
10
10
|
module Block
|
11
11
|
include HParser::Util
|
12
12
|
def self.make_list_parser(level,mark,&proc)
|
13
|
-
ProcParser.new{|scanner,inlines|
|
13
|
+
ProcParser.new{|scanner,context,inlines|
|
14
14
|
if level == 3 then
|
15
15
|
parser = Many1.new(Li.make_parser(level,mark))
|
16
16
|
else
|
@@ -18,7 +18,7 @@ module HParser
|
|
18
18
|
OrderList.make_parser(level+1),
|
19
19
|
Li.make_parser(level,mark)))
|
20
20
|
end
|
21
|
-
list = parser.parse(scanner,inlines)
|
21
|
+
list = parser.parse(scanner,context,inlines)
|
22
22
|
|
23
23
|
if list then
|
24
24
|
proc.call list
|
@@ -30,8 +30,8 @@ module HParser
|
|
30
30
|
# Maybe rewrite in near future.
|
31
31
|
class UnorderList
|
32
32
|
include Collectable
|
33
|
-
def self.parse(scanner,inlines)
|
34
|
-
Ul.make_parser(1).parse(scanner,inlines)
|
33
|
+
def self.parse(scanner,context,inlines)
|
34
|
+
Ul.make_parser(1).parse(scanner,context,inlines)
|
35
35
|
end
|
36
36
|
|
37
37
|
def self.make_parser(level)
|
@@ -52,8 +52,8 @@ module HParser
|
|
52
52
|
# Maybe rewrite in near future.
|
53
53
|
class OrderList
|
54
54
|
include Collectable
|
55
|
-
def self.parse(scanner,inlines)
|
56
|
-
Ol.make_parser(1).parse(scanner,inlines)
|
55
|
+
def self.parse(scanner,context,inlines)
|
56
|
+
Ol.make_parser(1).parse(scanner,context,inlines)
|
57
57
|
end
|
58
58
|
|
59
59
|
def self.make_parser(level)
|
@@ -75,9 +75,9 @@ module HParser
|
|
75
75
|
class ListItem
|
76
76
|
def self.make_parser(level,mark)
|
77
77
|
include HParser::Util
|
78
|
-
ProcParser.new{|scanner,inlines|
|
78
|
+
ProcParser.new{|scanner,context,inlines|
|
79
79
|
if scanner.scan(/\A#{Regexp.quote mark*level}.*/) then
|
80
|
-
ListItem.new inlines.parse(scanner.matched[level..-1].strip)
|
80
|
+
ListItem.new inlines.parse(scanner.matched[level..-1].strip, context)
|
81
81
|
end
|
82
82
|
}
|
83
83
|
end
|
data/lib/hparser/block/p.rb
CHANGED
@@ -18,7 +18,7 @@ module HParser
|
|
18
18
|
# third line is parsed with HParser::Block::Empty.
|
19
19
|
class Empty
|
20
20
|
include Collectable
|
21
|
-
def self.parse(scanner,inlines)
|
21
|
+
def self.parse(scanner,context,inlines)
|
22
22
|
if scanner.scan('') then
|
23
23
|
Empty.new
|
24
24
|
end
|
@@ -36,9 +36,10 @@ module HParser
|
|
36
36
|
class P
|
37
37
|
include Collectable
|
38
38
|
attr_reader :content
|
39
|
-
def self.parse(scanner,inlines)
|
39
|
+
def self.parse(scanner,context,inlines)
|
40
40
|
if scanner.scan(/./) then
|
41
|
-
|
41
|
+
matched = scanner.matched
|
42
|
+
P.new inlines.parse(matched, context)
|
42
43
|
end
|
43
44
|
end
|
44
45
|
|
data/lib/hparser/block/pair.rb
CHANGED
@@ -26,22 +26,27 @@ module HParser
|
|
26
26
|
def self.get(scanner,from,to)
|
27
27
|
from_q = Regexp.quote from
|
28
28
|
to_q = Regexp.quote to
|
29
|
-
if scanner.scan(/^#{from_q}
|
30
|
-
|
31
|
-
until scanner.scan(/^#{to_q}
|
32
|
-
|
29
|
+
if scanner.scan(/^#{from_q}\s*?$/)
|
30
|
+
lines = []
|
31
|
+
until scanner.scan(/^#{to_q}\s*?$/) do
|
32
|
+
matched = scanner.scan(/.*/)
|
33
|
+
if matched
|
34
|
+
lines << matched
|
35
|
+
else
|
36
|
+
break
|
37
|
+
end
|
33
38
|
end
|
34
|
-
return
|
39
|
+
return lines.join("\n")
|
35
40
|
end
|
36
41
|
end
|
37
42
|
|
38
43
|
# make parser by begin/end-ing string
|
39
44
|
def self.spliter(from,to)
|
40
45
|
module_eval <<-"END"
|
41
|
-
def self.parse(scanner,inlines)
|
46
|
+
def self.parse(scanner,context,inlines)
|
42
47
|
content = get(scanner,"#{from}","#{to}")
|
43
48
|
if content then
|
44
|
-
self.new inlines.parse(content)
|
49
|
+
self.new inlines.parse(content, context)
|
45
50
|
else
|
46
51
|
nil
|
47
52
|
end
|
data/lib/hparser/block/quote.rb
CHANGED
@@ -1,11 +1,41 @@
|
|
1
|
+
require 'strscan'
|
1
2
|
require 'hparser/block/pair'
|
2
3
|
require 'hparser/block/collectable'
|
4
|
+
require 'hparser/inline/url'
|
3
5
|
module HParser
|
4
6
|
module Block
|
5
7
|
# Quote parser.
|
6
|
-
class Quote
|
8
|
+
class Quote
|
7
9
|
include Collectable
|
8
|
-
|
10
|
+
include HParser::Inline
|
11
|
+
include HParser::Util
|
12
|
+
@@start_pattern = /^>(.*)>\s*$/
|
13
|
+
@@end_pattern = /^<<\s*$/
|
14
|
+
@@blocks = Concat.new(Or.new(*HParser::Parser.default_parser),
|
15
|
+
Skip.new(Empty))
|
16
|
+
|
17
|
+
def self.parse(scanner,context,inlines)
|
18
|
+
if scanner.scan(@@start_pattern)
|
19
|
+
url = Url.parse(StringScanner.new "[#{scanner.matched_pattern[1]}]")
|
20
|
+
|
21
|
+
items = []
|
22
|
+
until scanner.scan(@@end_pattern)
|
23
|
+
break unless scanner.match? /.*/
|
24
|
+
items << @@blocks.parse(scanner,context,inlines)[0]
|
25
|
+
end
|
26
|
+
self.new(items, url)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
attr_reader :items, :url
|
31
|
+
def initialize(items, url = nil)
|
32
|
+
@items = items
|
33
|
+
@url = url
|
34
|
+
end
|
35
|
+
|
36
|
+
def ==(o)
|
37
|
+
o and self.class == o.class and self.items == o.items and @url == o.url
|
38
|
+
end
|
9
39
|
end
|
10
40
|
end
|
11
41
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
|
2
|
+
require 'hparser/block/collectable'
|
3
|
+
require 'hparser/block/pair'
|
4
|
+
|
5
|
+
module HParser
|
6
|
+
module Block
|
7
|
+
class RAW < Pair
|
8
|
+
include Collectable
|
9
|
+
|
10
|
+
def self.parse(scanner, context, inlines)
|
11
|
+
if scanner.scan(/^></)
|
12
|
+
content = scanner.matched
|
13
|
+
until content.match(/><$/)
|
14
|
+
str = scanner.scan(/.*/)
|
15
|
+
if str.nil? then
|
16
|
+
content << "<"
|
17
|
+
break
|
18
|
+
end
|
19
|
+
content << "\n" << str
|
20
|
+
end
|
21
|
+
self.new inlines.parse(content[1..-2], context)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def ==(o)
|
26
|
+
self.class == o.class and self.content == o.content
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.<=>(o)
|
30
|
+
-1
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributed under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/block/collectable'
|
6
|
+
module HParser
|
7
|
+
module Block
|
8
|
+
# SeeMore line parser.
|
9
|
+
#
|
10
|
+
# ==== or =====
|
11
|
+
class SeeMore
|
12
|
+
include Collectable
|
13
|
+
def self.parse(scanner,context,inlines)
|
14
|
+
if scanner.scan('=====')
|
15
|
+
SeeMore.new true
|
16
|
+
elsif scanner.scan('====') then
|
17
|
+
SeeMore.new false
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
attr_reader :is_super
|
22
|
+
def initialize(is_super)
|
23
|
+
@is_super = is_super
|
24
|
+
end
|
25
|
+
|
26
|
+
def ==(o)
|
27
|
+
o.class == self.class and o.is_super == self.is_super
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -6,13 +6,31 @@ module HParser
|
|
6
6
|
class SuperPre < Pair
|
7
7
|
include Collectable
|
8
8
|
|
9
|
-
def self.parse scanner,inlines
|
10
|
-
|
9
|
+
def self.parse scanner,context,inlines
|
10
|
+
|
11
|
+
content = format = nil
|
12
|
+
if scanner.scan(/^>\|([A-Za-z0-9]*)\|\s*?$/)
|
13
|
+
lines = []
|
14
|
+
format = scanner.matched_pattern[1]
|
15
|
+
until scanner.scan(/^\|\|<\s*?$/) do
|
16
|
+
str = scanner.scan(/.*/)
|
17
|
+
break if !str
|
18
|
+
lines << str
|
19
|
+
end
|
20
|
+
content = lines.join("\n")
|
21
|
+
end
|
22
|
+
|
11
23
|
if content then
|
12
|
-
SuperPre.new content
|
24
|
+
SuperPre.new content, format
|
13
25
|
end
|
14
26
|
end
|
15
27
|
|
28
|
+
attr_reader :format
|
29
|
+
def initialize(content, format = nil)
|
30
|
+
super content
|
31
|
+
@format = format
|
32
|
+
end
|
33
|
+
|
16
34
|
def self.<=>(o)
|
17
35
|
-1
|
18
36
|
end
|
data/lib/hparser/block/table.rb
CHANGED
@@ -9,14 +9,14 @@ module HParser
|
|
9
9
|
class Table
|
10
10
|
attr_reader :rows
|
11
11
|
include Collectable
|
12
|
-
def self.parse(scanner,inlines)
|
12
|
+
def self.parse(scanner,context,inlines)
|
13
13
|
rows = []
|
14
14
|
while scanner.scan(/\A\|/)
|
15
|
-
rows.push scanner.matched[1..-1].split('|').map{|label|
|
15
|
+
rows.push scanner.matched[1..-1].split('|').select{|l| l[0]}.map{|label|
|
16
16
|
if label[0].chr == '*' then
|
17
|
-
Th.new inlines.parse(label[1..-1].strip)
|
17
|
+
Th.new inlines.parse(label[1..-1].strip, context)
|
18
18
|
else
|
19
|
-
Td.new inlines.parse(label.strip)
|
19
|
+
Td.new inlines.parse(label.strip, context)
|
20
20
|
end
|
21
21
|
}
|
22
22
|
end
|