hparser 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/.travis.yml +11 -0
- data/ChangeLog +4 -0
- data/Gemfile +3 -0
- data/README.md +40 -0
- data/Rakefile +12 -0
- data/VERSION +1 -0
- data/hparser.gemspec +21 -0
- data/lib/hparser/block/dl.rb +4 -4
- data/lib/hparser/block/footnote_list.rb +19 -0
- data/lib/hparser/block/head.rb +2 -2
- data/lib/hparser/block/list.rb +8 -8
- data/lib/hparser/block/p.rb +4 -3
- data/lib/hparser/block/pair.rb +12 -7
- data/lib/hparser/block/quote.rb +32 -2
- data/lib/hparser/block/raw.rb +34 -0
- data/lib/hparser/block/see_more.rb +31 -0
- data/lib/hparser/block/super_pre.rb +21 -3
- data/lib/hparser/block/table.rb +4 -4
- data/lib/hparser/hatena.rb +3 -1
- data/lib/hparser/html.rb +181 -13
- data/lib/hparser/inline/comment.rb +27 -0
- data/lib/hparser/inline/footnote.rb +34 -0
- data/lib/hparser/inline/fotolife.rb +40 -0
- data/lib/hparser/inline/hatena_id.rb +7 -6
- data/lib/hparser/inline/parser.rb +3 -2
- data/lib/hparser/inline/tex.rb +27 -0
- data/lib/hparser/inline/text.rb +3 -2
- data/lib/hparser/inline/url.rb +20 -6
- data/lib/hparser/latex.rb +273 -0
- data/lib/hparser/parser.rb +17 -1
- data/lib/hparser/text.rb +42 -0
- data/lib/hparser/util/line_scanner.rb +3 -2
- data/lib/hparser.rb +1 -0
- data/test/integration_texts/error1.ok.hatena +23 -0
- data/test/test_block.rb +65 -2
- data/test/test_bruteforce.rb +48 -0
- data/test/test_dl.rb +13 -1
- data/test/test_footnote.rb +42 -0
- data/test/test_fotolife.rb +29 -0
- data/test/test_from_perl/01_module.t +559 -0
- data/test/test_from_perl/02_module_extend.t +36 -0
- data/test/test_from_perl/10_autolink.t +78 -0
- data/test/test_from_perl/11_autolink_extend.t +43 -0
- data/test/test_hatena.rb +2 -2
- data/test/test_head.rb +7 -1
- data/test/test_helper.rb +11 -0
- data/test/test_html.rb +39 -3
- data/test/test_id.rb +1 -1
- data/test/test_inline.rb +13 -1
- data/test/test_inline_html.rb +37 -2
- data/test/test_integration.rb +20 -0
- data/test/test_latex.rb +101 -0
- data/test/test_p.rb +23 -3
- data/test/test_pair.rb +22 -4
- data/test/test_quote.rb +69 -0
- data/test/test_see_more.rb +28 -0
- data/test/test_table.rb +1 -1
- data/test/test_tex.rb +24 -0
- data/test/test_text.rb +12 -2
- data/test/test_url.rb +39 -2
- metadata +141 -58
- data/README +0 -17
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/ChangeLog
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
= Hatena Format Parser
|
2
|
+
|
3
|
+
== Description
|
4
|
+
|
5
|
+
+hparser+ is a parser for the Hatena format. This format is used by Hatena Diary (http://d.hatena.ne.jp/).
|
6
|
+
For more details about the Hatena format, please see http://hatenadiary.g.hatena.ne.jp/keyword/%e3%81%af%e3%81%a6%e3%81%aa%e8%a8%98%e6%b3%95%e4%b8%80%e8%a6%a7
|
7
|
+
|
8
|
+
+hparser+ is built from several small parsers (e.g. a header parser, a list parser, and so on).
|
9
|
+
So new formats can be added to +hparser+, and unused formats can be removed.
|
10
|
+
|
11
|
+
== Installation
|
12
|
+
|
13
|
+
=== Archive Installation
|
14
|
+
|
15
|
+
rake install
|
16
|
+
|
17
|
+
=== Gem Installation
|
18
|
+
|
19
|
+
gem install hparser
|
20
|
+
|
21
|
+
|
22
|
+
== Features/Problems
|
23
|
+
|
24
|
+
|
25
|
+
== Synopsis
|
26
|
+
|
27
|
+
To parse the Hatena format, please use HParser::Parser.
|
28
|
+
|
29
|
+
require 'hparser'
|
30
|
+
|
31
|
+
parser = HParser::Parser.new
|
32
|
+
puts parser.parse(hatena_syntax).map {|e| e.to_html }.join("\n")
|
33
|
+
|
34
|
+
|
35
|
+
== Copyright
|
36
|
+
|
37
|
+
Author:: HIROKI Mizuno(Original Author), Yuichi Tateno<hotchpotch@nospam@gmail.com>, Nitoyon
|
38
|
+
Copyright:: HIROKI Mizuno, Yuichi Tateno
|
39
|
+
License:: Ruby's
|
40
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
Bundler::GemHelper.install_tasks
|
3
|
+
|
4
|
+
require 'rake/testtask'
|
5
|
+
|
6
|
+
Rake::TestTask.new(:test) do |test|
|
7
|
+
test.libs << 'lib' << 'test'
|
8
|
+
test.test_files = FileList['test/*.rb'] + FileList['test/*/*.rb']
|
9
|
+
test.verbose = true
|
10
|
+
end
|
11
|
+
|
12
|
+
task :default => [:test]
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.4.0
|
data/hparser.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
$:.push File.expand_path('../lib', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |gem|
|
5
|
+
gem.name = "hparser"
|
6
|
+
gem.description = "Hatena Syntax parser for Ruby"
|
7
|
+
gem.homepage = "https://github.com/hotchpotch/hparser"
|
8
|
+
gem.summary = gem.description
|
9
|
+
gem.version = File.read("VERSION").strip
|
10
|
+
gem.authors = ["HIROKI Mizuno", "Yuichi Tateno", "Nitoyon"]
|
11
|
+
gem.email = ""
|
12
|
+
gem.has_rdoc = false
|
13
|
+
gem.files = `git ls-files`.split("\n")
|
14
|
+
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
15
|
+
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
16
|
+
gem.require_paths = ['lib']
|
17
|
+
|
18
|
+
gem.add_development_dependency "rake", ">= 0.9.2"
|
19
|
+
gem.add_development_dependency "pry"
|
20
|
+
end
|
21
|
+
|
data/lib/hparser/block/dl.rb
CHANGED
@@ -21,12 +21,12 @@ module HParser
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
-
def self.parse(scanner,inlines)
|
24
|
+
def self.parse(scanner,context,inlines)
|
25
25
|
items = []
|
26
|
-
while scanner.scan(/\A
|
26
|
+
while scanner.scan(/\A:((?:<[^>]+>|\[[^\]]+\]|[^:])+):(.+)/)
|
27
27
|
i = scanner.matched.index(':',1)
|
28
|
-
title = inlines.parse scanner.
|
29
|
-
description = inlines.parse scanner.
|
28
|
+
title = inlines.parse scanner.matched_pattern[1], context
|
29
|
+
description = inlines.parse scanner.matched_pattern[2], context
|
30
30
|
items.push Item.new(title,description)
|
31
31
|
end
|
32
32
|
items == [] ? nil : self.new(*items)
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
module HParser
|
6
|
+
module Block
|
7
|
+
class FootnoteList
|
8
|
+
attr_reader :footnotes
|
9
|
+
|
10
|
+
def initialize(footnotes)
|
11
|
+
@footnotes = footnotes
|
12
|
+
end
|
13
|
+
|
14
|
+
def ==(o)
|
15
|
+
self.class == o.class and self.footnotes == o.footnotes
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/hparser/block/head.rb
CHANGED
@@ -16,7 +16,7 @@ module HParser
|
|
16
16
|
# *** level3
|
17
17
|
class Head
|
18
18
|
include Collectable
|
19
|
-
def self.parse(scanner,inlines)
|
19
|
+
def self.parse(scanner,context,inlines)
|
20
20
|
if scanner.scan(/\A\*/) then
|
21
21
|
level = 0
|
22
22
|
scanner.matched.each_byte{|c|
|
@@ -26,7 +26,7 @@ module HParser
|
|
26
26
|
break
|
27
27
|
end
|
28
28
|
}
|
29
|
-
Head.new level,inlines.parse(scanner.matched[level..-1].strip)
|
29
|
+
Head.new level,inlines.parse(scanner.matched[level..-1].strip, context)
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
data/lib/hparser/block/list.rb
CHANGED
@@ -10,7 +10,7 @@ module HParser
|
|
10
10
|
module Block
|
11
11
|
include HParser::Util
|
12
12
|
def self.make_list_parser(level,mark,&proc)
|
13
|
-
ProcParser.new{|scanner,inlines|
|
13
|
+
ProcParser.new{|scanner,context,inlines|
|
14
14
|
if level == 3 then
|
15
15
|
parser = Many1.new(Li.make_parser(level,mark))
|
16
16
|
else
|
@@ -18,7 +18,7 @@ module HParser
|
|
18
18
|
OrderList.make_parser(level+1),
|
19
19
|
Li.make_parser(level,mark)))
|
20
20
|
end
|
21
|
-
list = parser.parse(scanner,inlines)
|
21
|
+
list = parser.parse(scanner,context,inlines)
|
22
22
|
|
23
23
|
if list then
|
24
24
|
proc.call list
|
@@ -30,8 +30,8 @@ module HParser
|
|
30
30
|
# Maybe rewrite in near future.
|
31
31
|
class UnorderList
|
32
32
|
include Collectable
|
33
|
-
def self.parse(scanner,inlines)
|
34
|
-
Ul.make_parser(1).parse(scanner,inlines)
|
33
|
+
def self.parse(scanner,context,inlines)
|
34
|
+
Ul.make_parser(1).parse(scanner,context,inlines)
|
35
35
|
end
|
36
36
|
|
37
37
|
def self.make_parser(level)
|
@@ -52,8 +52,8 @@ module HParser
|
|
52
52
|
# Maybe rewrite in near future.
|
53
53
|
class OrderList
|
54
54
|
include Collectable
|
55
|
-
def self.parse(scanner,inlines)
|
56
|
-
Ol.make_parser(1).parse(scanner,inlines)
|
55
|
+
def self.parse(scanner,context,inlines)
|
56
|
+
Ol.make_parser(1).parse(scanner,context,inlines)
|
57
57
|
end
|
58
58
|
|
59
59
|
def self.make_parser(level)
|
@@ -75,9 +75,9 @@ module HParser
|
|
75
75
|
class ListItem
|
76
76
|
def self.make_parser(level,mark)
|
77
77
|
include HParser::Util
|
78
|
-
ProcParser.new{|scanner,inlines|
|
78
|
+
ProcParser.new{|scanner,context,inlines|
|
79
79
|
if scanner.scan(/\A#{Regexp.quote mark*level}.*/) then
|
80
|
-
ListItem.new inlines.parse(scanner.matched[level..-1].strip)
|
80
|
+
ListItem.new inlines.parse(scanner.matched[level..-1].strip, context)
|
81
81
|
end
|
82
82
|
}
|
83
83
|
end
|
data/lib/hparser/block/p.rb
CHANGED
@@ -18,7 +18,7 @@ module HParser
|
|
18
18
|
# third line is parsed with HParser::Block::Empty.
|
19
19
|
class Empty
|
20
20
|
include Collectable
|
21
|
-
def self.parse(scanner,inlines)
|
21
|
+
def self.parse(scanner,context,inlines)
|
22
22
|
if scanner.scan('') then
|
23
23
|
Empty.new
|
24
24
|
end
|
@@ -36,9 +36,10 @@ module HParser
|
|
36
36
|
class P
|
37
37
|
include Collectable
|
38
38
|
attr_reader :content
|
39
|
-
def self.parse(scanner,inlines)
|
39
|
+
def self.parse(scanner,context,inlines)
|
40
40
|
if scanner.scan(/./) then
|
41
|
-
|
41
|
+
matched = scanner.matched
|
42
|
+
P.new inlines.parse(matched, context)
|
42
43
|
end
|
43
44
|
end
|
44
45
|
|
data/lib/hparser/block/pair.rb
CHANGED
@@ -26,22 +26,27 @@ module HParser
|
|
26
26
|
def self.get(scanner,from,to)
|
27
27
|
from_q = Regexp.quote from
|
28
28
|
to_q = Regexp.quote to
|
29
|
-
if scanner.scan(/^#{from_q}
|
30
|
-
|
31
|
-
until scanner.scan(/^#{to_q}
|
32
|
-
|
29
|
+
if scanner.scan(/^#{from_q}\s*?$/)
|
30
|
+
lines = []
|
31
|
+
until scanner.scan(/^#{to_q}\s*?$/) do
|
32
|
+
matched = scanner.scan(/.*/)
|
33
|
+
if matched
|
34
|
+
lines << matched
|
35
|
+
else
|
36
|
+
break
|
37
|
+
end
|
33
38
|
end
|
34
|
-
return
|
39
|
+
return lines.join("\n")
|
35
40
|
end
|
36
41
|
end
|
37
42
|
|
38
43
|
# make parser by begin/end-ing string
|
39
44
|
def self.spliter(from,to)
|
40
45
|
module_eval <<-"END"
|
41
|
-
def self.parse(scanner,inlines)
|
46
|
+
def self.parse(scanner,context,inlines)
|
42
47
|
content = get(scanner,"#{from}","#{to}")
|
43
48
|
if content then
|
44
|
-
self.new inlines.parse(content)
|
49
|
+
self.new inlines.parse(content, context)
|
45
50
|
else
|
46
51
|
nil
|
47
52
|
end
|
data/lib/hparser/block/quote.rb
CHANGED
@@ -1,11 +1,41 @@
|
|
1
|
+
require 'strscan'
|
1
2
|
require 'hparser/block/pair'
|
2
3
|
require 'hparser/block/collectable'
|
4
|
+
require 'hparser/inline/url'
|
3
5
|
module HParser
|
4
6
|
module Block
|
5
7
|
# Quote parser.
|
6
|
-
class Quote
|
8
|
+
class Quote
|
7
9
|
include Collectable
|
8
|
-
|
10
|
+
include HParser::Inline
|
11
|
+
include HParser::Util
|
12
|
+
@@start_pattern = /^>(.*)>\s*$/
|
13
|
+
@@end_pattern = /^<<\s*$/
|
14
|
+
@@blocks = Concat.new(Or.new(*HParser::Parser.default_parser),
|
15
|
+
Skip.new(Empty))
|
16
|
+
|
17
|
+
def self.parse(scanner,context,inlines)
|
18
|
+
if scanner.scan(@@start_pattern)
|
19
|
+
url = Url.parse(StringScanner.new "[#{scanner.matched_pattern[1]}]")
|
20
|
+
|
21
|
+
items = []
|
22
|
+
until scanner.scan(@@end_pattern)
|
23
|
+
break unless scanner.match? /.*/
|
24
|
+
items << @@blocks.parse(scanner,context,inlines)[0]
|
25
|
+
end
|
26
|
+
self.new(items, url)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
attr_reader :items, :url
|
31
|
+
def initialize(items, url = nil)
|
32
|
+
@items = items
|
33
|
+
@url = url
|
34
|
+
end
|
35
|
+
|
36
|
+
def ==(o)
|
37
|
+
o and self.class == o.class and self.items == o.items and @url == o.url
|
38
|
+
end
|
9
39
|
end
|
10
40
|
end
|
11
41
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
|
2
|
+
require 'hparser/block/collectable'
|
3
|
+
require 'hparser/block/pair'
|
4
|
+
|
5
|
+
module HParser
|
6
|
+
module Block
|
7
|
+
class RAW < Pair
|
8
|
+
include Collectable
|
9
|
+
|
10
|
+
def self.parse(scanner, context, inlines)
|
11
|
+
if scanner.scan(/^></)
|
12
|
+
content = scanner.matched
|
13
|
+
until content.match(/><$/)
|
14
|
+
str = scanner.scan(/.*/)
|
15
|
+
if str.nil? then
|
16
|
+
content << "<"
|
17
|
+
break
|
18
|
+
end
|
19
|
+
content << "\n" << str
|
20
|
+
end
|
21
|
+
self.new inlines.parse(content[1..-2], context)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def ==(o)
|
26
|
+
self.class == o.class and self.content == o.content
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.<=>(o)
|
30
|
+
-1
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/block/collectable'
|
6
|
+
module HParser
|
7
|
+
module Block
|
8
|
+
# SeeMore line parser.
|
9
|
+
#
|
10
|
+
# ==== or =====
|
11
|
+
class SeeMore
|
12
|
+
include Collectable
|
13
|
+
def self.parse(scanner,context,inlines)
|
14
|
+
if scanner.scan('=====')
|
15
|
+
SeeMore.new true
|
16
|
+
elsif scanner.scan('====') then
|
17
|
+
SeeMore.new false
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
attr_reader :is_super
|
22
|
+
def initialize(is_super)
|
23
|
+
@is_super = is_super
|
24
|
+
end
|
25
|
+
|
26
|
+
def ==(o)
|
27
|
+
o.class == self.class and o.is_super == self.is_super
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -6,13 +6,31 @@ module HParser
|
|
6
6
|
class SuperPre < Pair
|
7
7
|
include Collectable
|
8
8
|
|
9
|
-
def self.parse scanner,inlines
|
10
|
-
|
9
|
+
def self.parse scanner,context,inlines
|
10
|
+
|
11
|
+
content = format = nil
|
12
|
+
if scanner.scan(/^>\|([A-Za-z0-9]*)\|\s*?$/)
|
13
|
+
lines = []
|
14
|
+
format = scanner.matched_pattern[1]
|
15
|
+
until scanner.scan(/^\|\|<\s*?$/) do
|
16
|
+
str = scanner.scan(/.*/)
|
17
|
+
break if !str
|
18
|
+
lines << str
|
19
|
+
end
|
20
|
+
content = lines.join("\n")
|
21
|
+
end
|
22
|
+
|
11
23
|
if content then
|
12
|
-
SuperPre.new content
|
24
|
+
SuperPre.new content, format
|
13
25
|
end
|
14
26
|
end
|
15
27
|
|
28
|
+
attr_reader :format
|
29
|
+
def initialize(content, format = nil)
|
30
|
+
super content
|
31
|
+
@format = format
|
32
|
+
end
|
33
|
+
|
16
34
|
def self.<=>(o)
|
17
35
|
-1
|
18
36
|
end
|
data/lib/hparser/block/table.rb
CHANGED
@@ -9,14 +9,14 @@ module HParser
|
|
9
9
|
class Table
|
10
10
|
attr_reader :rows
|
11
11
|
include Collectable
|
12
|
-
def self.parse(scanner,inlines)
|
12
|
+
def self.parse(scanner,context,inlines)
|
13
13
|
rows = []
|
14
14
|
while scanner.scan(/\A\|/)
|
15
|
-
rows.push scanner.matched[1..-1].split('|').map{|label|
|
15
|
+
rows.push scanner.matched[1..-1].split('|').select{|l| l[0]}.map{|label|
|
16
16
|
if label[0].chr == '*' then
|
17
|
-
Th.new inlines.parse(label[1..-1].strip)
|
17
|
+
Th.new inlines.parse(label[1..-1].strip, context)
|
18
18
|
else
|
19
|
-
Td.new inlines.parse(label.strip)
|
19
|
+
Td.new inlines.parse(label.strip, context)
|
20
20
|
end
|
21
21
|
}
|
22
22
|
end
|