hparser 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/.gitignore +2 -0
  2. data/.travis.yml +11 -0
  3. data/ChangeLog +4 -0
  4. data/Gemfile +3 -0
  5. data/README.md +40 -0
  6. data/Rakefile +12 -0
  7. data/VERSION +1 -0
  8. data/hparser.gemspec +21 -0
  9. data/lib/hparser/block/dl.rb +4 -4
  10. data/lib/hparser/block/footnote_list.rb +19 -0
  11. data/lib/hparser/block/head.rb +2 -2
  12. data/lib/hparser/block/list.rb +8 -8
  13. data/lib/hparser/block/p.rb +4 -3
  14. data/lib/hparser/block/pair.rb +12 -7
  15. data/lib/hparser/block/quote.rb +32 -2
  16. data/lib/hparser/block/raw.rb +34 -0
  17. data/lib/hparser/block/see_more.rb +31 -0
  18. data/lib/hparser/block/super_pre.rb +21 -3
  19. data/lib/hparser/block/table.rb +4 -4
  20. data/lib/hparser/hatena.rb +3 -1
  21. data/lib/hparser/html.rb +181 -13
  22. data/lib/hparser/inline/comment.rb +27 -0
  23. data/lib/hparser/inline/footnote.rb +34 -0
  24. data/lib/hparser/inline/fotolife.rb +40 -0
  25. data/lib/hparser/inline/hatena_id.rb +7 -6
  26. data/lib/hparser/inline/parser.rb +3 -2
  27. data/lib/hparser/inline/tex.rb +27 -0
  28. data/lib/hparser/inline/text.rb +3 -2
  29. data/lib/hparser/inline/url.rb +20 -6
  30. data/lib/hparser/latex.rb +273 -0
  31. data/lib/hparser/parser.rb +17 -1
  32. data/lib/hparser/text.rb +42 -0
  33. data/lib/hparser/util/line_scanner.rb +3 -2
  34. data/lib/hparser.rb +1 -0
  35. data/test/integration_texts/error1.ok.hatena +23 -0
  36. data/test/test_block.rb +65 -2
  37. data/test/test_bruteforce.rb +48 -0
  38. data/test/test_dl.rb +13 -1
  39. data/test/test_footnote.rb +42 -0
  40. data/test/test_fotolife.rb +29 -0
  41. data/test/test_from_perl/01_module.t +559 -0
  42. data/test/test_from_perl/02_module_extend.t +36 -0
  43. data/test/test_from_perl/10_autolink.t +78 -0
  44. data/test/test_from_perl/11_autolink_extend.t +43 -0
  45. data/test/test_hatena.rb +2 -2
  46. data/test/test_head.rb +7 -1
  47. data/test/test_helper.rb +11 -0
  48. data/test/test_html.rb +39 -3
  49. data/test/test_id.rb +1 -1
  50. data/test/test_inline.rb +13 -1
  51. data/test/test_inline_html.rb +37 -2
  52. data/test/test_integration.rb +20 -0
  53. data/test/test_latex.rb +101 -0
  54. data/test/test_p.rb +23 -3
  55. data/test/test_pair.rb +22 -4
  56. data/test/test_quote.rb +69 -0
  57. data/test/test_see_more.rb +28 -0
  58. data/test/test_table.rb +1 -1
  59. data/test/test_tex.rb +24 -0
  60. data/test/test_text.rb +12 -2
  61. data/test/test_url.rb +39 -2
  62. metadata +141 -58
  63. data/README +0 -17
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ *.gem
2
+ Gemfile.lock
data/.travis.yml ADDED
@@ -0,0 +1,11 @@
1
+ rvm:
2
+ - 1.8.7
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - rbx-2.0
6
+ - jruby
7
+ - ruby-head
8
+ - ree
9
+ notifications:
10
+ email:
11
+ - hotchpotch@gmail.com
data/ChangeLog ADDED
@@ -0,0 +1,4 @@
1
+ == 0.0.1 / 2008-06-18
2
+
3
+ * initial release
4
+
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source :rubygems
2
+
3
+ gemspec
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ =Hatena Format Parser
2
+
3
+ == Description
4
+
5
+ +hparser+ is a parser for the Hatena format. This format is used at Hatena Diary (http://d.hatena.ne.jp/).
6
+ For more details about the Hatena format, please see http://hatenadiary.g.hatena.ne.jp/keyword/%e3%81%af%e3%81%a6%e3%81%aa%e8%a8%98%e6%b3%95%e4%b8%80%e8%a6%a7
7
+
8
+ +hparser+ is built from several small parsers (e.g. a header parser, a list parser, and so on).
9
+ So +hparser+ can be extended with new formats, and unused formats can be removed.
10
+
11
+ == Installation
12
+
13
+ === Archive Installation
14
+
15
+ rake install
16
+
17
+ === Gem Installation
18
+
19
+ gem install hotchpotch-hparser
20
+
21
+
22
+ == Features/Problems
23
+
24
+
25
+ == Synopsis
26
+
27
+ To parse the Hatena format, please use HParser::Parser.
28
+
29
+ require 'hparser'
30
+
31
+ parser = HParser::Parser.new
32
+ puts parser.parse(hatena_syntax).map {|e| e.to_html }.join("\n")
33
+
34
+
35
+ == Copyright
36
+
37
+ Author:: HIROKI Mizuno(Original Author), Yuichi Tateno<hotchpotch@nospam@gmail.com>, Nitoyon
38
+ Copyright:: HIROKI Mizuno, Yuichi Tateno
39
+ License:: Ruby's
40
+
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'rake/testtask'
5
+
6
+ Rake::TestTask.new(:test) do |test|
7
+ test.libs << 'lib' << 'test'
8
+ test.test_files = FileList['test/*.rb'] + FileList['test/*/*.rb']
9
+ test.verbose = true
10
+ end
11
+
12
+ task :default => [:test]
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.4.0
data/hparser.gemspec ADDED
@@ -0,0 +1,21 @@
1
+ # encoding: utf-8
2
+ $:.push File.expand_path('../lib', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.name = "hparser"
6
+ gem.description = "Hatena Syntax parser for Ruby"
7
+ gem.homepage = "https://github.com/hotchpotch/hparser"
8
+ gem.summary = gem.description
9
+ gem.version = File.read("VERSION").strip
10
+ gem.authors = ["HIROKI Mizuno", "Yuichi Tateno", "Nitoyon"]
11
+ gem.email = ""
12
+ gem.has_rdoc = false
13
+ gem.files = `git ls-files`.split("\n")
14
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
15
+ gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
16
+ gem.require_paths = ['lib']
17
+
18
+ gem.add_development_dependency "rake", ">= 0.9.2"
19
+ gem.add_development_dependency "pry"
20
+ end
21
+
@@ -21,12 +21,12 @@ module HParser
21
21
  end
22
22
  end
23
23
 
24
- def self.parse(scanner,inlines)
24
+ def self.parse(scanner,context,inlines)
25
25
  items = []
26
- while scanner.scan(/\A:/)
26
+ while scanner.scan(/\A:((?:<[^>]+>|\[[^\]]+\]|[^:])+):(.+)/)
27
27
  i = scanner.matched.index(':',1)
28
- title = inlines.parse scanner.matched[1...i]
29
- description = inlines.parse scanner.matched[i+1..-1]
28
+ title = inlines.parse scanner.matched_pattern[1], context
29
+ description = inlines.parse scanner.matched_pattern[2], context
30
30
  items.push Item.new(title,description)
31
31
  end
32
32
  items == [] ? nil : self.new(*items)
@@ -0,0 +1,19 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ module HParser
6
+ module Block
7
+ class FootnoteList
8
+ attr_reader :footnotes
9
+
10
+ def initialize(footnotes)
11
+ @footnotes = footnotes
12
+ end
13
+
14
+ def ==(o)
15
+ self.class == o.class and self.footnotes == o.footnotes
16
+ end
17
+ end
18
+ end
19
+ end
@@ -16,7 +16,7 @@ module HParser
16
16
  # *** level3
17
17
  class Head
18
18
  include Collectable
19
- def self.parse(scanner,inlines)
19
+ def self.parse(scanner,context,inlines)
20
20
  if scanner.scan(/\A\*/) then
21
21
  level = 0
22
22
  scanner.matched.each_byte{|c|
@@ -26,7 +26,7 @@ module HParser
26
26
  break
27
27
  end
28
28
  }
29
- Head.new level,inlines.parse(scanner.matched[level..-1].strip)
29
+ Head.new level,inlines.parse(scanner.matched[level..-1].strip, context)
30
30
  end
31
31
  end
32
32
 
@@ -10,7 +10,7 @@ module HParser
10
10
  module Block
11
11
  include HParser::Util
12
12
  def self.make_list_parser(level,mark,&proc)
13
- ProcParser.new{|scanner,inlines|
13
+ ProcParser.new{|scanner,context,inlines|
14
14
  if level == 3 then
15
15
  parser = Many1.new(Li.make_parser(level,mark))
16
16
  else
@@ -18,7 +18,7 @@ module HParser
18
18
  OrderList.make_parser(level+1),
19
19
  Li.make_parser(level,mark)))
20
20
  end
21
- list = parser.parse(scanner,inlines)
21
+ list = parser.parse(scanner,context,inlines)
22
22
 
23
23
  if list then
24
24
  proc.call list
@@ -30,8 +30,8 @@ module HParser
30
30
  # Maybe rewrite in near future.
31
31
  class UnorderList
32
32
  include Collectable
33
- def self.parse(scanner,inlines)
34
- Ul.make_parser(1).parse(scanner,inlines)
33
+ def self.parse(scanner,context,inlines)
34
+ Ul.make_parser(1).parse(scanner,context,inlines)
35
35
  end
36
36
 
37
37
  def self.make_parser(level)
@@ -52,8 +52,8 @@ module HParser
52
52
  # Maybe rewrite in near future.
53
53
  class OrderList
54
54
  include Collectable
55
- def self.parse(scanner,inlines)
56
- Ol.make_parser(1).parse(scanner,inlines)
55
+ def self.parse(scanner,context,inlines)
56
+ Ol.make_parser(1).parse(scanner,context,inlines)
57
57
  end
58
58
 
59
59
  def self.make_parser(level)
@@ -75,9 +75,9 @@ module HParser
75
75
  class ListItem
76
76
  def self.make_parser(level,mark)
77
77
  include HParser::Util
78
- ProcParser.new{|scanner,inlines|
78
+ ProcParser.new{|scanner,context,inlines|
79
79
  if scanner.scan(/\A#{Regexp.quote mark*level}.*/) then
80
- ListItem.new inlines.parse(scanner.matched[level..-1].strip)
80
+ ListItem.new inlines.parse(scanner.matched[level..-1].strip, context)
81
81
  end
82
82
  }
83
83
  end
@@ -18,7 +18,7 @@ module HParser
18
18
  # third line is parsed with HParser::Block::Empty.
19
19
  class Empty
20
20
  include Collectable
21
- def self.parse(scanner,inlines)
21
+ def self.parse(scanner,context,inlines)
22
22
  if scanner.scan('') then
23
23
  Empty.new
24
24
  end
@@ -36,9 +36,10 @@ module HParser
36
36
  class P
37
37
  include Collectable
38
38
  attr_reader :content
39
- def self.parse(scanner,inlines)
39
+ def self.parse(scanner,context,inlines)
40
40
  if scanner.scan(/./) then
41
- P.new inlines.parse(scanner.matched)
41
+ matched = scanner.matched
42
+ P.new inlines.parse(matched, context)
42
43
  end
43
44
  end
44
45
 
@@ -26,22 +26,27 @@ module HParser
26
26
  def self.get(scanner,from,to)
27
27
  from_q = Regexp.quote from
28
28
  to_q = Regexp.quote to
29
- if scanner.scan(/^#{from_q}$/)
30
- content = ''
31
- until scanner.scan(/^#{to_q}$/) do
32
- content += "\n"+ scanner.scan(/.*/)
29
+ if scanner.scan(/^#{from_q}\s*?$/)
30
+ lines = []
31
+ until scanner.scan(/^#{to_q}\s*?$/) do
32
+ matched = scanner.scan(/.*/)
33
+ if matched
34
+ lines << matched
35
+ else
36
+ break
37
+ end
33
38
  end
34
- return content.strip
39
+ return lines.join("\n")
35
40
  end
36
41
  end
37
42
 
38
43
  # make parser by begin/end-ing string
39
44
  def self.spliter(from,to)
40
45
  module_eval <<-"END"
41
- def self.parse(scanner,inlines)
46
+ def self.parse(scanner,context,inlines)
42
47
  content = get(scanner,"#{from}","#{to}")
43
48
  if content then
44
- self.new inlines.parse(content)
49
+ self.new inlines.parse(content, context)
45
50
  else
46
51
  nil
47
52
  end
@@ -1,11 +1,41 @@
1
+ require 'strscan'
1
2
  require 'hparser/block/pair'
2
3
  require 'hparser/block/collectable'
4
+ require 'hparser/inline/url'
3
5
  module HParser
4
6
  module Block
5
7
  # Quote parser.
6
- class Quote < Pair
8
+ class Quote
7
9
  include Collectable
8
- spliter '>>','<<'
10
+ include HParser::Inline
11
+ include HParser::Util
12
+ @@start_pattern = /^>(.*)>\s*$/
13
+ @@end_pattern = /^<<\s*$/
14
+ @@blocks = Concat.new(Or.new(*HParser::Parser.default_parser),
15
+ Skip.new(Empty))
16
+
17
+ def self.parse(scanner,context,inlines)
18
+ if scanner.scan(@@start_pattern)
19
+ url = Url.parse(StringScanner.new "[#{scanner.matched_pattern[1]}]")
20
+
21
+ items = []
22
+ until scanner.scan(@@end_pattern)
23
+ break unless scanner.match? /.*/
24
+ items << @@blocks.parse(scanner,context,inlines)[0]
25
+ end
26
+ self.new(items, url)
27
+ end
28
+ end
29
+
30
+ attr_reader :items, :url
31
+ def initialize(items, url = nil)
32
+ @items = items
33
+ @url = url
34
+ end
35
+
36
+ def ==(o)
37
+ o and self.class == o.class and self.items == o.items and @url == o.url
38
+ end
9
39
  end
10
40
  end
11
41
  end
@@ -0,0 +1,34 @@
1
+
2
+ require 'hparser/block/collectable'
3
+ require 'hparser/block/pair'
4
+
5
+ module HParser
6
+ module Block
7
+ class RAW < Pair
8
+ include Collectable
9
+
10
+ def self.parse(scanner, context, inlines)
11
+ if scanner.scan(/^></)
12
+ content = scanner.matched
13
+ until content.match(/><$/)
14
+ str = scanner.scan(/.*/)
15
+ if str.nil? then
16
+ content << "<"
17
+ break
18
+ end
19
+ content << "\n" << str
20
+ end
21
+ self.new inlines.parse(content[1..-2], context)
22
+ end
23
+ end
24
+
25
+ def ==(o)
26
+ self.class == o.class and self.content == o.content
27
+ end
28
+
29
+ def self.<=>(o)
30
+ -1
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,31 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/block/collectable'
6
+ module HParser
7
+ module Block
8
+ # SeeMore line parser.
9
+ #
10
+ # ==== or =====
11
+ class SeeMore
12
+ include Collectable
13
+ def self.parse(scanner,context,inlines)
14
+ if scanner.scan('=====')
15
+ SeeMore.new true
16
+ elsif scanner.scan('====') then
17
+ SeeMore.new false
18
+ end
19
+ end
20
+
21
+ attr_reader :is_super
22
+ def initialize(is_super)
23
+ @is_super = is_super
24
+ end
25
+
26
+ def ==(o)
27
+ o.class == self.class and o.is_super == self.is_super
28
+ end
29
+ end
30
+ end
31
+ end
@@ -6,13 +6,31 @@ module HParser
6
6
  class SuperPre < Pair
7
7
  include Collectable
8
8
 
9
- def self.parse scanner,inlines
10
- content = get scanner,'>||','||<'
9
+ def self.parse scanner,context,inlines
10
+
11
+ content = format = nil
12
+ if scanner.scan(/^>\|([A-Za-z0-9]*)\|\s*?$/)
13
+ lines = []
14
+ format = scanner.matched_pattern[1]
15
+ until scanner.scan(/^\|\|<\s*?$/) do
16
+ str = scanner.scan(/.*/)
17
+ break if !str
18
+ lines << str
19
+ end
20
+ content = lines.join("\n")
21
+ end
22
+
11
23
  if content then
12
- SuperPre.new content
24
+ SuperPre.new content, format
13
25
  end
14
26
  end
15
27
 
28
+ attr_reader :format
29
+ def initialize(content, format = nil)
30
+ super content
31
+ @format = format
32
+ end
33
+
16
34
  def self.<=>(o)
17
35
  -1
18
36
  end
@@ -9,14 +9,14 @@ module HParser
9
9
  class Table
10
10
  attr_reader :rows
11
11
  include Collectable
12
- def self.parse(scanner,inlines)
12
+ def self.parse(scanner,context,inlines)
13
13
  rows = []
14
14
  while scanner.scan(/\A\|/)
15
- rows.push scanner.matched[1..-1].split('|').map{|label|
15
+ rows.push scanner.matched[1..-1].split('|').select{|l| l[0]}.map{|label|
16
16
  if label[0].chr == '*' then
17
- Th.new inlines.parse(label[1..-1].strip)
17
+ Th.new inlines.parse(label[1..-1].strip, context)
18
18
  else
19
- Td.new inlines.parse(label.strip)
19
+ Td.new inlines.parse(label.strip, context)
20
20
  end
21
21
  }
22
22
  end
@@ -51,7 +51,9 @@ module HParser
51
51
 
52
52
  class Quote
53
53
  include Hatena
54
- alias_method :hatena_content,:content
54
+ def hatena_content
55
+ @items
56
+ end
55
57
  def hatena_filter c
56
58
  ">>\n"+c+"\n<<"
57
59
  end