hparser 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/.gitignore +2 -0
  2. data/.travis.yml +11 -0
  3. data/ChangeLog +4 -0
  4. data/Gemfile +3 -0
  5. data/README.md +40 -0
  6. data/Rakefile +12 -0
  7. data/VERSION +1 -0
  8. data/hparser.gemspec +21 -0
  9. data/lib/hparser/block/dl.rb +4 -4
  10. data/lib/hparser/block/footnote_list.rb +19 -0
  11. data/lib/hparser/block/head.rb +2 -2
  12. data/lib/hparser/block/list.rb +8 -8
  13. data/lib/hparser/block/p.rb +4 -3
  14. data/lib/hparser/block/pair.rb +12 -7
  15. data/lib/hparser/block/quote.rb +32 -2
  16. data/lib/hparser/block/raw.rb +34 -0
  17. data/lib/hparser/block/see_more.rb +31 -0
  18. data/lib/hparser/block/super_pre.rb +21 -3
  19. data/lib/hparser/block/table.rb +4 -4
  20. data/lib/hparser/hatena.rb +3 -1
  21. data/lib/hparser/html.rb +181 -13
  22. data/lib/hparser/inline/comment.rb +27 -0
  23. data/lib/hparser/inline/footnote.rb +34 -0
  24. data/lib/hparser/inline/fotolife.rb +40 -0
  25. data/lib/hparser/inline/hatena_id.rb +7 -6
  26. data/lib/hparser/inline/parser.rb +3 -2
  27. data/lib/hparser/inline/tex.rb +27 -0
  28. data/lib/hparser/inline/text.rb +3 -2
  29. data/lib/hparser/inline/url.rb +20 -6
  30. data/lib/hparser/latex.rb +273 -0
  31. data/lib/hparser/parser.rb +17 -1
  32. data/lib/hparser/text.rb +42 -0
  33. data/lib/hparser/util/line_scanner.rb +3 -2
  34. data/lib/hparser.rb +1 -0
  35. data/test/integration_texts/error1.ok.hatena +23 -0
  36. data/test/test_block.rb +65 -2
  37. data/test/test_bruteforce.rb +48 -0
  38. data/test/test_dl.rb +13 -1
  39. data/test/test_footnote.rb +42 -0
  40. data/test/test_fotolife.rb +29 -0
  41. data/test/test_from_perl/01_module.t +559 -0
  42. data/test/test_from_perl/02_module_extend.t +36 -0
  43. data/test/test_from_perl/10_autolink.t +78 -0
  44. data/test/test_from_perl/11_autolink_extend.t +43 -0
  45. data/test/test_hatena.rb +2 -2
  46. data/test/test_head.rb +7 -1
  47. data/test/test_helper.rb +11 -0
  48. data/test/test_html.rb +39 -3
  49. data/test/test_id.rb +1 -1
  50. data/test/test_inline.rb +13 -1
  51. data/test/test_inline_html.rb +37 -2
  52. data/test/test_integration.rb +20 -0
  53. data/test/test_latex.rb +101 -0
  54. data/test/test_p.rb +23 -3
  55. data/test/test_pair.rb +22 -4
  56. data/test/test_quote.rb +69 -0
  57. data/test/test_see_more.rb +28 -0
  58. data/test/test_table.rb +1 -1
  59. data/test/test_tex.rb +24 -0
  60. data/test/test_text.rb +12 -2
  61. data/test/test_url.rb +39 -2
  62. metadata +141 -58
  63. data/README +0 -17
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ *.gem
2
+ Gemfile.lock
data/.travis.yml ADDED
@@ -0,0 +1,11 @@
1
+ rvm:
2
+ - 1.8.7
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - rbx-2.0
6
+ - jruby
7
+ - ruby-head
8
+ - ree
9
+ notifications:
10
+ email:
11
+ - hotchpotch@gmail.com
data/ChangeLog ADDED
@@ -0,0 +1,4 @@
1
+ == 0.0.1 / 2008-06-18
2
+
3
+ * initial release
4
+
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source :rubygems
2
+
3
+ gemspec
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ =Hatena Format Parser
2
+
3
+ == Description
4
+
5
+ +hparser+ is a Hatena format parser. This format is used at Hatena Diary (http://d.hatena.ne.jp/).
6
+ If you want to know more about the Hatena format, please see http://hatenadiary.g.hatena.ne.jp/keyword/%e3%81%af%e3%81%a6%e3%81%aa%e8%a8%98%e6%b3%95%e4%b8%80%e8%a6%a7
7
+
8
+ +hparser+ is built from several small parsers (e.g. a header parser, a list parser, and so on).
9
+ So new formats can be added to +hparser+, and unused formats can be removed.
10
+
11
+ == Installation
12
+
13
+ === Archive Installation
14
+
15
+ rake install
16
+
17
+ === Gem Installation
18
+
19
+ gem install hotchpotch-hparser
20
+
21
+
22
+ == Features/Problems
23
+
24
+
25
+ == Synopsis
26
+
27
+ To parse the Hatena format, please use HParser::Parser.
28
+
29
+ require 'hparser'
30
+
31
+ parser = HParser::Parser.new
32
+ puts parser.parse(hatena_syntax).map {|e| e.to_html }.join("\n")
33
+
34
+
35
+ == Copyright
36
+
37
+ Author:: HIROKI Mizuno(Original Author), Yuichi Tateno<hotchpotch@nospam@gmail.com>, Nitoyon
38
+ Copyright:: HIROKI Mizuno, Yuichi Tateno
39
+ License:: Ruby's
40
+
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require 'rake/testtask'
5
+
6
+ Rake::TestTask.new(:test) do |test|
7
+ test.libs << 'lib' << 'test'
8
+ test.test_files = FileList['test/*.rb'] + FileList['test/*/*.rb']
9
+ test.verbose = true
10
+ end
11
+
12
+ task :default => [:test]
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.4.0
data/hparser.gemspec ADDED
@@ -0,0 +1,21 @@
1
+ # encoding: utf-8
2
+ $:.push File.expand_path('../lib', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.name = "hparser"
6
+ gem.description = "Hatena Syntax parser for Ruby"
7
+ gem.homepage = "https://github.com/hotchpotch/hparser"
8
+ gem.summary = gem.description
9
+ gem.version = File.read("VERSION").strip
10
+ gem.authors = ["HIROKI Mizuno", "Yuichi Tateno", "Nitoyon"]
11
+ gem.email = ""
12
+ gem.has_rdoc = false
13
+ gem.files = `git ls-files`.split("\n")
14
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
15
+ gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
16
+ gem.require_paths = ['lib']
17
+
18
+ gem.add_development_dependency "rake", ">= 0.9.2"
19
+ gem.add_development_dependency "pry"
20
+ end
21
+
@@ -21,12 +21,12 @@ module HParser
21
21
  end
22
22
  end
23
23
 
24
- def self.parse(scanner,inlines)
24
+ def self.parse(scanner,context,inlines)
25
25
  items = []
26
- while scanner.scan(/\A:/)
26
+ while scanner.scan(/\A:((?:<[^>]+>|\[[^\]]+\]|[^:])+):(.+)/)
27
27
  i = scanner.matched.index(':',1)
28
- title = inlines.parse scanner.matched[1...i]
29
- description = inlines.parse scanner.matched[i+1..-1]
28
+ title = inlines.parse scanner.matched_pattern[1], context
29
+ description = inlines.parse scanner.matched_pattern[2], context
30
30
  items.push Item.new(title,description)
31
31
  end
32
32
  items == [] ? nil : self.new(*items)
@@ -0,0 +1,19 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ module HParser
6
+ module Block
7
+ class FootnoteList
8
+ attr_reader :footnotes
9
+
10
+ def initialize(footnotes)
11
+ @footnotes = footnotes
12
+ end
13
+
14
+ def ==(o)
15
+ self.class == o.class and self.footnotes == o.footnotes
16
+ end
17
+ end
18
+ end
19
+ end
@@ -16,7 +16,7 @@ module HParser
16
16
  # *** level3
17
17
  class Head
18
18
  include Collectable
19
- def self.parse(scanner,inlines)
19
+ def self.parse(scanner,context,inlines)
20
20
  if scanner.scan(/\A\*/) then
21
21
  level = 0
22
22
  scanner.matched.each_byte{|c|
@@ -26,7 +26,7 @@ module HParser
26
26
  break
27
27
  end
28
28
  }
29
- Head.new level,inlines.parse(scanner.matched[level..-1].strip)
29
+ Head.new level,inlines.parse(scanner.matched[level..-1].strip, context)
30
30
  end
31
31
  end
32
32
 
@@ -10,7 +10,7 @@ module HParser
10
10
  module Block
11
11
  include HParser::Util
12
12
  def self.make_list_parser(level,mark,&proc)
13
- ProcParser.new{|scanner,inlines|
13
+ ProcParser.new{|scanner,context,inlines|
14
14
  if level == 3 then
15
15
  parser = Many1.new(Li.make_parser(level,mark))
16
16
  else
@@ -18,7 +18,7 @@ module HParser
18
18
  OrderList.make_parser(level+1),
19
19
  Li.make_parser(level,mark)))
20
20
  end
21
- list = parser.parse(scanner,inlines)
21
+ list = parser.parse(scanner,context,inlines)
22
22
 
23
23
  if list then
24
24
  proc.call list
@@ -30,8 +30,8 @@ module HParser
30
30
  # Maybe rewrite in near future.
31
31
  class UnorderList
32
32
  include Collectable
33
- def self.parse(scanner,inlines)
34
- Ul.make_parser(1).parse(scanner,inlines)
33
+ def self.parse(scanner,context,inlines)
34
+ Ul.make_parser(1).parse(scanner,context,inlines)
35
35
  end
36
36
 
37
37
  def self.make_parser(level)
@@ -52,8 +52,8 @@ module HParser
52
52
  # Maybe rewrite in near future.
53
53
  class OrderList
54
54
  include Collectable
55
- def self.parse(scanner,inlines)
56
- Ol.make_parser(1).parse(scanner,inlines)
55
+ def self.parse(scanner,context,inlines)
56
+ Ol.make_parser(1).parse(scanner,context,inlines)
57
57
  end
58
58
 
59
59
  def self.make_parser(level)
@@ -75,9 +75,9 @@ module HParser
75
75
  class ListItem
76
76
  def self.make_parser(level,mark)
77
77
  include HParser::Util
78
- ProcParser.new{|scanner,inlines|
78
+ ProcParser.new{|scanner,context,inlines|
79
79
  if scanner.scan(/\A#{Regexp.quote mark*level}.*/) then
80
- ListItem.new inlines.parse(scanner.matched[level..-1].strip)
80
+ ListItem.new inlines.parse(scanner.matched[level..-1].strip, context)
81
81
  end
82
82
  }
83
83
  end
@@ -18,7 +18,7 @@ module HParser
18
18
  # third line is parsed with HParser::Block::Empty.
19
19
  class Empty
20
20
  include Collectable
21
- def self.parse(scanner,inlines)
21
+ def self.parse(scanner,context,inlines)
22
22
  if scanner.scan('') then
23
23
  Empty.new
24
24
  end
@@ -36,9 +36,10 @@ module HParser
36
36
  class P
37
37
  include Collectable
38
38
  attr_reader :content
39
- def self.parse(scanner,inlines)
39
+ def self.parse(scanner,context,inlines)
40
40
  if scanner.scan(/./) then
41
- P.new inlines.parse(scanner.matched)
41
+ matched = scanner.matched
42
+ P.new inlines.parse(matched, context)
42
43
  end
43
44
  end
44
45
 
@@ -26,22 +26,27 @@ module HParser
26
26
  def self.get(scanner,from,to)
27
27
  from_q = Regexp.quote from
28
28
  to_q = Regexp.quote to
29
- if scanner.scan(/^#{from_q}$/)
30
- content = ''
31
- until scanner.scan(/^#{to_q}$/) do
32
- content += "\n"+ scanner.scan(/.*/)
29
+ if scanner.scan(/^#{from_q}\s*?$/)
30
+ lines = []
31
+ until scanner.scan(/^#{to_q}\s*?$/) do
32
+ matched = scanner.scan(/.*/)
33
+ if matched
34
+ lines << matched
35
+ else
36
+ break
37
+ end
33
38
  end
34
- return content.strip
39
+ return lines.join("\n")
35
40
  end
36
41
  end
37
42
 
38
43
  # make parser by begin/end-ing string
39
44
  def self.spliter(from,to)
40
45
  module_eval <<-"END"
41
- def self.parse(scanner,inlines)
46
+ def self.parse(scanner,context,inlines)
42
47
  content = get(scanner,"#{from}","#{to}")
43
48
  if content then
44
- self.new inlines.parse(content)
49
+ self.new inlines.parse(content, context)
45
50
  else
46
51
  nil
47
52
  end
@@ -1,11 +1,41 @@
1
+ require 'strscan'
1
2
  require 'hparser/block/pair'
2
3
  require 'hparser/block/collectable'
4
+ require 'hparser/inline/url'
3
5
  module HParser
4
6
  module Block
5
7
  # Quote parser.
6
- class Quote < Pair
8
+ class Quote
7
9
  include Collectable
8
- spliter '>>','<<'
10
+ include HParser::Inline
11
+ include HParser::Util
12
+ @@start_pattern = /^>(.*)>\s*$/
13
+ @@end_pattern = /^<<\s*$/
14
+ @@blocks = Concat.new(Or.new(*HParser::Parser.default_parser),
15
+ Skip.new(Empty))
16
+
17
+ def self.parse(scanner,context,inlines)
18
+ if scanner.scan(@@start_pattern)
19
+ url = Url.parse(StringScanner.new "[#{scanner.matched_pattern[1]}]")
20
+
21
+ items = []
22
+ until scanner.scan(@@end_pattern)
23
+ break unless scanner.match? /.*/
24
+ items << @@blocks.parse(scanner,context,inlines)[0]
25
+ end
26
+ self.new(items, url)
27
+ end
28
+ end
29
+
30
+ attr_reader :items, :url
31
+ def initialize(items, url = nil)
32
+ @items = items
33
+ @url = url
34
+ end
35
+
36
+ def ==(o)
37
+ o and self.class == o.class and self.items == o.items and @url == o.url
38
+ end
9
39
  end
10
40
  end
11
41
  end
@@ -0,0 +1,34 @@
1
+
2
+ require 'hparser/block/collectable'
3
+ require 'hparser/block/pair'
4
+
5
+ module HParser
6
+ module Block
7
+ class RAW < Pair
8
+ include Collectable
9
+
10
+ def self.parse(scanner, context, inlines)
11
+ if scanner.scan(/^></)
12
+ content = scanner.matched
13
+ until content.match(/><$/)
14
+ str = scanner.scan(/.*/)
15
+ if str.nil? then
16
+ content << "<"
17
+ break
18
+ end
19
+ content << "\n" << str
20
+ end
21
+ self.new inlines.parse(content[1..-2], context)
22
+ end
23
+ end
24
+
25
+ def ==(o)
26
+ self.class == o.class and self.content == o.content
27
+ end
28
+
29
+ def self.<=>(o)
30
+ -1
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,31 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/block/collectable'
6
+ module HParser
7
+ module Block
8
+ # SeeMore line parser.
9
+ #
10
+ # ==== or =====
11
+ class SeeMore
12
+ include Collectable
13
+ def self.parse(scanner,context,inlines)
14
+ if scanner.scan('=====')
15
+ SeeMore.new true
16
+ elsif scanner.scan('====') then
17
+ SeeMore.new false
18
+ end
19
+ end
20
+
21
+ attr_reader :is_super
22
+ def initialize(is_super)
23
+ @is_super = is_super
24
+ end
25
+
26
+ def ==(o)
27
+ o.class == self.class and o.is_super == self.is_super
28
+ end
29
+ end
30
+ end
31
+ end
@@ -6,13 +6,31 @@ module HParser
6
6
  class SuperPre < Pair
7
7
  include Collectable
8
8
 
9
- def self.parse scanner,inlines
10
- content = get scanner,'>||','||<'
9
+ def self.parse scanner,context,inlines
10
+
11
+ content = format = nil
12
+ if scanner.scan(/^>\|([A-Za-z0-9]*)\|\s*?$/)
13
+ lines = []
14
+ format = scanner.matched_pattern[1]
15
+ until scanner.scan(/^\|\|<\s*?$/) do
16
+ str = scanner.scan(/.*/)
17
+ break if !str
18
+ lines << str
19
+ end
20
+ content = lines.join("\n")
21
+ end
22
+
11
23
  if content then
12
- SuperPre.new content
24
+ SuperPre.new content, format
13
25
  end
14
26
  end
15
27
 
28
+ attr_reader :format
29
+ def initialize(content, format = nil)
30
+ super content
31
+ @format = format
32
+ end
33
+
16
34
  def self.<=>(o)
17
35
  -1
18
36
  end
@@ -9,14 +9,14 @@ module HParser
9
9
  class Table
10
10
  attr_reader :rows
11
11
  include Collectable
12
- def self.parse(scanner,inlines)
12
+ def self.parse(scanner,context,inlines)
13
13
  rows = []
14
14
  while scanner.scan(/\A\|/)
15
- rows.push scanner.matched[1..-1].split('|').map{|label|
15
+ rows.push scanner.matched[1..-1].split('|').select{|l| l[0]}.map{|label|
16
16
  if label[0].chr == '*' then
17
- Th.new inlines.parse(label[1..-1].strip)
17
+ Th.new inlines.parse(label[1..-1].strip, context)
18
18
  else
19
- Td.new inlines.parse(label.strip)
19
+ Td.new inlines.parse(label.strip, context)
20
20
  end
21
21
  }
22
22
  end
@@ -51,7 +51,9 @@ module HParser
51
51
 
52
52
  class Quote
53
53
  include Hatena
54
- alias_method :hatena_content,:content
54
+ def hatena_content
55
+ @items
56
+ end
55
57
  def hatena_filter c
56
58
  ">>\n"+c+"\n<<"
57
59
  end