hparser 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/.gitignore +2 -0
  2. data/.travis.yml +11 -0
  3. data/ChangeLog +4 -0
  4. data/Gemfile +3 -0
  5. data/README.md +40 -0
  6. data/Rakefile +12 -0
  7. data/VERSION +1 -0
  8. data/hparser.gemspec +21 -0
  9. data/lib/hparser/block/dl.rb +4 -4
  10. data/lib/hparser/block/footnote_list.rb +19 -0
  11. data/lib/hparser/block/head.rb +2 -2
  12. data/lib/hparser/block/list.rb +8 -8
  13. data/lib/hparser/block/p.rb +4 -3
  14. data/lib/hparser/block/pair.rb +12 -7
  15. data/lib/hparser/block/quote.rb +32 -2
  16. data/lib/hparser/block/raw.rb +34 -0
  17. data/lib/hparser/block/see_more.rb +31 -0
  18. data/lib/hparser/block/super_pre.rb +21 -3
  19. data/lib/hparser/block/table.rb +4 -4
  20. data/lib/hparser/hatena.rb +3 -1
  21. data/lib/hparser/html.rb +181 -13
  22. data/lib/hparser/inline/comment.rb +27 -0
  23. data/lib/hparser/inline/footnote.rb +34 -0
  24. data/lib/hparser/inline/fotolife.rb +40 -0
  25. data/lib/hparser/inline/hatena_id.rb +7 -6
  26. data/lib/hparser/inline/parser.rb +3 -2
  27. data/lib/hparser/inline/tex.rb +27 -0
  28. data/lib/hparser/inline/text.rb +3 -2
  29. data/lib/hparser/inline/url.rb +20 -6
  30. data/lib/hparser/latex.rb +273 -0
  31. data/lib/hparser/parser.rb +17 -1
  32. data/lib/hparser/text.rb +42 -0
  33. data/lib/hparser/util/line_scanner.rb +3 -2
  34. data/lib/hparser.rb +1 -0
  35. data/test/integration_texts/error1.ok.hatena +23 -0
  36. data/test/test_block.rb +65 -2
  37. data/test/test_bruteforce.rb +48 -0
  38. data/test/test_dl.rb +13 -1
  39. data/test/test_footnote.rb +42 -0
  40. data/test/test_fotolife.rb +29 -0
  41. data/test/test_from_perl/01_module.t +559 -0
  42. data/test/test_from_perl/02_module_extend.t +36 -0
  43. data/test/test_from_perl/10_autolink.t +78 -0
  44. data/test/test_from_perl/11_autolink_extend.t +43 -0
  45. data/test/test_hatena.rb +2 -2
  46. data/test/test_head.rb +7 -1
  47. data/test/test_helper.rb +11 -0
  48. data/test/test_html.rb +39 -3
  49. data/test/test_id.rb +1 -1
  50. data/test/test_inline.rb +13 -1
  51. data/test/test_inline_html.rb +37 -2
  52. data/test/test_integration.rb +20 -0
  53. data/test/test_latex.rb +101 -0
  54. data/test/test_p.rb +23 -3
  55. data/test/test_pair.rb +22 -4
  56. data/test/test_quote.rb +69 -0
  57. data/test/test_see_more.rb +28 -0
  58. data/test/test_table.rb +1 -1
  59. data/test/test_tex.rb +24 -0
  60. data/test/test_text.rb +12 -2
  61. data/test/test_url.rb +39 -2
  62. metadata +141 -58
  63. data/README +0 -17
@@ -0,0 +1,273 @@
1
+ # Author:: Masayoshi Takahashi (takahashimm@gmail.com)
2
+ # Copyright:: Copyright (c) 2010 Masayoshi Takahashi
3
+ # License:: Distributes under the same terms as Ruby
4
+ # This file defines +to_latex+, which converts Hatena format to LaTeX.
5
+ #
6
+
7
module HParser
  # This module provides the +to_latex+ method, which converts a parsed
  # Hatena-format element to LaTeX.
  #
  # For example:
  #  HParser::Parser.new.parse('*foo').map { |b| b.to_latex }.join
  #  # -> "\\section{foo}\n\n"
  #
  # Unlike HTML, a LaTeX phrase cannot be separated into a tag and its content,
  # so common helpers such as tag_name and html_content are not always used.
  # Only +to_latex+ is commonly implemented.
  #
  # The including class must provide +latex_content+. If it returns an +Array+,
  # each element is converted with +to_latex+ and the results are joined.
  # Otherwise the value is used as it is.
  module Latex
    # Returns the LaTeX source for +latex_content+.
    def to_latex
      content = latex_content
      content = content.map { |x| x.to_latex }.join if content.is_a?(Array)
      content
    end
  end

  module Block
    class Head
      include Latex

      # Sectioning commands ordered from the shallowest to the deepest level.
      LATEX_COMMANDS = %w[section subsection subsubsection paragraph subparagraph textbf].freeze

      # Sectioning depth used for a level-1 heading (1 means \section).
      @@head_level = 1

      def self.head_level=(l)
        @@head_level = l
      end

      def self.head_level
        @@head_level
      end

      # Renders the heading with the sectioning command for its depth.
      # Depths outside the table are clamped to the nearest available command,
      # so a very deep heading becomes \textbf instead of an invalid "\{...}".
      def to_latex
        depth = @@head_level + level - 1
        index = [[depth, 1].max, LATEX_COMMANDS.size].min - 1
        "\\#{LATEX_COMMANDS[index]}{#{super}}\n\n"
      end

      # Delegates at call time, so +content+ need not exist when this file loads.
      def latex_content
        content
      end
    end

    class P
      include Latex

      # A paragraph is its content followed by a blank line.
      def to_latex
        "#{super}\n\n"
      end

      def latex_content
        content
      end
    end

    class Empty
      def to_latex
        "\n\n"
      end
    end

    class SeeMore
      # Rendered as an empty string.
      def to_latex
        ''
      end
    end

    class Pre
      include Latex

      def to_latex
        body = super
        %Q[\\begin{verbatim}\n#{body}\n\\end{verbatim}\n]
      end

      def latex_content
        content
      end
    end

    class SuperPre
      include Latex

      # Uses +latex_content+ directly (not +super+): the content is inserted
      # as it is, without calling +to_latex+ on it.
      def to_latex
        %Q[\\begin{verbatim}\n#{latex_content}\n\\end{verbatim}\n]
      end

      def latex_content
        content
      end
    end

    class Quote
      include Latex

      def to_latex
        body = super
        %Q[\\begin{quotation}\n#{body}\n\\end{quotation}\n]
      end

      def latex_content
        @items
      end
    end

    class Table
      # Renders the table as a left-aligned tabular inside a table float.
      def to_latex
        columns = row_size
        lines = [
          "\\begin{table}\n",
          " \\centering\n",
          " \\begin{tabular}{ #{'l ' * columns}}\n"
        ]
        map_row do |row|
          lines << " #{row.map { |cell| cell.to_latex }.join(' & ')} \\\\\n"
        end
        lines << " \\end{tabular}\n" << "\\end{table}\n"
        lines.join
      end

      # Number of cells in the first row, or 0 when the table has no rows.
      def row_size
        map_row { |tr| return tr.size }
        0
      end
    end

    class Dl
      include Latex

      class Item
        # Renders one term/description pair as a description-list item.
        def to_latex
          dt = title.map { |x| x.to_latex }.join
          dd = description.map { |x| x.to_latex }.join

          %Q(\\item[#{dt}] \\quad \\\\\n#{dd}\n)
        end
      end

      def to_latex
        body = super
        %Q[\\begin{description}\n#{body}\n\\end{description}\n]
      end

      def latex_content
        @items
      end
    end

    class TableHeader
      include Latex

      def latex_content
        content
      end
    end

    class TableCell
      include Latex

      def latex_content
        content
      end
    end

    class UnorderList
      include Latex

      # Each item is rendered once and the results are joined with a newline.
      def to_latex
        %Q[\\begin{itemize}\n#{items.map { |i| i.to_latex }.join("\n")}\n\\end{itemize}\n]
      end

      def latex_content
        items
      end
    end

    class OrderList
      include Latex

      # Each item is rendered once and the results are joined with a newline.
      def to_latex
        %Q[\\begin{enumerate}\n#{items.map { |i| i.to_latex }.join("\n")}\n\\end{enumerate}\n]
      end

      def latex_content
        items
      end
    end

    class ListItem
      include Latex

      def to_latex
        " \\item #{super}\n"
      end

      def latex_content
        content
      end
    end

    class RAW
      def to_latex
        @content.map { |i| i.to_latex }.join
      end
    end

    class FootnoteList
      # Rendered as an empty string; Inline::Footnote#to_latex already emits
      # each footnote's text inline via \footnote.
      def to_latex
        ""
      end
    end
  end

  module Inline
    class Text
      # Returns the text unchanged; LaTeX special characters are not escaped.
      def to_latex
        text
      end
    end

    class Url
      # NOTE(review): a "/" is appended to the link target but not to the
      # displayed text; confirm this is intended.
      def to_latex
        "\\href{#{url}/}{#{url}}"
      end
    end

    class HatenaId
      def to_latex
        "\\href{http://d.hatena.ne.jp/#{name}/}{id:#{name}}"
      end
    end

    class Fotolife
      # Renders the photo as its URL.
      def to_latex
        url
      end
    end

    class Tex
      # Wraps the TeX source in inline math delimiters.
      def to_latex
        %($#{text}$)
      end
    end

    class Footnote
      def to_latex
        %(\\footnote{#{text}})
      end
    end

    class Comment
      # Comments produce no LaTeX output.
      def to_latex
        ""
      end
    end
  end
end
272
+
273
+
@@ -4,6 +4,7 @@
4
4
 
5
5
  require 'hparser/util/parser'
6
6
  require 'hparser/inline/parser'
7
+ require 'hparser/inline/footnote'
7
8
  require 'hparser/block/collectable'
8
9
  require 'hparser/util/line_scanner'
9
10
 
@@ -42,9 +43,14 @@ module HParser
42
43
  #
43
44
  # Return array of block element.
44
45
  def parse str
45
- @blocks.parse(LineScanner.new(str.split("\n")),@inlines).map{|x|
46
+ context = Context.new
47
+ res = (@blocks.parse(LineScanner.new(str.split(/\r\n|\r|\n/)),context,@inlines) || []).map{|x|
46
48
  x[0]
47
49
  }
50
+ if context.footnotes.length > 0
51
+ res << FootnoteList.new(context.footnotes)
52
+ end
53
+ res
48
54
  end
49
55
 
50
56
  # Return array of all usable parsers.
@@ -66,5 +72,15 @@ module HParser
66
72
  }
67
73
  end
68
74
  end
75
+
76
+ # Parse context.
77
+ #
78
+ # Context instance is passed to all parsers.
79
+ class Context
80
+ attr_reader :footnotes
81
+ def initialize(footnotes=[])
82
+ @footnotes = footnotes
83
+ end
84
+ end
69
85
  end
70
86
 
data/lib/hparser/text.rb CHANGED
@@ -39,6 +39,10 @@ module HParser
39
39
  end
40
40
  end
41
41
 
42
+ class SeeMore
43
+ def to_text() "" end
44
+ end
45
+
42
46
  module Indent
43
47
  include Text
44
48
  def text_content
@@ -57,7 +61,11 @@ module HParser
57
61
  include Indent
58
62
  end
59
63
  class Quote
64
+ include Text
60
65
  include Indent
66
+ def text_content
67
+ @items
68
+ end
61
69
  end
62
70
 
63
71
  class Table
@@ -117,6 +125,20 @@ module HParser
117
125
  [HParser::Inline::Text.new(' '),self.content].flatten
118
126
  end
119
127
  end
128
+
129
+ class RAW
130
+ def to_text
131
+ @content.map {|i| i.to_text }.join
132
+ end
133
+ end
134
+
135
+ class FootnoteList
136
+ def to_text
137
+ @footnotes.map {|f|
138
+ %((*#{f.index}) #{f.text})
139
+ }.join("\n")
140
+ end
141
+ end
120
142
  end
121
143
 
122
144
  module Inline
@@ -133,5 +155,25 @@ module HParser
133
155
  "id:#{self.name}"
134
156
  end
135
157
  end
158
+
159
+ class Fotolife
160
+ alias_method :to_text,:url
161
+ end
162
+
163
+ class Tex
164
+ alias_method :to_text,:text
165
+ end
166
+
167
+ class Footnote
168
+ def to_text
169
+ "(*#{self.index})"
170
+ end
171
+ end
172
+
173
+ class Comment
174
+ def to_text
175
+ ""
176
+ end
177
+ end
136
178
  end
137
179
  end
@@ -5,12 +5,13 @@ module HParser
5
5
  module Util
6
6
  # StringScanner like class
7
7
  class LineScanner
8
- attr_reader :matched
8
+ attr_reader :matched, :matched_pattern
9
9
  def initialize(lines)
10
10
  @lines = lines
11
11
  end
12
12
 
13
13
  def scan(exp)
14
+ @matched_pattern = nil
14
15
  if match?(exp) then
15
16
  @matched = @lines.shift
16
17
  else
@@ -29,7 +30,7 @@ module HParser
29
30
  def match?(exp)
30
31
  if @lines == [] then
31
32
  false
32
- elsif exp.class == Regexp and @lines[0] =~ exp then
33
+ elsif exp.class == Regexp and (@matched_pattern = @lines[0].match(exp)) then
33
34
  true
34
35
  elsif @lines[0] == exp
35
36
  true
data/lib/hparser.rb CHANGED
@@ -6,3 +6,4 @@ require 'hparser/parser'
6
6
  require 'hparser/block/all'
7
7
  require 'hparser/inline/all'
8
8
  require 'hparser/html'
9
+ require 'hparser/latex'
@@ -0,0 +1,23 @@
1
+ - A
2
+ - B
3
+ >||
4
+ A
5
+ B
6
+ ||<
7
+ A
8
+ >||
9
+ C
10
+ D
11
+ ||<
12
+ - A
13
+ - A
14
+ - A
15
+ - A
16
+ -- A
17
+ -- A
18
+
19
+ ** A
20
+ - A
21
+
22
+ ** A
23
+ - A
data/test/test_block.rb CHANGED
@@ -1,4 +1,4 @@
1
- require 'test/unit'
1
+ require 'test_helper'
2
2
  require 'hparser/parser'
3
3
  require 'hparser/block/list'
4
4
 
@@ -61,14 +61,38 @@ END
61
61
  end
62
62
 
63
63
  def test_spre
64
- assert_equal [SuperPre.new('a')],parse(<<-END)
64
+ assert_equal [SuperPre.new(' a ')],parse(<<-END)
65
65
  >||
66
+ a
67
+ ||<
68
+ END
69
+ assert_equal [SuperPre.new('a')],parse(<<-END), 'with space'
70
+ >||
66
71
  a
72
+ ||<
73
+ END
74
+
75
+ end
76
+
77
+ def test_spre_html
78
+ assert_equal [SuperPre.new('<foo />')],parse(<<-END)
79
+ >||
80
+ <foo />
67
81
  ||<
68
82
  END
69
83
 
70
84
  end
71
85
 
86
+ def test_spre_format
87
+ parsed = parse(<<-END)
88
+ >|xml|
89
+ <foo />
90
+ ||<
91
+ END
92
+ assert_equal [SuperPre.new('<foo />')], parsed
93
+ assert_equal 'xml', parsed.first.format
94
+ end
95
+
72
96
  def test_list
73
97
  assert_equal [Ul.new(li('a'),Ol.new(li('b')),Ul.new(li('c')))],
74
98
  parse(<<-END)
@@ -78,6 +102,45 @@ END
78
102
  END
79
103
  end
80
104
 
105
+
106
+ def test_comment
107
+ assert_equal [HParser::Block::RAW.new([ Comment.new("\naaa\n") ])], parse(<<-END.unindent)
108
+ ><!--
109
+ aaa
110
+ --><
111
+ END
112
+ end
113
+
114
+ def test_raw
115
+ assert_equal [RAW.new([ Text.new("<ins>") ]), P.new([ Text.new("foo") ]), RAW.new([ Text.new("</ins>") ])], parse(<<-END.unindent)
116
+ ><ins><
117
+ foo
118
+ ></ins><
119
+ END
120
+ end
121
+
122
+ def test_raw_without_end_lt
123
+ assert_equal [RAW.new([ Text.new("<ins>") ]), P.new([ Text.new("foo") ]), RAW.new([ Text.new("</ins>") ])], parse(<<-END.unindent)
124
+ ><ins><
125
+ foo
126
+ ></ins>
127
+ END
128
+ end
129
+
130
+ def test_p
131
+ assert_equal [P.new([ Text.new(" foo") ])], parse(<<END)
132
+ foo
133
+ END
134
+
135
+ str = <<END
136
+ foo
137
+ bar
138
+ buz
139
+ END
140
+ assert_equal [P.new([ Text.new(" foo")]), P.new([Text.new(" bar") ]), P.new([Text.new(" buz") ])], parse(str)
141
+ end
142
+
143
+
81
144
  def li str
82
145
  Li.new([Text.new(str)])
83
146
  end
@@ -0,0 +1,48 @@
1
+ require 'test_helper'
2
+ require 'hparser/parser'
3
+ require 'hparser/block/list'
4
+ require 'pathname'
5
+
6
+ class BlockTest < Test::Unit::TestCase
7
+ include HParser::Block
8
+ include HParser::Inline
9
+
10
+ def setup
11
+ @parser = HParser::Parser.new
12
+ end
13
+
14
+ def parse str
15
+ @parser.parse str
16
+ end
17
+
18
+ def test_from_cpan_text_hatena
19
+ Pathname.glob(Pathname.new(__FILE__).parent + "test_from_perl/*.t").each do |test|
20
+ data = test.read.gsub(/\r?\n|\r/, "\n")[/__END__\n([\s\S]+)/, 1].split(/^===\s*/)
21
+ data.each do |d|
22
+ name, *rest = d.split(/^--- */)
23
+ rest = rest.inject({}) {|r,i|
24
+ i.sub!(/^(.*)\n/, "")
25
+ r.update(Regexp.last_match[1] => i)
26
+ }
27
+ next unless rest["text"]
28
+
29
+ parsed = nil
30
+ assert_nothing_raised("#{test.basename}::#{name}\n\n#{rest["text"]}\n") {
31
+ parsed = @parser.parse(rest["text"])
32
+ }
33
+
34
+ html = nil
35
+ assert_nothing_raised("to_html") {
36
+ html = @parser.parse(rest["text"]).map {|i| i.to_html }.join.strip
37
+ }
38
+
39
+ # paranoid test
40
+ if ENV["TEST_PARANOID"]
41
+ require "rubygems"
42
+ require "hpricot"
43
+ assert_equal Hpricot(rest["html"].strip).to_s, Hpricot(html).to_s
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
data/test/test_dl.rb CHANGED
@@ -1,4 +1,4 @@
1
- require 'test/unit'
1
+ require 'test_helper'
2
2
  require 'hparser/parser'
3
3
  require 'hparser/block/dl'
4
4
 
@@ -23,4 +23,16 @@ class DlTest < Test::Unit::TestCase
23
23
  :bar:bar is ...
24
24
  END
25
25
  end
26
+
27
+ def test_dl2
28
+ first = Dl::Item.new([Text.new('<a href="http://example.com/">a</a>')],
29
+ [Text.new('b')])
30
+ assert_equal [Dl.new(first)],
31
+ parse(':<a href="http://example.com/">a</a>:b')
32
+ end
33
+
34
+ def test_fake_dl
35
+ assert_equal [],parse(":this is not dl")
36
+ assert_equal [],parse(":this is not dl too:")
37
+ end
26
38
  end
@@ -0,0 +1,42 @@
1
+ require 'test_helper'
2
+ require 'hparser/parser'
3
+ require 'hparser/block/p'
4
+ require 'hparser/inline/parser'
5
+ require 'hparser/inline/footnote'
6
+
7
+ class FootnoteTest < Test::Unit::TestCase
8
+ include HParser::Inline
9
+ def setup
10
+ @parser = Parser.new [Footnote]
11
+ end
12
+
13
+ def parse str
14
+ @parser.parse str, HParser::Context.new
15
+ end
16
+
17
+ def test_new
18
+ footnote = Footnote.new(1, "text")
19
+ assert_equal 1, footnote.index
20
+ assert_equal "text", footnote.text
21
+ end
22
+
23
+ def test_parse
24
+ assert_equal [Footnote.new(1, "text")], parse("((text))")
25
+ assert_equal [Footnote.new(1, "text1"),Footnote.new(2, "text2")], parse("((text1))((text2))")
26
+ assert_equal [Text.new("((text))")], parse(")((text))(")
27
+ end
28
+
29
+ def test_parse_footnote_list
30
+ f1 = Footnote.new(1, "text1")
31
+ f2 = Footnote.new(2, "text2")
32
+
33
+ # [ P ([ Footnote(...), Footnote(...) ]),
34
+ # FootnoteList([ Footnote(...), Footnote(...) ])
35
+ # ]
36
+ elements = HParser::Parser.new.parse("((text1))((text2))")
37
+
38
+ assert_equal 2, elements.length
39
+ assert_equal HParser::Block::P.new([f1, f2]), elements[0]
40
+ assert_equal HParser::Block::FootnoteList.new([f1, f2]), elements[1]
41
+ end
42
+ end
@@ -0,0 +1,29 @@
1
+ require 'test_helper'
2
+ require 'hparser/inline/parser'
3
+ require 'hparser/inline/fotolife'
4
+
5
+ class FotolifeTest < Test::Unit::TestCase
6
+ include HParser::Inline
7
+ def setup
8
+ @parser = Parser.new [Fotolife]
9
+ end
10
+
11
+ def parse str
12
+ @parser.parse str
13
+ end
14
+
15
+ def test_new
16
+ f = Fotolife.new("nitoyon", "20100718", "010346", "jpg")
17
+ assert_equal "nitoyon", f.id
18
+ assert_equal "20100718", f.date
19
+ assert_equal "010346", f.time
20
+ assert_equal "jpg", f.ext
21
+ assert_equal "http://f.hatena.ne.jp/nitoyon/20100718010346", f.url
22
+ assert_equal "http://f.hatena.ne.jp/images/fotolife/n/nitoyon/20100718/20100718010346.jpg", f.image_url
23
+ end
24
+
25
+ def test_parse
26
+ assert_equal [Fotolife.new("nitoyon", "20100718", "010346", "jpg")],
27
+ parse("[f:id:nitoyon:20100718010346j:image]")
28
+ end
29
+ end