hparser 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/.gitignore +2 -0
  2. data/.travis.yml +11 -0
  3. data/ChangeLog +4 -0
  4. data/Gemfile +3 -0
  5. data/README.md +40 -0
  6. data/Rakefile +12 -0
  7. data/VERSION +1 -0
  8. data/hparser.gemspec +21 -0
  9. data/lib/hparser/block/dl.rb +4 -4
  10. data/lib/hparser/block/footnote_list.rb +19 -0
  11. data/lib/hparser/block/head.rb +2 -2
  12. data/lib/hparser/block/list.rb +8 -8
  13. data/lib/hparser/block/p.rb +4 -3
  14. data/lib/hparser/block/pair.rb +12 -7
  15. data/lib/hparser/block/quote.rb +32 -2
  16. data/lib/hparser/block/raw.rb +34 -0
  17. data/lib/hparser/block/see_more.rb +31 -0
  18. data/lib/hparser/block/super_pre.rb +21 -3
  19. data/lib/hparser/block/table.rb +4 -4
  20. data/lib/hparser/hatena.rb +3 -1
  21. data/lib/hparser/html.rb +181 -13
  22. data/lib/hparser/inline/comment.rb +27 -0
  23. data/lib/hparser/inline/footnote.rb +34 -0
  24. data/lib/hparser/inline/fotolife.rb +40 -0
  25. data/lib/hparser/inline/hatena_id.rb +7 -6
  26. data/lib/hparser/inline/parser.rb +3 -2
  27. data/lib/hparser/inline/tex.rb +27 -0
  28. data/lib/hparser/inline/text.rb +3 -2
  29. data/lib/hparser/inline/url.rb +20 -6
  30. data/lib/hparser/latex.rb +273 -0
  31. data/lib/hparser/parser.rb +17 -1
  32. data/lib/hparser/text.rb +42 -0
  33. data/lib/hparser/util/line_scanner.rb +3 -2
  34. data/lib/hparser.rb +1 -0
  35. data/test/integration_texts/error1.ok.hatena +23 -0
  36. data/test/test_block.rb +65 -2
  37. data/test/test_bruteforce.rb +48 -0
  38. data/test/test_dl.rb +13 -1
  39. data/test/test_footnote.rb +42 -0
  40. data/test/test_fotolife.rb +29 -0
  41. data/test/test_from_perl/01_module.t +559 -0
  42. data/test/test_from_perl/02_module_extend.t +36 -0
  43. data/test/test_from_perl/10_autolink.t +78 -0
  44. data/test/test_from_perl/11_autolink_extend.t +43 -0
  45. data/test/test_hatena.rb +2 -2
  46. data/test/test_head.rb +7 -1
  47. data/test/test_helper.rb +11 -0
  48. data/test/test_html.rb +39 -3
  49. data/test/test_id.rb +1 -1
  50. data/test/test_inline.rb +13 -1
  51. data/test/test_inline_html.rb +37 -2
  52. data/test/test_integration.rb +20 -0
  53. data/test/test_latex.rb +101 -0
  54. data/test/test_p.rb +23 -3
  55. data/test/test_pair.rb +22 -4
  56. data/test/test_quote.rb +69 -0
  57. data/test/test_see_more.rb +28 -0
  58. data/test/test_table.rb +1 -1
  59. data/test/test_tex.rb +24 -0
  60. data/test/test_text.rb +12 -2
  61. data/test/test_url.rb +39 -2
  62. metadata +141 -58
  63. data/README +0 -17
@@ -0,0 +1,273 @@
1
+ # Author:: Masayoshi Takahashi (takahashimm@gmail.com)
2
+ # Copyright:: Copyright (c) 2010 Masayoshi Takahashi
3
+ # License:: Distributes under the same terms as Ruby
4
+ # This file defines +to_latex+, which converts Hatena format to LaTeX.
5
+ #
6
+
7
+ module HParser
8
+ # This module provides the +to_latex+ method.
9
+ # This method is intended to convert Hatena format to LaTeX format.
10
+ #
11
+ # For example:
12
+ # Hatena::Parser.parse('*foo').to_latex # -> \section{foo}
13
+ # Hatena::Parser.parse('>|bar|<').to_latex # -> \begin{verbatim} <\n> bar <\n> \end{verbatim}
14
+ #
15
+ # Unlike HTML, a LaTeX phrase cannot be separated into a tag and its content,
16
+ # so common methods such as tag_name and html_content are not always used.
17
+ # Only the to_latex method is commonly implemented.
18
+ # If the content is an +Array+, each element is converted to LaTeX by
19
+ # +to_latex+. Otherwise, it is used as is.
20
+ #
# Mixin that renders a node as LaTeX.
#
# The including class must provide +latex_content+. When that value is an
# Array, every element is rendered with its own +to_latex+ and the results
# are concatenated; any other value is returned unchanged.
module Latex
  # Returns the LaTeX rendering of +latex_content+.
  def to_latex
    content = latex_content
    # is_a? (instead of comparing the exact class) also accepts Array subclasses.
    return content.map { |x| x.to_latex }.join if content.is_a?(Array)

    content
  end
end
30
+
31
+ module Block
# LaTeX rendering for headings.
class Head
  include Latex

  # Sectioning commands indexed by effective heading depth (index 0 is unused).
  LATEX_HEADERS = [
    nil,
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    "textbf"
  ].freeze

  # Depth assigned to a level-1 heading; 1 maps it to \section.
  @@head_level = 1

  # Sets the depth used for level-1 headings.
  def self.head_level=(l)
    @@head_level = l
  end

  # Returns the depth used for level-1 headings.
  def self.head_level
    @@head_level
  end

  # Returns the heading as "\<command>{<content>}" followed by a blank line.
  def to_latex
    content = super
    depth = @@head_level + level - 1
    # Clamp to the known commands: an out-of-range depth would otherwise
    # look up nil and emit the invalid command "\{...}".
    depth = [[depth, 1].max, LATEX_HEADERS.size - 1].min
    "\\#{LATEX_HEADERS[depth]}{#{content}}\n\n"
  end

  alias_method :latex_content, :content
end
60
+
61
+ class P
62
+ include Latex
63
+
64
+ def to_latex
65
+ content = super
66
+ content += "\n\n"
67
+ content
68
+ end
69
+
70
+ alias_method :latex_content, :content
71
+ end
72
+
73
+ class Empty
74
+ def to_latex() "\n\n" end
75
+ end
76
+
77
+ class SeeMore
78
+ def to_latex() '' end
79
+ end
80
+
81
+ class Pre
82
+ include Latex
83
+
84
+ def to_latex
85
+ content = super
86
+ %Q[\\begin{verbatim}\n#{content}\n\\end{verbatim}\n]
87
+ end
88
+
89
+ alias_method :latex_content, :content
90
+ end
91
+
92
+ class SuperPre
93
+ include Latex
94
+
95
+ def to_latex
96
+ content = latex_content ## not 'super'
97
+ %Q[\\begin{verbatim}\n#{content}\n\\end{verbatim}\n]
98
+ end
99
+
100
+ alias_method :latex_content,:content
101
+ end
102
+
103
+ class Quote
104
+ include Latex
105
+
106
+ def to_latex
107
+ content = super
108
+ %Q[\\begin{quotation}\n#{content}\n\\end{quotation}\n]
109
+ end
110
+
111
+ def latex_content
112
+ @items
113
+ end
114
+ end
115
+
# LaTeX rendering for tables.
class Table
  # Returns the table as a centered +tabular+ inside a +table+ environment.
  # Every column is left-aligned and cells in a row are joined with " & ".
  def to_latex
    columns = "l " * row_size

    output = "\\begin{table}\n"
    output << " \\centering\n"
    output << " \\begin{tabular}{ #{columns}}\n"
    map_row do |row|
      output << " #{row.map { |cell| cell.to_latex }.join(" & ")} \\\\\n"
    end
    output << " \\end{tabular}\n"
    output << "\\end{table}\n"
    output
  end

  # Returns the number of cells in the first row, or 0 when the table has
  # no rows (so +to_latex+ never multiplies a string by a non-integer).
  def row_size
    map_row { |tr| return tr.size }
    0
  end
end
137
+
138
+ class Dl
139
+ include Latex
140
+
141
+ class Item
142
+ def to_latex
143
+ dt = self.title.map{|x| x.to_latex}.join
144
+ dd = self.description.map{|x| x.to_latex}.join
145
+
146
+ %Q(\\item[#{dt}] \\quad \\\\\n#{dd}\n)
147
+ end
148
+ end
149
+
150
+ def to_latex
151
+ content = super
152
+ %Q[\\begin{description}\n#{content}\n\\end{description}\n]
153
+ end
154
+
155
+ def latex_content
156
+ @items
157
+ end
158
+ end
159
+
160
+ class TableHeader
161
+ include Latex
162
+
163
+ def to_latex
164
+ content = super
165
+ content
166
+ end
167
+
168
+ alias_method :latex_content,:content
169
+ end
170
+
171
+ class TableCell
172
+ include Latex
173
+
174
+ def to_latex
175
+ content = super
176
+ content
177
+ end
178
+
179
+ alias_method :latex_content,:content
180
+ end
181
+
# LaTeX rendering for unordered lists.
class UnorderList
  include Latex

  # Returns the items wrapped in an +itemize+ environment.
  # Each item is rendered exactly once; the earlier unused call to +super+
  # rendered every item a second time and discarded the result.
  def to_latex
    body = items.map { |i| i.to_latex }.join("\n")
    "\\begin{itemize}\n#{body}\n\\end{itemize}\n"
  end

  alias_method :latex_content, :items
end
191
+
# LaTeX rendering for ordered lists.
class OrderList
  include Latex

  # Returns the items wrapped in an +enumerate+ environment.
  # Each item is rendered exactly once; the earlier unused call to +super+
  # rendered every item a second time and discarded the result.
  def to_latex
    body = items.map { |i| i.to_latex }.join("\n")
    "\\begin{enumerate}\n#{body}\n\\end{enumerate}\n"
  end

  alias_method :latex_content, :items
end
202
+
203
+
204
+ class ListItem
205
+ include Latex
206
+
207
+ def to_latex
208
+ content = super
209
+ " \\item #{content}\n"
210
+ end
211
+
212
+ alias_method :latex_content,:content
213
+ end
214
+
215
+ class RAW
216
+ def to_latex
217
+ @content.map {|i| i.to_latex }.join
218
+ end
219
+ end
220
+
# LaTeX rendering for the trailing footnote list.
#
# Reopens FootnoteList (the class the parser appends to its result); the
# previous spelling "FoonoteList" created an unrelated class and left
# FootnoteList without +to_latex+.
class FootnoteList
  # Returns an empty string: Inline::Footnote#to_latex already emits each
  # note inline as \footnote{...}, so no separate list is rendered.
  def to_latex
    ""
  end
end
226
+ end
227
+
228
+ module Inline
229
+ class Text
230
+ def to_latex
231
+ self.text
232
+ end
233
+ end
234
+
235
+ class Url
236
+ def to_latex
237
+ "\\href{#{self.url}/}{#{self.url}}"
238
+ end
239
+ end
240
+
241
+ class HatenaId
242
+ def to_latex
243
+ "\\href{http://d.hatena.ne.jp/#{self.name}/}{id:#{self.name}}"
244
+ end
245
+ end
246
+
# LaTeX rendering for Fotolife images.
class Fotolife
  # Returns the value of +url+ as the LaTeX output.
  # (Calling alias_method inside the method body raised NoMethodError,
  # because alias_method is a Module method, not an instance method.)
  def to_latex
    url
  end
end
252
+
253
+ class Tex
254
+ def to_latex
255
+ %($#{self.text}$)
256
+ end
257
+ end
258
+
259
+ class Footnote
260
+ def to_latex
261
+ %(\\footnote{#{self.text}})
262
+ end
263
+ end
264
+
265
+ class Comment
266
+ def to_latex
267
+ ""
268
+ end
269
+ end
270
+ end
271
+ end
272
+
273
+
@@ -4,6 +4,7 @@
4
4
 
5
5
  require 'hparser/util/parser'
6
6
  require 'hparser/inline/parser'
7
+ require 'hparser/inline/footnote'
7
8
  require 'hparser/block/collectable'
8
9
  require 'hparser/util/line_scanner'
9
10
 
@@ -42,9 +43,14 @@ module HParser
42
43
  #
43
44
  # Return array of block element.
44
45
  def parse str
45
- @blocks.parse(LineScanner.new(str.split("\n")),@inlines).map{|x|
46
+ context = Context.new
47
+ res = (@blocks.parse(LineScanner.new(str.split(/\r\n|\r|\n/)),context,@inlines) || []).map{|x|
46
48
  x[0]
47
49
  }
50
+ if context.footnotes.length > 0
51
+ res << FootnoteList.new(context.footnotes)
52
+ end
53
+ res
48
54
  end
49
55
 
50
56
  # Return an array of all usable parsers.
@@ -66,5 +72,15 @@ module HParser
66
72
  }
67
73
  end
68
74
  end
75
+
76
+ # Parse context.
77
+ #
78
+ # Context instance is passed to all parsers.
79
+ class Context
80
+ attr_reader :footnotes
81
+ def initialize(footnotes=[])
82
+ @footnotes = footnotes
83
+ end
84
+ end
69
85
  end
70
86
 
data/lib/hparser/text.rb CHANGED
@@ -39,6 +39,10 @@ module HParser
39
39
  end
40
40
  end
41
41
 
42
+ class SeeMore
43
+ def to_text() "" end
44
+ end
45
+
42
46
  module Indent
43
47
  include Text
44
48
  def text_content
@@ -57,7 +61,11 @@ module HParser
57
61
  include Indent
58
62
  end
59
63
  class Quote
64
+ include Text
60
65
  include Indent
66
+ def text_content
67
+ @items
68
+ end
61
69
  end
62
70
 
63
71
  class Table
@@ -117,6 +125,20 @@ module HParser
117
125
  [HParser::Inline::Text.new(' '),self.content].flatten
118
126
  end
119
127
  end
128
+
129
+ class RAW
130
+ def to_text
131
+ @content.map {|i| i.to_text }.join
132
+ end
133
+ end
134
+
135
+ class FootnoteList
136
+ def to_text
137
+ @footnotes.map {|f|
138
+ %((*#{f.index}) #{f.text})
139
+ }.join("\n")
140
+ end
141
+ end
120
142
  end
121
143
 
122
144
  module Inline
@@ -133,5 +155,25 @@ module HParser
133
155
  "id:#{self.name}"
134
156
  end
135
157
  end
158
+
159
+ class Fotolife
160
+ alias_method :to_text,:url
161
+ end
162
+
163
+ class Tex
164
+ alias_method :to_text,:text
165
+ end
166
+
167
+ class Footnote
168
+ def to_text
169
+ "(*#{self.index})"
170
+ end
171
+ end
172
+
173
+ class Comment
174
+ def to_text
175
+ ""
176
+ end
177
+ end
136
178
  end
137
179
  end
@@ -5,12 +5,13 @@ module HParser
5
5
  module Util
6
6
  # StringScanner like class
7
7
  class LineScanner
8
- attr_reader :matched
8
+ attr_reader :matched, :matched_pattern
9
9
  def initialize(lines)
10
10
  @lines = lines
11
11
  end
12
12
 
13
13
  def scan(exp)
14
+ @matched_pattern = nil
14
15
  if match?(exp) then
15
16
  @matched = @lines.shift
16
17
  else
@@ -29,7 +30,7 @@ module HParser
29
30
  def match?(exp)
30
31
  if @lines == [] then
31
32
  false
32
- elsif exp.class == Regexp and @lines[0] =~ exp then
33
+ elsif exp.class == Regexp and (@matched_pattern = @lines[0].match(exp)) then
33
34
  true
34
35
  elsif @lines[0] == exp
35
36
  true
data/lib/hparser.rb CHANGED
@@ -6,3 +6,4 @@ require 'hparser/parser'
6
6
  require 'hparser/block/all'
7
7
  require 'hparser/inline/all'
8
8
  require 'hparser/html'
9
+ require 'hparser/latex'
@@ -0,0 +1,23 @@
1
+ - A
2
+ - B
3
+ >||
4
+ A
5
+ B
6
+ ||<
7
+ A
8
+ >||
9
+ C
10
+ D
11
+ ||<
12
+ - A
13
+ - A
14
+ - A
15
+ - A
16
+ -- A
17
+ -- A
18
+
19
+ ** A
20
+ - A
21
+
22
+ ** A
23
+ - A
data/test/test_block.rb CHANGED
@@ -1,4 +1,4 @@
1
- require 'test/unit'
1
+ require 'test_helper'
2
2
  require 'hparser/parser'
3
3
  require 'hparser/block/list'
4
4
 
@@ -61,14 +61,38 @@ END
61
61
  end
62
62
 
63
63
  def test_spre
64
- assert_equal [SuperPre.new('a')],parse(<<-END)
64
+ assert_equal [SuperPre.new(' a ')],parse(<<-END)
65
65
  >||
66
+ a
67
+ ||<
68
+ END
69
+ assert_equal [SuperPre.new('a')],parse(<<-END), 'with space'
70
+ >||
66
71
  a
72
+ ||<
73
+ END
74
+
75
+ end
76
+
77
+ def test_spre_html
78
+ assert_equal [SuperPre.new('<foo />')],parse(<<-END)
79
+ >||
80
+ <foo />
67
81
  ||<
68
82
  END
69
83
 
70
84
  end
71
85
 
86
+ def test_spre_format
87
+ parsed = parse(<<-END)
88
+ >|xml|
89
+ <foo />
90
+ ||<
91
+ END
92
+ assert_equal [SuperPre.new('<foo />')], parsed
93
+ assert_equal 'xml', parsed.first.format
94
+ end
95
+
72
96
  def test_list
73
97
  assert_equal [Ul.new(li('a'),Ol.new(li('b')),Ul.new(li('c')))],
74
98
  parse(<<-END)
@@ -78,6 +102,45 @@ END
78
102
  END
79
103
  end
80
104
 
105
+
106
+ def test_comment
107
+ assert_equal [HParser::Block::RAW.new([ Comment.new("\naaa\n") ])], parse(<<-END.unindent)
108
+ ><!--
109
+ aaa
110
+ --><
111
+ END
112
+ end
113
+
114
+ def test_raw
115
+ assert_equal [RAW.new([ Text.new("<ins>") ]), P.new([ Text.new("foo") ]), RAW.new([ Text.new("</ins>") ])], parse(<<-END.unindent)
116
+ ><ins><
117
+ foo
118
+ ></ins><
119
+ END
120
+ end
121
+
122
+ def test_raw_without_end_lt
123
+ assert_equal [RAW.new([ Text.new("<ins>") ]), P.new([ Text.new("foo") ]), RAW.new([ Text.new("</ins>") ])], parse(<<-END.unindent)
124
+ ><ins><
125
+ foo
126
+ ></ins>
127
+ END
128
+ end
129
+
130
+ def test_p
131
+ assert_equal [P.new([ Text.new(" foo") ])], parse(<<END)
132
+ foo
133
+ END
134
+
135
+ str = <<END
136
+ foo
137
+ bar
138
+ buz
139
+ END
140
+ assert_equal [P.new([ Text.new(" foo")]), P.new([Text.new(" bar") ]), P.new([Text.new(" buz") ])], parse(str)
141
+ end
142
+
143
+
81
144
  def li str
82
145
  Li.new([Text.new(str)])
83
146
  end
@@ -0,0 +1,48 @@
1
+ require 'test_helper'
2
+ require 'hparser/parser'
3
+ require 'hparser/block/list'
4
+ require 'pathname'
5
+
6
+ class BlockTest < Test::Unit::TestCase
7
+ include HParser::Block
8
+ include HParser::Inline
9
+
10
+ def setup
11
+ @parser = HParser::Parser.new
12
+ end
13
+
14
+ def parse str
15
+ @parser.parse str
16
+ end
17
+
18
+ def test_from_cpan_text_hatena
19
+ Pathname.glob(Pathname.new(__FILE__).parent + "test_from_perl/*.t").each do |test|
20
+ data = test.read.gsub(/\r?\n|\r/, "\n")[/__END__\n([\s\S]+)/, 1].split(/^===\s*/)
21
+ data.each do |d|
22
+ name, *rest = d.split(/^--- */)
23
+ rest = rest.inject({}) {|r,i|
24
+ i.sub!(/^(.*)\n/, "")
25
+ r.update(Regexp.last_match[1] => i)
26
+ }
27
+ next unless rest["text"]
28
+
29
+ parsed = nil
30
+ assert_nothing_raised("#{test.basename}::#{name}\n\n#{rest["text"]}\n") {
31
+ parsed = @parser.parse(rest["text"])
32
+ }
33
+
34
+ html = nil
35
+ assert_nothing_raised("to_html") {
36
+ html = @parser.parse(rest["text"]).map {|i| i.to_html }.join.strip
37
+ }
38
+
39
+ # paranoid test
40
+ if ENV["TEST_PARANOID"]
41
+ require "rubygems"
42
+ require "hpricot"
43
+ assert_equal Hpricot(rest["html"].strip).to_s, Hpricot(html).to_s
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
data/test/test_dl.rb CHANGED
@@ -1,4 +1,4 @@
1
- require 'test/unit'
1
+ require 'test_helper'
2
2
  require 'hparser/parser'
3
3
  require 'hparser/block/dl'
4
4
 
@@ -23,4 +23,16 @@ class DlTest < Test::Unit::TestCase
23
23
  :bar:bar is ...
24
24
  END
25
25
  end
26
+
27
+ def test_dl2
28
+ first = Dl::Item.new([Text.new('<a href="http://example.com/">a</a>')],
29
+ [Text.new('b')])
30
+ assert_equal [Dl.new(first)],
31
+ parse(':<a href="http://example.com/">a</a>:b')
32
+ end
33
+
34
+ def test_fake_dl
35
+ assert_equal [],parse(":this is not dl")
36
+ assert_equal [],parse(":this is not dl too:")
37
+ end
26
38
  end
@@ -0,0 +1,42 @@
1
+ require 'test_helper'
2
+ require 'hparser/parser'
3
+ require 'hparser/block/p'
4
+ require 'hparser/inline/parser'
5
+ require 'hparser/inline/footnote'
6
+
7
+ class FootnoteTest < Test::Unit::TestCase
8
+ include HParser::Inline
9
+ def setup
10
+ @parser = Parser.new [Footnote]
11
+ end
12
+
13
+ def parse str
14
+ @parser.parse str, HParser::Context.new
15
+ end
16
+
17
+ def test_new
18
+ footnote = Footnote.new(1, "text")
19
+ assert_equal 1, footnote.index
20
+ assert_equal "text", footnote.text
21
+ end
22
+
23
+ def test_parse
24
+ assert_equal [Footnote.new(1, "text")], parse("((text))")
25
+ assert_equal [Footnote.new(1, "text1"),Footnote.new(2, "text2")], parse("((text1))((text2))")
26
+ assert_equal [Text.new("((text))")], parse(")((text))(")
27
+ end
28
+
29
+ def test_parse_footnote_list
30
+ f1 = Footnote.new(1, "text1")
31
+ f2 = Footnote.new(2, "text2")
32
+
33
+ # [ P ([ Footnote(...), Footnote(...) ]),
34
+ # FootnoteList([ Footnote(...), Footnote(...) ])
35
+ # ]
36
+ elements = HParser::Parser.new.parse("((text1))((text2))")
37
+
38
+ assert_equal 2, elements.length
39
+ assert_equal HParser::Block::P.new([f1, f2]), elements[0]
40
+ assert_equal HParser::Block::FootnoteList.new([f1, f2]), elements[1]
41
+ end
42
+ end
@@ -0,0 +1,29 @@
1
+ require 'test_helper'
2
+ require 'hparser/inline/parser'
3
+ require 'hparser/inline/fotolife'
4
+
5
+ class FotolifeTest < Test::Unit::TestCase
6
+ include HParser::Inline
7
+ def setup
8
+ @parser = Parser.new [Fotolife]
9
+ end
10
+
11
+ def parse str
12
+ @parser.parse str
13
+ end
14
+
15
+ def test_new
16
+ f = Fotolife.new("nitoyon", "20100718", "010346", "jpg")
17
+ assert_equal "nitoyon", f.id
18
+ assert_equal "20100718", f.date
19
+ assert_equal "010346", f.time
20
+ assert_equal "jpg", f.ext
21
+ assert_equal "http://f.hatena.ne.jp/nitoyon/20100718010346", f.url
22
+ assert_equal "http://f.hatena.ne.jp/images/fotolife/n/nitoyon/20100718/20100718010346.jpg", f.image_url
23
+ end
24
+
25
+ def test_parse
26
+ assert_equal [Fotolife.new("nitoyon", "20100718", "010346", "jpg")],
27
+ parse("[f:id:nitoyon:20100718010346j:image]")
28
+ end
29
+ end