hparser 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. data/.gitignore +2 -0
  2. data/.travis.yml +11 -0
  3. data/ChangeLog +4 -0
  4. data/Gemfile +3 -0
  5. data/README.md +40 -0
  6. data/Rakefile +12 -0
  7. data/VERSION +1 -0
  8. data/hparser.gemspec +21 -0
  9. data/lib/hparser/block/dl.rb +4 -4
  10. data/lib/hparser/block/footnote_list.rb +19 -0
  11. data/lib/hparser/block/head.rb +2 -2
  12. data/lib/hparser/block/list.rb +8 -8
  13. data/lib/hparser/block/p.rb +4 -3
  14. data/lib/hparser/block/pair.rb +12 -7
  15. data/lib/hparser/block/quote.rb +32 -2
  16. data/lib/hparser/block/raw.rb +34 -0
  17. data/lib/hparser/block/see_more.rb +31 -0
  18. data/lib/hparser/block/super_pre.rb +21 -3
  19. data/lib/hparser/block/table.rb +4 -4
  20. data/lib/hparser/hatena.rb +3 -1
  21. data/lib/hparser/html.rb +181 -13
  22. data/lib/hparser/inline/comment.rb +27 -0
  23. data/lib/hparser/inline/footnote.rb +34 -0
  24. data/lib/hparser/inline/fotolife.rb +40 -0
  25. data/lib/hparser/inline/hatena_id.rb +7 -6
  26. data/lib/hparser/inline/parser.rb +3 -2
  27. data/lib/hparser/inline/tex.rb +27 -0
  28. data/lib/hparser/inline/text.rb +3 -2
  29. data/lib/hparser/inline/url.rb +20 -6
  30. data/lib/hparser/latex.rb +273 -0
  31. data/lib/hparser/parser.rb +17 -1
  32. data/lib/hparser/text.rb +42 -0
  33. data/lib/hparser/util/line_scanner.rb +3 -2
  34. data/lib/hparser.rb +1 -0
  35. data/test/integration_texts/error1.ok.hatena +23 -0
  36. data/test/test_block.rb +65 -2
  37. data/test/test_bruteforce.rb +48 -0
  38. data/test/test_dl.rb +13 -1
  39. data/test/test_footnote.rb +42 -0
  40. data/test/test_fotolife.rb +29 -0
  41. data/test/test_from_perl/01_module.t +559 -0
  42. data/test/test_from_perl/02_module_extend.t +36 -0
  43. data/test/test_from_perl/10_autolink.t +78 -0
  44. data/test/test_from_perl/11_autolink_extend.t +43 -0
  45. data/test/test_hatena.rb +2 -2
  46. data/test/test_head.rb +7 -1
  47. data/test/test_helper.rb +11 -0
  48. data/test/test_html.rb +39 -3
  49. data/test/test_id.rb +1 -1
  50. data/test/test_inline.rb +13 -1
  51. data/test/test_inline_html.rb +37 -2
  52. data/test/test_integration.rb +20 -0
  53. data/test/test_latex.rb +101 -0
  54. data/test/test_p.rb +23 -3
  55. data/test/test_pair.rb +22 -4
  56. data/test/test_quote.rb +69 -0
  57. data/test/test_see_more.rb +28 -0
  58. data/test/test_table.rb +1 -1
  59. data/test/test_tex.rb +24 -0
  60. data/test/test_text.rb +12 -2
  61. data/test/test_url.rb +39 -2
  62. metadata +141 -58
  63. data/README +0 -17
data/lib/hparser/html.rb CHANGED
@@ -38,6 +38,34 @@ module HParser
38
38
  end
39
39
  %(<#{html_tag}>#{content}</#{html_tag}>)
40
40
  end
41
+
42
+ ESCAPE_TABLE = {
43
+ '&' => '&amp;',
44
+ '"' => '&quot;',
45
+ '<' => '&lt;',
46
+ '>' => '&gt;'
47
+ }
48
+
49
+ def escape(str)
50
+ str.gsub(/[&"<>]/n) {|c| ESCAPE_TABLE[c] }
51
+ end
52
+ end
53
+
54
+ module ListContainerHtml
55
+ def to_html
56
+ f = false
57
+ content = html_content.map{|x|
58
+ if x.class == Block::ListItem
59
+ s = (f ? "</li>" : "") + %(<li>#{x.to_html})
60
+ f = true
61
+ s
62
+ else
63
+ x.to_html
64
+ end
65
+ }.join
66
+ content += "</li>" if f
67
+ %(<#{html_tag}>#{content}</#{html_tag}>)
68
+ end
41
69
  end
42
70
 
43
71
  module Block
@@ -45,7 +73,15 @@ module HParser
45
73
  include Html
46
74
  private
47
75
  def html_tag
48
- "h#{self.level}"
76
+ "h#{@@head_level + self.level - 1}"
77
+ end
78
+
79
+ @@head_level = 1
80
+ def self.head_level=(l)
81
+ @@head_level = l
82
+ end
83
+ def self.head_level
84
+ @@head_level
49
85
  end
50
86
 
51
87
  alias_method :html_content,:content
@@ -60,7 +96,17 @@ module HParser
60
96
  end
61
97
 
62
98
  class Empty
63
- def to_html() '<p><br /></p>' end
99
+ def to_html() '<br />' end
100
+ end
101
+
102
+ class SeeMore
103
+ def to_html()
104
+ if self.is_super
105
+ '<a name="seeall"></a>'
106
+ else
107
+ '<a name="seemore"></a>'
108
+ end
109
+ end
64
110
  end
65
111
 
66
112
  class Pre
@@ -72,16 +118,78 @@ module HParser
72
118
 
73
119
  class SuperPre
74
120
  include Html
75
- private
121
+ @@class_format_prefix = nil
122
+ def self.class_format_prefix
123
+ @@class_format_prefix
124
+ end
125
+ def self.class_format_prefix=(prefix)
126
+ @@class_format_prefix = prefix
127
+ end
128
+ @@use_pygments = false
129
+ def self.use_pygments
130
+ @@use_pygments
131
+ end
132
+ def self.use_pygments=(use_or_not)
133
+ @@use_pygments = use_or_not
134
+ end
135
+
136
+ def to_html
137
+ content = html_content.gsub(/&/, "&amp;").gsub(/\"/, "&quot;").gsub(/>/, "&gt;").gsub(/</, "&lt;")
138
+ if format != "" && @@use_pygments
139
+ # quick hack language name converter (super pre -> pygments)
140
+ lang = format
141
+ case format
142
+ when "cs"
143
+ lang = "csharp"
144
+ when "lisp"
145
+ lang = "cl"
146
+ when "patch"
147
+ lang = "diff"
148
+ when "vb"
149
+ lang = "vbnet"
150
+ end
151
+
152
+ begin
153
+ require 'pygments'
154
+ Pygments.highlight(html_content,
155
+ :lexer => lang, :options => {:encoding => 'utf-8'})
156
+ rescue LoadError
157
+ require 'albino'
158
+ Albino.new(html_content, lang).colorize
159
+ end
160
+ elsif format
161
+ %(<#{html_tag} class="#{@@class_format_prefix}#{escape(format)}">#{content}</#{html_tag}>)
162
+ else
163
+ %(<#{html_tag}>#{content}</#{html_tag}>)
164
+ end
165
+ end
166
+
76
167
  def html_tag() 'pre' end
77
168
  alias_method :html_content,:content
78
169
  end
79
170
 
80
171
  class Quote
81
172
  include Html
173
+
174
+ class QuoteUrl
175
+ include Html
176
+ def initialize(url)
177
+ @url = url
178
+ end
179
+ private
180
+ def html_tag() 'cite' end
181
+ def html_content() @url.to_html end
182
+ end
183
+
82
184
  private
83
185
  def html_tag() 'blockquote' end
84
- alias_method :html_content,:content
186
+ def html_content
187
+ if @url
188
+ @items + [QuoteUrl.new(@url)]
189
+ else
190
+ @items
191
+ end
192
+ end
85
193
  end
86
194
 
87
195
  class Table
@@ -127,7 +235,7 @@ module HParser
127
235
  end
128
236
 
129
237
  class UnorderList
130
- include Html
238
+ include ListContainerHtml
131
239
  private
132
240
  def html_tag
133
241
  'ul'
@@ -136,7 +244,7 @@ module HParser
136
244
  end
137
245
 
138
246
  class OrderList
139
- include Html
247
+ include ListContainerHtml
140
248
  private
141
249
  def html_tag
142
250
  'ol'
@@ -146,12 +254,31 @@ module HParser
146
254
 
147
255
 
148
256
  class ListItem
149
- include Html
150
- private
151
- def html_tag
152
- 'li'
257
+ def to_html
258
+ if content.class == Array then
259
+ content.map{|x| x.to_html}.join
260
+ else
261
+ content
262
+ end
263
+ end
264
+ end
265
+
266
+ class RAW
267
+ def to_html
268
+ @content.map {|i| i.to_html }.join
269
+ end
270
+ end
271
+
272
+ class FootnoteList
273
+ def to_html
274
+ %(<div class="footnote">#{self.html_content}</div>)
275
+ end
276
+
277
+ def html_content
278
+ @footnotes.map {|f|
279
+ %(<p class="footnote"><a href="#fn#{f.index}" name="f#{f.index}">*#{f.index}</a>: #{f.text}</p>)
280
+ }.join
153
281
  end
154
- alias_method :html_content,:content
155
282
  end
156
283
  end
157
284
 
@@ -163,14 +290,55 @@ module HParser
163
290
  end
164
291
 
165
292
  class Url
293
+ include Html
294
+ require "cgi"
166
295
  def to_html
167
- %(<a href="#{self.url}">#{self.url}</a>)
296
+ if @bookmark then
297
+ require 'uri'
298
+ enc_url = URI.encode(url)
299
+ bookmark = %( <a href="http://b.hatena.ne.jp/entry/#{enc_url}" class="http-bookmark">) +
300
+ %(<img src="http://b.hatena.ne.jp/entry/image/#{enc_url}" alt="" class="http-bookmark"></a>)
301
+ end
302
+ %(<a href="#{self.url}">#{CGI.escapeHTML(self.title)}</a>#{bookmark})
168
303
  end
169
304
  end
170
305
 
171
306
  class HatenaId
172
307
  def to_html
173
- %(<a href="http://d.hatena.ne.jp/#{self.name}/">id:#{self.name}</a>)
308
+ if self.is_detail
309
+ %(<a href="http://d.hatena.ne.jp/#{self.name}/" class="hatena-id-icon">) +
310
+ %(<img src="http://www.st-hatena.com/users/#{self.name[0..1]}/#{self.name}/profile_s.gif") +
311
+ %( width="16" height="16" alt="id:#{self.name}" class="hatena-id-icon">id:#{self.name}</a>)
312
+ else
313
+ %(<a href="http://d.hatena.ne.jp/#{self.name}/">id:#{self.name}</a>)
314
+ end
315
+ end
316
+ end
317
+
318
# HTML rendering for a Hatena Fotolife image reference.
class Fotolife
  # Renders the image wrapped in a link to its Fotolife page.
  #
  # Both URLs are HTML-escaped before being placed in attributes, so that a
  # quote or angle bracket in either value cannot terminate the attribute.
  #
  # Returns the HTML fragment as a String.
  def to_html
    require 'cgi'
    href = CGI.escapeHTML(self.url)
    src = CGI.escapeHTML(self.image_url)
    %(<a href="#{href}"><img src="#{src}"></a>)
  end
end
323
+
324
# HTML rendering for an inline footnote marker.
class Footnote
  # Renders the "*N" marker that links to the footnote entry "#fN".
  #
  # The footnote text is used as the link's title attribute with markup
  # stripped. Double quotes are escaped as well, because the value sits inside
  # a double-quoted attribute; other characters are left as they are so that
  # entities already present in the text are not escaped twice.
  #
  # Returns the HTML fragment as a String.
  def to_html
    title = self.text.gsub(/<.*?>/, '').gsub('"', '&quot;')
    %(<span class="footnote"><a href="#f#{self.index}" title="#{title}" name="fn#{self.index}">*#{self.index}</a></span>)
  end
end
330
+
331
+ class Tex
332
+ def to_html
333
+ require "cgi"
334
+ url = "http://chart.apis.google.com/chart?cht=tx&chf=bg,s,00000000&chl=" + CGI.escape(self.text)
335
+ %(<img src="#{url}" class="tex" alt="#{CGI.escapeHTML(self.text)}">)
336
+ end
337
+ end
338
+
339
+ class Comment
340
+ def to_html
341
+ ""
174
342
  end
175
343
  end
176
344
  end
@@ -0,0 +1,27 @@
1
+
2
+
3
+ require 'hparser/inline/collectable'
4
+
5
module HParser
  module Inline
    # Inline element representing an HTML comment (<!-- ... -->).
    class Comment
      include Collectable

      # Text found between the opening "<!--" and the closing "-->".
      attr_reader :content

      # Tries to consume one HTML comment at the scanner's current position.
      #
      # The match is non-greedy so that a comment ends at the first "-->";
      # a greedy match would swallow any text lying between two comments.
      # An empty comment ("<!---->") is accepted as well.
      #
      # scanner - a StringScanner positioned at the text to parse.
      # context - unused; accepted for parity with the other inline parsers.
      #
      # Returns a Comment, or nil when no comment starts here.
      def self.parse(scanner, context=nil)
        return nil unless scanner.scan(/<!--.*?-->/m)
        # Drop the 4-character opener and the 3-character closer.
        new(scanner.matched[4..-4])
      end

      def initialize(content)
        @content = content
      end

      # Two comments are equal when they are of the same class and carry the
      # same content.
      def ==(o)
        o.class == self.class && @content == o.content
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/inline/collectable'
6
module HParser
  module Inline
    # Inline footnote written as ((text)).
    class Footnote
      include Collectable

      # index - 1-based position of the footnote within the context's list.
      # text  - raw footnote text found between the double parentheses.
      attr_reader :index, :text

      def initialize(index, text)
        @index = index
        @text = text
      end

      # Tries to consume a footnote at the scanner's current position.
      #
      # ")((xxx))(" is the escaped form and yields the literal text "((xxx))".
      # "((xxx))" yields a Footnote that is numbered after, and appended to,
      # context.footnotes.
      #
      # scanner - a StringScanner positioned at the text to parse.
      # context - object exposing a mutable #footnotes list. It defaults to
      #           nil like the other inline parsers; without a context there
      #           is nowhere to register the footnote, so nothing is consumed
      #           and nil is returned instead of raising on nil.
      #
      # Returns a Text, a Footnote, or nil when nothing matched.
      def self.parse(scanner, context=nil)
        if scanner.scan(/\)\(\(.+?\)\)\(/)
          # )((xxx))( -> ((xxx))
          Text.new scanner[0][1..-2]
        elsif context && scanner.scan(/\(\((.+?)\)\)/)
          footnote = Footnote.new(context.footnotes.length + 1, scanner[1])
          context.footnotes << footnote
          footnote
        end
      end

      # Footnotes are equal when class, index and text all match.
      def ==(o)
        self.class == o.class && @index == o.index && @text == o.text
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/inline/collectable'
6
module HParser
  module Inline
    # Inline Hatena Fotolife image notation:
    #   [f:id:NAME:YYYYMMDDhhmmssX:image]
    # where X is one of p, g or j and selects the image type.
    class Fotolife
      include Collectable

      # Maps the one-letter type marker of the notation to a file extension.
      EXTENSIONS = { 'j' => 'jpg', 'p' => 'png', 'g' => 'gif' }.freeze

      attr_reader :id, :date, :time, :ext

      # id   - owner's Hatena id.
      # date - 8-digit date string.
      # time - 6-digit time string.
      # ext  - image file extension ("jpg", "png" or "gif").
      def initialize(id, date, time, ext)
        @id = id
        @date = date
        @time = time
        @ext = ext
      end

      # URL of the photo page.
      def url
        "http://f.hatena.ne.jp/#{@id}/#{@date}#{@time}"
      end

      # URL of the image file itself.
      def image_url
        "http://f.hatena.ne.jp/images/fotolife/#{id[0..0]}/#{id}/#{@date}/#{@date}#{@time}.#{@ext}"
      end

      # Tries to consume a Fotolife notation at the scanner's position.
      #
      # The id part uses the same character set as the HatenaId parser
      # (a letter followed by 2-31 letters, digits, "_" or "-"). The looser
      # "anything but a colon" pattern let quotes and angle brackets through
      # into the generated URLs.
      #
      # scanner - a StringScanner positioned at the text to parse.
      # context - unused; accepted for parity with the other inline parsers.
      #
      # Returns a Fotolife, or nil when the notation does not match.
      def self.parse(scanner, context=nil)
        pattern = /\[f:id:([A-Za-z][a-zA-Z0-9_\-]{2,31}):(\d{8})(\d{6})(p|g|j):image(:[^\]]+)?\]/
        return nil unless scanner.scan(pattern)
        Fotolife.new scanner[1], scanner[2], scanner[3], EXTENSIONS[scanner[4]]
      end

      # Equal when class, id, date, time and extension all match.
      def ==(o)
        self.class == o.class && @id == o.id && @date == o.date &&
          @time == o.time && @ext == o.ext
      end
    end
  end
end
+ end
@@ -11,19 +11,20 @@ module HParser
11
11
  # id:mzp
12
12
  class HatenaId
13
13
  include Collectable
14
- attr_reader :name
15
- def initialize(name)
14
+ attr_reader :name, :is_detail
15
+ def initialize(name, is_detail=false)
16
16
  @name = name
17
+ @is_detail = is_detail
17
18
  end
18
19
 
19
- def self.parse(scanner)
20
- if scanner.scan(/id:\w+/) then
21
- HatenaId.new scanner.matched[3..-1]
20
+ def self.parse(scanner, context=nil)
21
+ if scanner.scan(/id:([A-Za-z][a-zA-Z0-9_\-]{2,31})(:detail)?/) then
22
+ HatenaId.new scanner[1], !scanner[2].nil?
22
23
  end
23
24
  end
24
25
 
25
26
  def ==(o)
26
- self.class == o.class and @name == o.name
27
+ self.class == o.class and @name == o.name and @is_detail == o.is_detail
27
28
  end
28
29
  end
29
30
  end
@@ -13,9 +13,10 @@ module HParser
13
13
  @document = Many1.new(Or.new(*parsers))
14
14
  end
15
15
 
16
- def parse str
16
+ def parse str, context=nil
17
17
  scanner = StringScanner.new str
18
- join_text @document.parse(scanner)
18
+ e = @document.parse(scanner, context) || [ HParser::Inline::Text.new("") ]
19
+ join_text e
19
20
  end
20
21
 
21
22
  def self.default_parser
@@ -0,0 +1,27 @@
1
+ # Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
2
+ # Copyright:: Copyright (c) 2006 MIZUNO Hiroki
3
+ # License:: Distributes under the same terms as Ruby
4
+
5
+ require 'hparser/inline/collectable'
6
+ module HParser
7
+ module Inline
8
+ class Tex
9
+ include Collectable
10
+
11
+ attr_reader :text
12
+ def initialize(text)
13
+ @text = text
14
+ end
15
+
16
+ def self.parse(scanner, context=nil)
17
+ if scanner.scan(/\[tex:([^\]]+)\]/) then
18
+ Tex.new scanner[1]
19
+ end
20
+ end
21
+
22
+ def ==(o)
23
+ self.class == o.class and @text == o.text
24
+ end
25
+ end
26
+ end
27
+ end
@@ -10,11 +10,12 @@ module HParser
10
10
  attr_reader :text
11
11
 
12
12
  def self.<=>(o)
13
+ # This parser should be last.
13
14
  1
14
15
  end
15
16
 
16
- def self.parse(scanner)
17
- if scanner.scan(%r!<a.*</a>!) or scanner.scan(/./m)
17
+ def self.parse(scanner, context=nil)
18
+ if scanner.scan(%r!<(a|iframe|script).*?</\1>!) or scanner.scan(%r!<img\s.*?>!) or scanner.scan(/./m)
18
19
  Text.new(scanner.matched)
19
20
  end
20
21
  end
@@ -7,19 +7,33 @@ module HParser
7
7
  module Inline
8
8
  class Url
9
9
  include Collectable
10
- attr_reader :url
11
- def self.parse(scanner)
12
- if scanner.scan(%r!https?://[A-Za-z0-9./]+!) then
13
- Url.new scanner.matched
10
+ @@url_re = %r!https?://[A-Za-z0-9~\/._\?\&=\-%#\+:;,\@\'\$\*\!]+!
11
+ @@bracket_url_with_title_re = %r!\[(#{@@url_re}):title(?:=(.*?))?(:bookmark)?\]!
12
+ @@bracket_url_re = %r!\[(#{@@url_re})\]!
13
+
14
+ attr_reader :url, :title, :bookmark
15
+ def self.parse(scanner, context=nil)
16
+ if scanner.scan(@@url_re) then
17
+ Url.new scanner.matched, scanner.matched
18
+ elsif scanner.scan(@@bracket_url_with_title_re) then
19
+ title = ""
20
+ title = scanner[2] if scanner[2] and scanner[2] != ":bookmark"
21
+ bookmark = (scanner[2] == ":bookmark" || scanner[3] == ":bookmark")
22
+ Url.new scanner[1], title, bookmark
23
+ elsif scanner.scan(@@bracket_url_re)
24
+ Url.new scanner[1]
14
25
  end
15
26
  end
16
27
 
17
- def initialize(url)
28
+ def initialize(url, title=nil, bookmark=false)
18
29
  @url = url
30
+ @title = title.nil? ? url : title.empty? ? "(undefined)" : title
31
+ @bookmark = bookmark
19
32
  end
20
33
 
21
34
  def ==(o)
22
- self.class and o.class and @url == o.url
35
+ o and self.class and o.class and @url == o.url and @title == o.title and
36
+ @bookmark == o.bookmark
23
37
  end
24
38
  end
25
39
  end