hparser 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/.travis.yml +11 -0
- data/ChangeLog +4 -0
- data/Gemfile +3 -0
- data/README.md +40 -0
- data/Rakefile +12 -0
- data/VERSION +1 -0
- data/hparser.gemspec +21 -0
- data/lib/hparser/block/dl.rb +4 -4
- data/lib/hparser/block/footnote_list.rb +19 -0
- data/lib/hparser/block/head.rb +2 -2
- data/lib/hparser/block/list.rb +8 -8
- data/lib/hparser/block/p.rb +4 -3
- data/lib/hparser/block/pair.rb +12 -7
- data/lib/hparser/block/quote.rb +32 -2
- data/lib/hparser/block/raw.rb +34 -0
- data/lib/hparser/block/see_more.rb +31 -0
- data/lib/hparser/block/super_pre.rb +21 -3
- data/lib/hparser/block/table.rb +4 -4
- data/lib/hparser/hatena.rb +3 -1
- data/lib/hparser/html.rb +181 -13
- data/lib/hparser/inline/comment.rb +27 -0
- data/lib/hparser/inline/footnote.rb +34 -0
- data/lib/hparser/inline/fotolife.rb +40 -0
- data/lib/hparser/inline/hatena_id.rb +7 -6
- data/lib/hparser/inline/parser.rb +3 -2
- data/lib/hparser/inline/tex.rb +27 -0
- data/lib/hparser/inline/text.rb +3 -2
- data/lib/hparser/inline/url.rb +20 -6
- data/lib/hparser/latex.rb +273 -0
- data/lib/hparser/parser.rb +17 -1
- data/lib/hparser/text.rb +42 -0
- data/lib/hparser/util/line_scanner.rb +3 -2
- data/lib/hparser.rb +1 -0
- data/test/integration_texts/error1.ok.hatena +23 -0
- data/test/test_block.rb +65 -2
- data/test/test_bruteforce.rb +48 -0
- data/test/test_dl.rb +13 -1
- data/test/test_footnote.rb +42 -0
- data/test/test_fotolife.rb +29 -0
- data/test/test_from_perl/01_module.t +559 -0
- data/test/test_from_perl/02_module_extend.t +36 -0
- data/test/test_from_perl/10_autolink.t +78 -0
- data/test/test_from_perl/11_autolink_extend.t +43 -0
- data/test/test_hatena.rb +2 -2
- data/test/test_head.rb +7 -1
- data/test/test_helper.rb +11 -0
- data/test/test_html.rb +39 -3
- data/test/test_id.rb +1 -1
- data/test/test_inline.rb +13 -1
- data/test/test_inline_html.rb +37 -2
- data/test/test_integration.rb +20 -0
- data/test/test_latex.rb +101 -0
- data/test/test_p.rb +23 -3
- data/test/test_pair.rb +22 -4
- data/test/test_quote.rb +69 -0
- data/test/test_see_more.rb +28 -0
- data/test/test_table.rb +1 -1
- data/test/test_tex.rb +24 -0
- data/test/test_text.rb +12 -2
- data/test/test_url.rb +39 -2
- metadata +141 -58
- data/README +0 -17
data/lib/hparser/html.rb
CHANGED
@@ -38,6 +38,34 @@ module HParser
|
|
38
38
|
end
|
39
39
|
%(<#{html_tag}>#{content}</#{html_tag}>)
|
40
40
|
end
|
41
|
+
|
42
|
+
ESCAPE_TABLE = {
|
43
|
+
'&' => '&amp;',
|
44
|
+
'"' => '&quot;',
|
45
|
+
'<' => '&lt;',
|
46
|
+
'>' => '&gt;'
|
47
|
+
}
|
48
|
+
|
49
|
+
def escape(str)
|
50
|
+
str.gsub(/[&"<>]/n) {|c| ESCAPE_TABLE[c] }
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
module ListContainerHtml
|
55
|
+
def to_html
|
56
|
+
f = false
|
57
|
+
content = html_content.map{|x|
|
58
|
+
if x.class == Block::ListItem
|
59
|
+
s = (f ? "</li>" : "") + %(<li>#{x.to_html})
|
60
|
+
f = true
|
61
|
+
s
|
62
|
+
else
|
63
|
+
x.to_html
|
64
|
+
end
|
65
|
+
}.join
|
66
|
+
content += "</li>" if f
|
67
|
+
%(<#{html_tag}>#{content}</#{html_tag}>)
|
68
|
+
end
|
41
69
|
end
|
42
70
|
|
43
71
|
module Block
|
@@ -45,7 +73,15 @@ module HParser
|
|
45
73
|
include Html
|
46
74
|
private
|
47
75
|
def html_tag
|
48
|
-
"h#{self.level}"
|
76
|
+
"h#{@@head_level + self.level - 1}"
|
77
|
+
end
|
78
|
+
|
79
|
+
@@head_level = 1
|
80
|
+
def self.head_level=(l)
|
81
|
+
@@head_level = l
|
82
|
+
end
|
83
|
+
def self.head_level
|
84
|
+
@@head_level
|
49
85
|
end
|
50
86
|
|
51
87
|
alias_method :html_content,:content
|
@@ -60,7 +96,17 @@ module HParser
|
|
60
96
|
end
|
61
97
|
|
62
98
|
class Empty
|
63
|
-
def to_html() '<
|
99
|
+
def to_html() '<br />' end
|
100
|
+
end
|
101
|
+
|
102
|
+
class SeeMore
|
103
|
+
def to_html()
|
104
|
+
if self.is_super
|
105
|
+
'<a name="seeall"></a>'
|
106
|
+
else
|
107
|
+
'<a name="seemore"></a>'
|
108
|
+
end
|
109
|
+
end
|
64
110
|
end
|
65
111
|
|
66
112
|
class Pre
|
@@ -72,16 +118,78 @@ module HParser
|
|
72
118
|
|
73
119
|
class SuperPre
|
74
120
|
include Html
|
75
|
-
|
121
|
+
@@class_format_prefix = nil
|
122
|
+
def self.class_format_prefix
|
123
|
+
@@class_format_prefix
|
124
|
+
end
|
125
|
+
def self.class_format_prefix=(prefix)
|
126
|
+
@@class_format_prefix = prefix
|
127
|
+
end
|
128
|
+
@@use_pygments = false
|
129
|
+
def self.use_pygments
|
130
|
+
@@use_pygments
|
131
|
+
end
|
132
|
+
def self.use_pygments=(use_or_not)
|
133
|
+
@@use_pygments = use_or_not
|
134
|
+
end
|
135
|
+
|
136
|
+
def to_html
|
137
|
+
content = html_content.gsub(/&/, "&amp;").gsub(/\"/, "&quot;").gsub(/>/, "&gt;").gsub(/</, "&lt;")
|
138
|
+
if format != "" && @@use_pygments
|
139
|
+
# quick hack language name converter (super pre -> pygments)
|
140
|
+
lang = format
|
141
|
+
case format
|
142
|
+
when "cs"
|
143
|
+
lang = "csharp"
|
144
|
+
when "lisp"
|
145
|
+
lang = "cl"
|
146
|
+
when "patch"
|
147
|
+
lang = "diff"
|
148
|
+
when "vb"
|
149
|
+
lang = "vbnet"
|
150
|
+
end
|
151
|
+
|
152
|
+
begin
|
153
|
+
require 'pygments'
|
154
|
+
Pygments.highlight(html_content,
|
155
|
+
:lexer => lang, :options => {:encoding => 'utf-8'})
|
156
|
+
rescue LoadError
|
157
|
+
require 'albino'
|
158
|
+
Albino.new(html_content, lang).colorize
|
159
|
+
end
|
160
|
+
elsif format
|
161
|
+
%(<#{html_tag} class="#{@@class_format_prefix}#{escape(format)}">#{content}</#{html_tag}>)
|
162
|
+
else
|
163
|
+
%(<#{html_tag}>#{content}</#{html_tag}>)
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
76
167
|
def html_tag() 'pre' end
|
77
168
|
alias_method :html_content,:content
|
78
169
|
end
|
79
170
|
|
80
171
|
class Quote
|
81
172
|
include Html
|
173
|
+
|
174
|
+
class QuoteUrl
|
175
|
+
include Html
|
176
|
+
def initialize(url)
|
177
|
+
@url = url
|
178
|
+
end
|
179
|
+
private
|
180
|
+
def html_tag() 'cite' end
|
181
|
+
def html_content() @url.to_html end
|
182
|
+
end
|
183
|
+
|
82
184
|
private
|
83
185
|
def html_tag() 'blockquote' end
|
84
|
-
|
186
|
+
def html_content
|
187
|
+
if @url
|
188
|
+
@items + [QuoteUrl.new(@url)]
|
189
|
+
else
|
190
|
+
@items
|
191
|
+
end
|
192
|
+
end
|
85
193
|
end
|
86
194
|
|
87
195
|
class Table
|
@@ -127,7 +235,7 @@ module HParser
|
|
127
235
|
end
|
128
236
|
|
129
237
|
class UnorderList
|
130
|
-
include
|
238
|
+
include ListContainerHtml
|
131
239
|
private
|
132
240
|
def html_tag
|
133
241
|
'ul'
|
@@ -136,7 +244,7 @@ module HParser
|
|
136
244
|
end
|
137
245
|
|
138
246
|
class OrderList
|
139
|
-
include
|
247
|
+
include ListContainerHtml
|
140
248
|
private
|
141
249
|
def html_tag
|
142
250
|
'ol'
|
@@ -146,12 +254,31 @@ module HParser
|
|
146
254
|
|
147
255
|
|
148
256
|
class ListItem
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
257
|
+
def to_html
|
258
|
+
if content.class == Array then
|
259
|
+
content.map{|x| x.to_html}.join
|
260
|
+
else
|
261
|
+
content
|
262
|
+
end
|
263
|
+
end
|
264
|
+
end
|
265
|
+
|
266
|
+
class RAW
|
267
|
+
def to_html
|
268
|
+
@content.map {|i| i.to_html }.join
|
269
|
+
end
|
270
|
+
end
|
271
|
+
|
272
|
+
class FootnoteList
|
273
|
+
def to_html
|
274
|
+
%(<div class="footnote">#{self.html_content}</div>)
|
275
|
+
end
|
276
|
+
|
277
|
+
def html_content
|
278
|
+
@footnotes.map {|f|
|
279
|
+
%(<p class="footnote"><a href="#fn#{f.index}" name="f#{f.index}">*#{f.index}</a>: #{f.text}</p>)
|
280
|
+
}.join
|
153
281
|
end
|
154
|
-
alias_method :html_content,:content
|
155
282
|
end
|
156
283
|
end
|
157
284
|
|
@@ -163,14 +290,55 @@ module HParser
|
|
163
290
|
end
|
164
291
|
|
165
292
|
class Url
|
293
|
+
include Html
|
294
|
+
require "cgi"
|
166
295
|
def to_html
|
167
|
-
|
296
|
+
if @bookmark then
|
297
|
+
require 'uri'
|
298
|
+
enc_url = URI.encode(url)
|
299
|
+
bookmark = %( <a href="http://b.hatena.ne.jp/entry/#{enc_url}" class="http-bookmark">) +
|
300
|
+
%(<img src="http://b.hatena.ne.jp/entry/image/#{enc_url}" alt="" class="http-bookmark"></a>)
|
301
|
+
end
|
302
|
+
%(<a href="#{self.url}">#{CGI.escapeHTML(self.title)}</a>#{bookmark})
|
168
303
|
end
|
169
304
|
end
|
170
305
|
|
171
306
|
class HatenaId
|
172
307
|
def to_html
|
173
|
-
|
308
|
+
if self.is_detail
|
309
|
+
%(<a href="http://d.hatena.ne.jp/#{self.name}/" class="hatena-id-icon">) +
|
310
|
+
%(<img src="http://www.st-hatena.com/users/#{self.name[0..1]}/#{self.name}/profile_s.gif") +
|
311
|
+
%( width="16" height="16" alt="id:#{self.name}" class="hatena-id-icon">id:#{self.name}</a>)
|
312
|
+
else
|
313
|
+
%(<a href="http://d.hatena.ne.jp/#{self.name}/">id:#{self.name}</a>)
|
314
|
+
end
|
315
|
+
end
|
316
|
+
end
|
317
|
+
|
318
|
+
class Fotolife
|
319
|
+
def to_html
|
320
|
+
%(<a href="#{self.url}"><img src="#{self.image_url}"></a>)
|
321
|
+
end
|
322
|
+
end
|
323
|
+
|
324
|
+
class Footnote
|
325
|
+
def to_html
|
326
|
+
text = self.text.gsub(/<.*?>/, '')
|
327
|
+
%(<span class="footnote"><a href="#f#{self.index}" title="#{text}" name="fn#{self.index}">*#{self.index}</a></span>)
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
class Tex
|
332
|
+
def to_html
|
333
|
+
require "cgi"
|
334
|
+
url = "http://chart.apis.google.com/chart?cht=tx&chf=bg,s,00000000&chl=" + CGI.escape(self.text)
|
335
|
+
%(<img src="#{url}" class="tex" alt="#{CGI.escapeHTML(self.text)}">)
|
336
|
+
end
|
337
|
+
end
|
338
|
+
|
339
|
+
class Comment
|
340
|
+
def to_html
|
341
|
+
""
|
174
342
|
end
|
175
343
|
end
|
176
344
|
end
|
data/lib/hparser/inline/comment.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
require 'hparser/inline/collectable'
|
4
|
+
|
5
|
+
module HParser
|
6
|
+
module Inline
|
7
|
+
class Comment
|
8
|
+
include Collectable
|
9
|
+
|
10
|
+
attr_reader :content
|
11
|
+
|
12
|
+
def self.parse(scanner, context=nil)
|
13
|
+
if scanner.scan(/<!--.+-->/m)
|
14
|
+
self.new(scanner.matched[4..-4])
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def initialize(content)
|
19
|
+
@content = content
|
20
|
+
end
|
21
|
+
|
22
|
+
def ==(o)
|
23
|
+
o.class == self.class and @content == o.content
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/hparser/inline/footnote.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/inline/collectable'
|
6
|
+
module HParser
|
7
|
+
module Inline
|
8
|
+
class Footnote
|
9
|
+
include Collectable
|
10
|
+
|
11
|
+
attr_reader :index, :text
|
12
|
+
def initialize(index, text)
|
13
|
+
@index = index
|
14
|
+
@text = text
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.parse(scanner, context)
|
18
|
+
if scanner.scan(/\)\(\(.+?\)\)\(/) then
|
19
|
+
# )((xxx))( -> ((xxx))
|
20
|
+
Text.new scanner[0][1..-2]
|
21
|
+
elsif scanner.scan(/\(\((.+?)\)\)/) then
|
22
|
+
index = context.footnotes.length + 1
|
23
|
+
f = Footnote.new index, scanner[0][2..-3]
|
24
|
+
context.footnotes << f
|
25
|
+
f
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def ==(o)
|
30
|
+
self.class == o.class and @index == o.index and @text == o.text
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/lib/hparser/inline/fotolife.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/inline/collectable'
|
6
|
+
module HParser
|
7
|
+
module Inline
|
8
|
+
class Fotolife
|
9
|
+
include Collectable
|
10
|
+
|
11
|
+
attr_reader :id, :date, :time, :ext
|
12
|
+
def initialize(id, date, time, ext)
|
13
|
+
@id = id
|
14
|
+
@date = date
|
15
|
+
@time = time
|
16
|
+
@ext = ext
|
17
|
+
end
|
18
|
+
|
19
|
+
def url
|
20
|
+
"http://f.hatena.ne.jp/#{@id}/#{@date}#{@time}"
|
21
|
+
end
|
22
|
+
|
23
|
+
def image_url
|
24
|
+
"http://f.hatena.ne.jp/images/fotolife/#{id[0..0]}/#{id}/#{@date}/#{@date}#{@time}.#{@ext}"
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.parse(scanner, context=nil)
|
28
|
+
if scanner.scan(/\[f:id:([^:]+):(\d{8})(\d{6})(p|g|j):image(:[^\]]+)?\]/) then
|
29
|
+
Fotolife.new scanner[1], scanner[2], scanner[3],
|
30
|
+
scanner[4] == 'j' ? 'jpg' : scanner[4] == 'p' ? 'png' : 'gif'
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def ==(o)
|
35
|
+
self.class == o.class and @id == o.id and @date == o.date and
|
36
|
+
@time == o.time and @ext == o.ext
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/hparser/inline/hatena_id.rb
CHANGED
@@ -11,19 +11,20 @@ module HParser
|
|
11
11
|
# id:mzp
|
12
12
|
class HatenaId
|
13
13
|
include Collectable
|
14
|
-
attr_reader :name
|
15
|
-
def initialize(name)
|
14
|
+
attr_reader :name, :is_detail
|
15
|
+
def initialize(name, is_detail=false)
|
16
16
|
@name = name
|
17
|
+
@is_detail = is_detail
|
17
18
|
end
|
18
19
|
|
19
|
-
def self.parse(scanner)
|
20
|
-
if scanner.scan(/id
|
21
|
-
HatenaId.new scanner
|
20
|
+
def self.parse(scanner, context=nil)
|
21
|
+
if scanner.scan(/id:([A-Za-z][a-zA-Z0-9_\-]{2,31})(:detail)?/) then
|
22
|
+
HatenaId.new scanner[1], !scanner[2].nil?
|
22
23
|
end
|
23
24
|
end
|
24
25
|
|
25
26
|
def ==(o)
|
26
|
-
self.class == o.class and @name == o.name
|
27
|
+
self.class == o.class and @name == o.name and @is_detail == o.is_detail
|
27
28
|
end
|
28
29
|
end
|
29
30
|
end
|
data/lib/hparser/inline/parser.rb
CHANGED
@@ -13,9 +13,10 @@ module HParser
|
|
13
13
|
@document = Many1.new(Or.new(*parsers))
|
14
14
|
end
|
15
15
|
|
16
|
-
def parse str
|
16
|
+
def parse str, context=nil
|
17
17
|
scanner = StringScanner.new str
|
18
|
-
|
18
|
+
e = @document.parse(scanner, context) || [ HParser::Inline::Text.new("") ]
|
19
|
+
join_text e
|
19
20
|
end
|
20
21
|
|
21
22
|
def self.default_parser
|
data/lib/hparser/inline/tex.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Author:: MIZUNO Hiroki (hiroki1124@gmail.com)
|
2
|
+
# Copyright:: Copyright (c) 2006 MIZUNO Hiroki
|
3
|
+
# License:: Distributes under the same terms as Ruby
|
4
|
+
|
5
|
+
require 'hparser/inline/collectable'
|
6
|
+
module HParser
|
7
|
+
module Inline
|
8
|
+
class Tex
|
9
|
+
include Collectable
|
10
|
+
|
11
|
+
attr_reader :text
|
12
|
+
def initialize(text)
|
13
|
+
@text = text
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.parse(scanner, context=nil)
|
17
|
+
if scanner.scan(/\[tex:([^\]]+)\]/) then
|
18
|
+
Tex.new scanner[1]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def ==(o)
|
23
|
+
self.class == o.class and @text == o.text
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/hparser/inline/text.rb
CHANGED
@@ -10,11 +10,12 @@ module HParser
|
|
10
10
|
attr_reader :text
|
11
11
|
|
12
12
|
def self.<=>(o)
|
13
|
+
# This parser should be last.
|
13
14
|
1
|
14
15
|
end
|
15
16
|
|
16
|
-
def self.parse(scanner)
|
17
|
-
if scanner.scan(%r!<a
|
17
|
+
def self.parse(scanner, context=nil)
|
18
|
+
if scanner.scan(%r!<(a|iframe|script).*?</\1>!) or scanner.scan(%r!<img\s.*?>!) or scanner.scan(/./m)
|
18
19
|
Text.new(scanner.matched)
|
19
20
|
end
|
20
21
|
end
|
data/lib/hparser/inline/url.rb
CHANGED
@@ -7,19 +7,33 @@ module HParser
|
|
7
7
|
module Inline
|
8
8
|
class Url
|
9
9
|
include Collectable
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
@@url_re = %r!https?://[A-Za-z0-9~\/._\?\&=\-%#\+:;,\@\'\$\*\!]+!
|
11
|
+
@@bracket_url_with_title_re = %r!\[(#{@@url_re}):title(?:=(.*?))?(:bookmark)?\]!
|
12
|
+
@@bracket_url_re = %r!\[(#{@@url_re})\]!
|
13
|
+
|
14
|
+
attr_reader :url, :title, :bookmark
|
15
|
+
def self.parse(scanner, context=nil)
|
16
|
+
if scanner.scan(@@url_re) then
|
17
|
+
Url.new scanner.matched, scanner.matched
|
18
|
+
elsif scanner.scan(@@bracket_url_with_title_re) then
|
19
|
+
title = ""
|
20
|
+
title = scanner[2] if scanner[2] and scanner[2] != ":bookmark"
|
21
|
+
bookmark = (scanner[2] == ":bookmark" || scanner[3] == ":bookmark")
|
22
|
+
Url.new scanner[1], title, bookmark
|
23
|
+
elsif scanner.scan(@@bracket_url_re)
|
24
|
+
Url.new scanner[1]
|
14
25
|
end
|
15
26
|
end
|
16
27
|
|
17
|
-
def initialize(url)
|
28
|
+
def initialize(url, title=nil, bookmark=false)
|
18
29
|
@url = url
|
30
|
+
@title = title.nil? ? url : title.empty? ? "(undefined)" : title
|
31
|
+
@bookmark = bookmark
|
19
32
|
end
|
20
33
|
|
21
34
|
def ==(o)
|
22
|
-
self.class and o.class and @url == o.url
|
35
|
+
o and self.class and o.class and @url == o.url and @title == o.title and
|
36
|
+
@bookmark == o.bookmark
|
23
37
|
end
|
24
38
|
end
|
25
39
|
end
|