iwadon-text-hatena 0.12.20080627.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. data/README +146 -0
  2. data/README.en +62 -0
  3. data/README.rdoc +62 -0
  4. data/Rakefile +62 -0
  5. data/lib/text/hatena.rb +62 -0
  6. data/lib/text/hatena/auto_link.rb +114 -0
  7. data/lib/text/hatena/auto_link/amazon.rb +31 -0
  8. data/lib/text/hatena/auto_link/asin.rb +155 -0
  9. data/lib/text/hatena/auto_link/ean.rb +62 -0
  10. data/lib/text/hatena/auto_link/ftp.rb +19 -0
  11. data/lib/text/hatena/auto_link/google.rb +32 -0
  12. data/lib/text/hatena/auto_link/hatena_antenna.rb +26 -0
  13. data/lib/text/hatena/auto_link/hatena_bookmark.rb +57 -0
  14. data/lib/text/hatena/auto_link/hatena_diary.rb +77 -0
  15. data/lib/text/hatena/auto_link/hatena_fotolife.rb +101 -0
  16. data/lib/text/hatena/auto_link/hatena_graph.rb +69 -0
  17. data/lib/text/hatena/auto_link/hatena_group.rb +96 -0
  18. data/lib/text/hatena/auto_link/hatena_id.rb +34 -0
  19. data/lib/text/hatena/auto_link/hatena_idea.rb +55 -0
  20. data/lib/text/hatena/auto_link/hatena_map.rb +56 -0
  21. data/lib/text/hatena/auto_link/hatena_question.rb +44 -0
  22. data/lib/text/hatena/auto_link/hatena_rss.rb +26 -0
  23. data/lib/text/hatena/auto_link/hatena_search.rb +44 -0
  24. data/lib/text/hatena/auto_link/http.rb +102 -0
  25. data/lib/text/hatena/auto_link/mailto.rb +24 -0
  26. data/lib/text/hatena/auto_link/rakuten.rb +24 -0
  27. data/lib/text/hatena/auto_link/scheme.rb +40 -0
  28. data/lib/text/hatena/auto_link/tex.rb +26 -0
  29. data/lib/text/hatena/auto_link/unbracket.rb +23 -0
  30. data/lib/text/hatena/blockquote_node.rb +43 -0
  31. data/lib/text/hatena/body_node.rb +18 -0
  32. data/lib/text/hatena/br_node.rb +19 -0
  33. data/lib/text/hatena/cdata_node.rb +22 -0
  34. data/lib/text/hatena/context.rb +113 -0
  35. data/lib/text/hatena/dl_node.rb +28 -0
  36. data/lib/text/hatena/footnote_node.rb +33 -0
  37. data/lib/text/hatena/h3_node.rb +58 -0
  38. data/lib/text/hatena/h4_node.rb +19 -0
  39. data/lib/text/hatena/h5_node.rb +19 -0
  40. data/lib/text/hatena/html_filter.rb +207 -0
  41. data/lib/text/hatena/list_node.rb +59 -0
  42. data/lib/text/hatena/node.rb +33 -0
  43. data/lib/text/hatena/p_node.rb +18 -0
  44. data/lib/text/hatena/pre_node.rb +37 -0
  45. data/lib/text/hatena/section_node.rb +39 -0
  46. data/lib/text/hatena/superpre_node.rb +71 -0
  47. data/lib/text/hatena/table_node.rb +34 -0
  48. data/lib/text/hatena/tag_node.rb +44 -0
  49. data/lib/text/hatena/tagline_node.rb +19 -0
  50. data/lib/text/hatena/text.rb +36 -0
  51. data/lib/text/hatena/utils/htmlsplit.rb +891 -0
  52. data/lib/text/hatena/utils/section_node_utils.rb +43 -0
  53. data/t/test_02_autolink_text.rb +20 -0
  54. data/t/test_06_autolink_hatenafotolife.rb +104 -0
  55. data/t/test_08_autolink_asin.rb +65 -0
  56. data/t/test_09_autolink_hatenadiary.rb +62 -0
  57. data/t/test_11_autolink_tex.rb +32 -0
  58. data/t/test_13_autolink_hatenaantenna.rb +26 -0
  59. data/t/test_14_autolink_hatenabookmark.rb +56 -0
  60. data/t/test_15_autolink_hatenarss.rb +26 -0
  61. data/t/test_16_autolink_hatenaidea.rb +38 -0
  62. data/t/test_17_autolink_hatenaquestion.rb +32 -0
  63. data/t/test_18_autolink_ean.rb +32 -0
  64. data/t/test_19_autolink_hatenagraph.rb +44 -0
  65. data/t/test_23_autolink_amazon.rb +27 -0
  66. data/t/test_asin.rb +64 -0
  67. data/t/test_auto_link.rb +19 -0
  68. data/t/test_dl_node.rb +23 -0
  69. data/t/test_erb_tag.rb +23 -0
  70. data/t/test_footnote.rb +63 -0
  71. data/t/test_footnote_in_list.rb +53 -0
  72. data/t/test_helper.rb +13 -0
  73. data/t/test_quote.rb +75 -0
  74. data/t/test_raw_html.rb +25 -0
  75. data/t/test_sanitize.rb +27 -0
  76. data/t/test_superpre_vimcolor.rb +94 -0
  77. data/t/test_text_hatena.rb +287 -0
  78. data/t/test_text_hatena_autolink.rb +333 -0
  79. data/text-hatena.gemspec +133 -0
  80. metadata +131 -0
@@ -0,0 +1,26 @@
1
+ require "text/hatena/auto_link/scheme"
2
+
3
module Text
  class Hatena
    class AutoLink
      # Auto-link scheme for the "r:id:NAME" notation (optionally wrapped in
      # square brackets). Matches are rendered as links to r.hatena.ne.jp.
      class HatenaRSS < Scheme
        @@pattern = /\[?(r:id:([A-Za-z][a-zA-Z0-9_\-]{2,14}))\]?/i

        # Returns the regular expressions handled by this scheme.
        def patterns
          [@@pattern]
        end

        # Runs the parent initialisation, then records the link host.
        def init
          super
          @domain = 'r.hatena.ne.jp'
        end

        # Builds the anchor for +text+.
        #
        # text:: string expected to contain an "r:id:NAME" token
        # opt::  accepted for interface compatibility; not used here
        #
        # Returns the anchor HTML, or nil when +text+ does not match.
        def parse(text, opt = {})
          return unless @@pattern =~ text
          label, user = $1, $2
          "<a href=\"http://#{@domain}/#{user}/\"#{@a_target_string}>#{label}</a>"
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require "text/hatena/auto_link/scheme"
2
+
3
module Text
  class Hatena
    class AutoLink
      # Auto-link scheme for "[search:WORD]" and "[search:TYPE:WORD]", where
      # TYPE is one of keyword, question, asin or web. Matches are rendered
      # as links to search.hatena.ne.jp.
      class HatenaSearch < Scheme
        @@pattern = /\[search:(?:(keyword|question|asin|web):)?([^\]]+?)\]/i

        # Endpoint path per (lower-cased) search type. Anything not listed,
        # including a missing type, goes to DEFAULT_SEARCH_PATH.
        SEARCH_PATHS = {
          'question' => 'questsearch',
          'asin'     => 'asinsearch',
          'web'      => 'websearch'
        }.freeze
        DEFAULT_SEARCH_PATH = 'keyword'

        # Returns the regular expressions handled by this scheme.
        def patterns
          [@@pattern]
        end

        # Runs the parent initialisation, then records the link host.
        def init
          super
          @domain = 'search.hatena.ne.jp'
        end

        # Builds the search anchor for +text+.
        #
        # text:: string expected to contain a "[search:...]" token
        # opt::  accepted for interface compatibility; not used here. It now
        #        defaults to {} like the other schemes' parse methods.
        #
        # Returns the anchor HTML, or nil when +text+ does not match.
        def parse(text, opt = {})
          return if @@pattern !~ text
          type, word = $1, $2
          enword = html_encode(word)
          path = SEARCH_PATHS[type.to_s.downcase] || DEFAULT_SEARCH_PATH
          # The visible label echoes the notation exactly as written,
          # including the type prefix only when one was given.
          label = type ? "search:#{type}:#{word}" : "search:#{word}"
          sprintf('<a href="http://%s/%s?word=%s&ie=utf8"%s>%s</a>',
                  @domain, path, enword, @a_target_string, label)
        end
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
+ require "open-uri"
2
+ require "text/hatena/auto_link/scheme"
3
+
4
# Timeout::Error is rescued in HTTP#_get_page_title; make sure the constant
# is defined even when no network library has been loaded yet.
require "timeout"

module Text
  class Hatena
    class AutoLink
      # Auto-link scheme for http/https URLs. Handles both the plain form
      # ("http://..." or "[http://...]") and the bracketed form with a
      # suffix: ":title", ":title=TEXT", ":detail", ":image", ":image:hNN",
      # ":image:wNN" or ":barcode".
      class HTTP < Scheme
        @@pattern_simple = /\[?(https?:\/\/[A-Za-z0-9~\/._\?\&=\-%#\+:\;,\@\'\(\)\!]+)\]?/i
        @@pattern_useful = /\[(https?:\/\/[A-Za-z0-9~\/._\?\&=\-%#\+:\;,\@\'\(\)\!]+?):(title(?:=([^\]]*))?|barcode|detail|image(?::([hw]\d+))?)\]/i

        # Returns the regular expressions handled by this scheme; the
        # suffixed form is listed first.
        def patterns
          [@@pattern_useful, @@pattern_simple]
        end

        # Converts +text+ to HTML, trying the suffixed form before the
        # plain one. Returns nil when neither pattern matches.
        #
        # opt:: options hash; only :in_paragraph is read (by the :detail form)
        def parse(text, opt = {})
          case text
          when @@pattern_useful
            _parse_useful(text, opt)
          when @@pattern_simple
            _parse_simple(text)
          end
        end

        # Renders a plain URL (with optional surrounding brackets) as an
        # anchor whose text is the URL itself. Returns nil for nil/empty input.
        #
        # The brackets are stripped from a copy: the argument is no longer
        # modified in place, so frozen strings are accepted and the caller's
        # string is left alone.
        def _parse_simple(url)
          return nil if url.nil? or url.empty?
          bare = url.sub(/^\[/, '').sub(/\]$/, '')
          sprintf('<a href="%s"%s>%s</a>', bare, @a_target_string, bare)
        end

        # Renders the bracketed "[URL:suffix]" form. Returns nil when +text+
        # does not match the suffixed pattern.
        #
        # NOTE(review): the title (given explicitly or fetched from the page)
        # is inserted into the HTML without escaping -- confirm whether the
        # configured :title_handler is expected to sanitise it.
        def _parse_useful(text, opt)
          return unless @@pattern_useful =~ text
          url, type, title, size = $1, $2, $3, $4
          case type
          when /^title/i
            title ||= _get_page_title(url)
            sprintf('<a href="%s"%s>%s</a>', url, @a_target_string, title)
          when /^detail/i
            title ||= _get_page_title(url)
            html = sprintf('<div class="hatena-http-detail"><p class="hatena-http-detail-url"><a href="%s"%s>%s</a></p><p class="hatena-http-detail-title">%s</p></div>', url, @a_target_string, url, title)
            # A block-level <div> may not sit inside <p>; close and reopen
            # the surrounding paragraph when the caller says we are in one.
            html = "</p>#{html}<p>" if opt[:in_paragraph]
            html
          when /^image/i
            if /\.(jpe?g|gif|png|bmp)$/i =~ url
              size_string = ""
              if /^h(\d+)$/i =~ size
                size_string = sprintf(' height="%s"', $1)
              elsif /^w(\d+)$/i =~ size
                size_string = sprintf(' width="%s"', $1)
              end
              sprintf('<a href="%s"%s><img src="%s" alt="%s" class="hatena-http-image"%s></a>',
                      url, @a_target_string, url, url, size_string)
            else
              # Not an image file name: fall back to an ordinary link.
              sprintf('<a href="%s"%s>%s</a>', url, @a_target_string, url)
            end
          when /^barcode/i
            str = html_encode(url)
            sprintf('<a href="%s"%s><img src="http://d.hatena.ne.jp/barcode?str=%s" class="barcode" alt="%s"></a>',
                    url, @a_target_string, str, url)
          end
        end

        private

        # Fetches +url+ and returns the contents of its <title> element.
        #
        # Only the first 131072 bytes are read. When the :title_handler
        # option is set it is called with (title, charset) and its result is
        # returned instead. Failures never raise: the return value is then
        # "URL (notitle)", "URL (timeout)" or "URL (error message)".
        def _get_page_title(url)
          begin
            # OpenURI.open_uri is called explicitly: Kernel#open only handles
            # URLs while open-uri overrides it, and it would also treat a
            # leading "|" as a command to run.
            OpenURI.open_uri(url) do |f|
              content = f.read(131072) # 2^17
              return "#{url} (notitle)" unless /<title.*?>(.*?)<\/title>/i =~ content
              title = $1
              if h = @option[:title_handler]
                # NOTE(review): open-uri documents content_type as the bare
                # media type without parameters, so the first branch may
                # never match -- confirm before relying on it.
                if /charset="?(.+?)"?$/i =~ f.content_type
                  cset = $1.downcase
                elsif /<meta[^>]+charset="?([\w\d\s\-]+)"?/i =~ content
                  cset = $1.downcase
                end
                title = h.call(title, cset)
              end
              return title
            end
          rescue Timeout::Error
            return "#{url} (timeout)"
          rescue StandardError => e
            # StandardError rather than Exception, so interrupts, exit
            # requests and out-of-memory conditions still propagate.
            return "#{url} (#{e.message})"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require "text/hatena/auto_link/scheme"
2
+
3
module Text
  class Hatena
    class AutoLink
      # Auto-link scheme for "mailto:ADDRESS" (optionally wrapped in square
      # brackets).
      class Mailto < Scheme
        @@pattern = /\[?mailto:([a-zA-Z0-9_][a-zA-Z0-9_\.\-]+\@[a-zA-Z0-9_]+[a-zA-Z0-9_\.\-]*[a-zA-Z0-9_])\]?/i

        # Returns the regular expressions handled by this scheme.
        def patterns
          [@@pattern]
        end

        # Builds a mailto anchor for +text+.
        #
        # text:: string expected to contain a "mailto:ADDRESS" token
        # opt::  accepted for interface compatibility; not used here. It now
        #        defaults to {} like the other schemes' parse methods.
        #
        # Returns the anchor HTML, or nil when +text+ does not match.
        def parse(text, opt = {})
          return unless @@pattern =~ text
          addr = $1
          sprintf('<a href="mailto:%s">mailto:%s</a>', addr, addr)
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require "kconv"
2
+ require "text/hatena/auto_link/scheme"
3
+
4
module Text
  class Hatena
    class AutoLink
      # Auto-link scheme for the "[rakuten:WORD]" notation.
      class Rakuten < Scheme
        @@pattern = /\[rakuten:([^\]]+?)\]/i

        # Returns the regular expressions handled by this scheme.
        def patterns
          [@@pattern]
        end

        # Builds the Rakuten link for +text+.
        #
        # text:: string expected to contain a "[rakuten:WORD]" token
        # opt::  accepted for interface compatibility; not used here. It now
        #        defaults to {} like the other schemes' parse methods.
        #
        # Returns the anchor HTML, or nil when +text+ does not match.
        def parse(text, opt = {})
          return if @@pattern !~ text
          word = $1
          # The word is converted with String#toeuc (from kconv) before being
          # encoded into the query string; the link text keeps it unchanged.
          sprintf('<a href="http://pt.afl.rakuten.co.jp/c/002e8f0a.89099887/?sv=2&v=3&p=0&sitem=%s"%s>rakuten:%s</a>',
                  html_encode(word.toeuc),
                  @a_target_string, word)
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module Text
  class Hatena
    class AutoLink
      # Base class of the auto-link schemes. It stores the option hash,
      # prepares the optional target attribute used in generated anchors,
      # and provides small escaping helpers.
      class Scheme
        attr_accessor :patterns

        # args:: option hash; :a_target is read by #init
        def initialize(args = {})
          @option = args
          init
        end

        # Derives @a_target_string (' target="..."' or an empty string) from
        # the :a_target option.
        def init
          @a_target = @option[:a_target]
          @a_target_string = @a_target ?
            %Q! target="#{escape_attr(@a_target)}"! :
            ""
        end

        # Default conversion: returns +text+ unchanged.
        #
        # opt:: ignored here; accepted (with a default) so the base class can
        #       be called with the same arguments as the schemes derived from it
        def parse(text, opt = {})
          text
        end

        # Returns one regular expression joining all #patterns with "|".
        def pattern
          /#{patterns.join("|")}/
        end

        # Makes +str+ safe to place inside a double-quoted HTML attribute by
        # replacing each double quote with the &quot; entity.
        # (The previous replacement text, "&quote;", is not an HTML entity.)
        def escape_attr(str)
          str.gsub(/\"/, "&quot;")
        end

        # Percent-encodes +text+ byte by byte: every byte other than
        # A-Z, a-z, 0-9 and "_" becomes "%xx" (lower-case hex).
        # Returns nil for nil or empty input.
        #
        # Working on the unpacked byte values makes the result independent of
        # how String#[] behaves and of the string's encoding.
        def html_encode(text)
          return nil if text.nil? or text.empty?
          text.unpack("C*").map do |byte|
            char = byte.chr
            char =~ /\A[A-Za-z0-9_]\z/ ? char : sprintf("%%%02x", byte)
          end.join
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require "text/hatena/auto_link/scheme"
2
+
3
module Text
  class Hatena
    class AutoLink
      # Auto-link scheme for the "[tex:FORMULA]" notation; the formula is
      # rendered as an <img> pointing at the mimetex CGI on d.hatena.ne.jp.
      class Tex < Scheme
        @@pattern = /\[tex:(.*?[^\\\\])\]/i

        # Returns the regular expressions handled by this scheme.
        def patterns
          [@@pattern]
        end

        # Builds the image tag for +text+.
        #
        # text:: string expected to contain a "[tex:...]" token
        # opt::  accepted for interface compatibility; not used here
        #
        # Returns the <img> HTML, or nil when +text+ does not match.
        def parse(text, opt = {})
          return unless @@pattern =~ text
          # alt text: the formula as written, passed through escape_attr.
          alt_text = escape_attr($1)
          # query: backslash-escaped brackets are unescaped, whitespace
          # becomes "~", and double quotes become &quot;.
          query = $1.gsub(/\\([\[\]])/, '\1').gsub(/\s/, '~').gsub(/"/, '&quot;')
          "<img src=\"http://d.hatena.ne.jp/cgi-bin/mimetex.cgi?#{query}\" class=\"tex\" alt=\"#{alt_text}\">"
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require "text/hatena/auto_link/scheme"
2
+
3
module Text
  class Hatena
    class AutoLink
      # Scheme for the "[]TEXT[]" notation: the markers are dropped and the
      # enclosed text is returned as is.
      class Unbracket < Scheme
        @@pattern = /\[\](.+?)\[\]/i

        # Returns the regular expressions handled by this scheme.
        def patterns
          [@@pattern]
        end

        # Returns the text found between "[]" markers in +text+, or nil when
        # +text+ does not match. +opt+ is not used.
        def parse(text, opt = {})
          return nil unless @@pattern =~ text
          $1
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require "text/hatena/section_node"
2
+
3
module Text
  class Hatena
    # Block node for quotations: opened by a line of the form ">>" or
    # ">URL>" and closed by a "<<" line.
    class BlockquoteNode < SectionNode
      # Sets the opening/closing line patterns, the child node kinds tried
      # for the enclosed lines, and the closing tag.
      def init
        @pattern = /^>((?!<).*)>$/
        @endpattern = /^<<$/
        @childnode = %w(h4 h5 blockquote dl list pre superpre table tagline tag)
        # @startstring = "<blockquote>"
        @endstring = "</blockquote>"
      end

      # Consumes a blockquote from the context and appends its HTML.
      # Does nothing when the next line is not a blockquote opener.
      #
      # When the opener carries a URL, the URL is run through the context's
      # auto-linker and the resulting anchor supplies the title and cite
      # attributes of the <blockquote> tag.
      def parse
        c = @context
        return unless @pattern =~ c.nextline
        url = $1
        c.shiftline
        t = "\t" * @ilevel
        _set_child_node_refs
        startstring = "<blockquote"
        if url and not url.empty?
          html = c.autolink.parse("[#{url}]")
          # Allow extra attributes after href (the auto-link schemes append a
          # target attribute when :a_target is configured); requiring ">"
          # right after the href dropped title/cite in that case.
          if /<a href="([^"]+?)"[^>]*>([^<]+)<\/a>/ =~ html
            cite, title = $1, $2
            # The title ends up inside a double-quoted attribute, so a
            # literal quote in the link text must not terminate it.
            startstring << " title=\"#{title.gsub('"', '&quot;')}\" cite=\"#{cite}\""
          end
        end
        startstring << ">"
        c.htmllines(t + startstring)
        while c.hasnext
          l = c.nextline
          if @endpattern =~ l
            c.shiftline
            break
          end
          # Stop when no child node accepts the line.
          break unless node = _findnode(l)
          node.parse
        end
        c.htmllines(t + @endstring)
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require "text/hatena/footnote_node"
2
+ require "text/hatena/node"
3
+ require "text/hatena/section_node"
4
+
5
module Text
  class Hatena
    # Top-level node: keeps handing the context to fresh SectionNode
    # instances for as long as input lines remain.
    class BodyNode < Node
      # Parses the remaining input as a sequence of sections.
      def parse
        context = @context
        while context.hasnext
          SectionNode.new({ :context => context, :ilevel => @ilevel }).parse
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require "text/hatena/node"
2
+
3
module Text
  class Hatena
    # Node for an empty input line.
    class BrNode < Node
      # Consumes one line and, if it is empty, appends either an indented
      # "<br>" or just the indentation, depending on the previous HTML line.
      def parse
        context = @context
        line = context.shiftline
        return unless line.empty?
        indent = "\t" * @ilevel
        br_line = "#{indent}<br>"
        previous = context.lasthtmlline
        # A <br> is only emitted when the previous output line was itself a
        # <br> or a bare indentation line; otherwise a bare indentation line
        # is written.
        if previous == br_line or previous == indent
          context.htmllines(br_line)
        else
          context.htmllines(indent)
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require "text/hatena/node"
2
+
3
module Text
  class Hatena
    # Node for a line of character data.
    class CDataNode < Node
      # Consumes one line, converts it through a Text object bound to the
      # same context, and appends the result at the current indent level.
      def parse
        context = @context
        line = context.shiftline
        indent = "\t" * @ilevel
        # Disabled in the original source:
        #if c.lasthtmlline == "#{t}<br>" or c.lasthtmlline == t
        #  c.htmllines("#{t}<br>")
        #else
        #  c.htmllines(t)
        #end
        converter = Text.new({:context => @context})
        converter.parse(line)
        context.htmllines("#{indent}#{converter.html}")
      end
    end
  end
end
@@ -0,0 +1,113 @@
1
+ require "text/hatena/auto_link"
2
+
3
module Text
  class Hatena
    # State shared while a text is converted: the input lines with a read
    # cursor, the accumulated HTML lines, footnotes, the section counter and
    # the options passed in by the caller.
    class Context
      # Plain readers for values set in #initialize (sectioncount is also
      # advanced by #incrementsection).
      attr_reader :sectioncount, :baseuri, :permalink, :invalidnode,
                  :sectionanchor, :texthandler

      # args:: hash with the keys :text, :baseuri, :permalink, :invalidnode,
      #        :sectionanchor, :autolink_option and :texthandler
      def initialize(args = {})
        @text = args[:text]
        @baseuri = args[:baseuri]
        @permalink = args[:permalink]
        @invalidnode = args[:invalidnode]
        @sectionanchor = args[:sectionanchor]
        @autolink_option = args[:autolink_option]
        @texthandler = args[:texthandler]
        @htmllines = []
        @html = ""
        @footnotes = []
        @sectioncount = 0
        @syntaxrefs = []
        @syntaxpattern = nil
        @noparagraph = false
        init
      end

      # Splits the text into lines and places the cursor before the first
      # one.
      #
      # Carriage returns are removed from a copy, so the string passed as
      # :text is not modified (and may be frozen); a missing :text is
      # treated as empty input.
      def init
        @text = @text.to_s.delete("\r")
        @lines = @text.split(/\n/)
        @index = -1
      end

      # True while there is a line after the cursor.
      def hasnext
        not @lines[@index + 1].nil?
      end

      # Returns the line after the cursor without advancing (nil at the end).
      def nextline
        @lines[@index + 1]
      end

      # Advances the cursor and returns the line it now points at
      # (nil past the end).
      def shiftline
        @lines[@index += 1]
      end

      # Returns the line at the cursor.
      def currentline
        @lines[@index]
      end

      # Returns the accumulated HTML lines joined by newlines.
      def html
        @htmllines.join("\n")
      end

      # Appends +arg+ (when given) to the HTML lines and returns the list.
      def htmllines(arg = nil)
        @htmllines.push(arg) unless arg.nil?
        @htmllines
      end

      # Returns the most recently appended HTML line (nil when none).
      def lasthtmlline
        @htmllines[-1]
      end

      # Appends +arg+ (when given) to the footnotes and returns the list.
      def footnotes(arg = nil)
        @footnotes.push(arg) unless arg.nil?
        @footnotes
      end

      # Appends +arg+ (when given) to the syntax refs and returns the list.
      def syntaxrefs(arg = nil)
        @syntaxrefs.push(arg) unless arg.nil?
        @syntaxrefs
      end

      # Appends +arg+ (when given) to the syntax patterns and returns the
      # list. The list is created on first use; until then the method still
      # returns nil, as it did before.
      def syntaxpattern(arg = nil)
        (@syntaxpattern ||= []).push(arg) unless arg.nil?
        @syntaxpattern
      end

      # Combined getter/setter for the noparagraph flag: with an argument
      # the flag is set first; the current value is returned either way.
      def noparagraph(*args)
        @noparagraph = args[0] unless args.empty?
        @noparagraph
      end

      # Combined getter/setter for the auto-linker. When none has been set,
      # an AutoLink is created from the :autolink_option value on first use.
      def autolink(*args)
        @autolink = args[0] unless args.empty?
        @autolink ||= AutoLink.new(@autolink_option)
        @autolink
      end

      # Adds one to the section counter and returns the new value.
      def incrementsection
        @sectioncount += 1
      end
    end
  end
end