textile 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 29b8619628c3c467404af4c62c21abe0fe57315c
4
+ data.tar.gz: 58c3225030b2fc5182bb20540f738b40fd0a3ad2
5
+ SHA512:
6
+ metadata.gz: 2f0744c414b3e4f81e2c03555b5a87697b9ce41670e05cfbc1a561e88d2b55907a679e4105e22c74ac83eab556d3729302953c29d8330d3a79d768b4d9bb8730
7
+ data.tar.gz: cc332b9b6d3cb62f57e76217b9836fa6b35319a4fe52c93a98087818bacc3acbbca9ecfd934a4c8d43fe81fa7e3b050f31a299e1db6547ddfd0d7444ea648342
data/README ADDED
@@ -0,0 +1,41 @@
1
+ Recursive-descent Textile parser in Ruby
2
+
3
+ operand = operand1+ ?;
4
+
5
+ operand1 =
6
+ <text>
7
+
8
+ | '[==', operand, '==]'
9
+ | '[bq="', <author>, '"]', operand, '[/bq]'
10
+ | '[bq]', operand, '[/bq]'
11
+ | '[spoiler]', operand, '[/spoiler]'
12
+ | '==', operand, '=='
13
+
14
+ | link
15
+ | image
16
+
17
+ | '[**', operand, '**]' | '**', operand, '**'
18
+ | '[*', operand, '*]' | '*', operand, '*'
19
+ | '[__', operand, '__]' | '__', operand, '__'
20
+ | '[_', operand, '_]' | '_', operand, '_'
21
+ | '[@', operand, '@]' | '@', operand, '@'
22
+ | '[+', operand, '+]' | '+', operand, '+'
23
+ | '[^', operand, '^]' | '^', operand, '^'
24
+ | '[-', operand, '-]' | '-', operand, '-'
25
+ | '[~', operand, '~]' | '~', operand, '~'
26
+ | '[??', operand, '??]' | '??', operand, '??'
27
+ ;
28
+
29
+ link =
30
+ '["', operand, '(', <text>, ')":', <url>, ']'
31
+ | '"', operand, '(', <text>, ')":', <url>
32
+ | '["', operand, '":', <url>, ']'
33
+ | '"', operand, '":', <url>;
34
+
35
+ image =
36
+ '[!', <url>, '(', <text>, ')!:', <url>, ']'
37
+ | '!', <url>, '(', <text>, ')!:', <url>
38
+ | '[!', <url>, '(', <text>, ')!]'
39
+ | '!', <url>, '(', <text>, ')!'
40
+ | '[!', <url>, '!]'
41
+ | '!', <url>, '!';
@@ -0,0 +1,7 @@
1
+ require 'textile/parser'
2
+
3
# Public entry point of the gem.
module Textile
  class << self
    # Parses Textile markup.
    #
    # The parser is handed a duplicate of +text+, so the caller's string is
    # left as it was.
    #
    # text - the markup to parse (must respond to #dup).
    #
    # Returns whatever TextileParser.parse returns for the copy.
    def parse(text)
      working_copy = text.dup
      TextileParser.parse(working_copy)
    end
  end
end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+ require 'cgi'
3
+
4
# Container node: renders a sequence of child nodes back to back.
class MultiNode
  # nodes - collection of objects responding to #build; nil or false is
  #         treated as an empty list.
  def initialize(nodes)
    @nodes = nodes ? nodes : []
  end

  # Returns the children's #build results joined in order with no separator.
  def build
    rendered = @nodes.each_with_object([]) { |child, parts| parts << child.build }
    rendered.join('')
  end
end
13
+
14
# Leaf node holding a run of plain text.
class TextNode
  # text - the String this node renders.
  def initialize(text)
    @text = text
  end

  # Hook for booru monkeypatch
  #
  # Returns the text HTML-escaped, with every newline replaced by "<br>".
  def build
    escaped = CGI.escapeHTML(@text)
    escaped.gsub("\n", '<br>')
  end
end
24
+
25
# Leaf node for text taken from a raw ("==" / "[== ==]") span.
class RawTextNode
  # text - the String this node renders.
  def initialize(text)
    @text = text
  end

  # Returns the text HTML-escaped, with every newline replaced by "<br>".
  def build
    html_safe = CGI.escapeHTML(@text)
    html_safe.gsub("\n", '<br>')
  end
end
34
+
35
# Element node: wraps an inner node in an HTML tag with optional attributes.
class HTMLNode
  # tag_name   - tag name (Symbol or String), emitted verbatim.
  # inner      - child node; must respond to #build.
  # attributes - Hash of attribute name => value; nil is treated as no
  #              attributes.
  def initialize(tag_name, inner, attributes = {})
    @tag_name = tag_name
    @inner = inner
    @attributes = attributes || {}
  end

  # Returns the serialized element: opening tag with attributes in Hash
  # order, the inner node's build output, then the closing tag.
  #
  # Attribute values are converted with #to_s before escaping so that nil,
  # numeric or Symbol values render instead of making CGI.escapeHTML raise.
  # Attribute names and the tag name are NOT escaped.
  def build
    attrs = @attributes.map do |name, value|
      %( #{name}="#{CGI.escapeHTML(value.to_s)}")
    end

    "<#{@tag_name}#{attrs.join}>#{@inner.build}</#{@tag_name}>"
  end
end
61
+
62
# Leaf node rendering a self-closing <img> tag.
class ImageNode
  # src - the image source String.
  def initialize(src)
    @src = src
  end

  # Returns the <img> tag, with the src attribute supplied by #transform_src.
  def build
    ['<img src="', transform_src, '"/>'].join('')
  end

  # Hook for booru monkeypatch
  #
  # Returns the HTML-escaped source.
  def transform_src
    CGI.escapeHTML(@src)
  end
end
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
require 'strscan'
require 'textile/nodes'
3
+
4
# Regexp-driven recursive-descent parser that turns Textile-style markup into
# a tree of node objects (MultiNode, TextNode, RawTextNode, HTMLNode,
# ImageNode).
#
# Properly nesting operator pairs:
#   [bq][/bq] [bq="author"][/bq]
#   [spoiler][/spoiler]
#   [== ==]
#
# Non-nesting operator pairs:
#   == " ! ** * __ _ @ + ^ - ~ ??
module TextileParser
  extend self

  # Ruby \s does not match extra unicode space characters.
  # Kept single-quoted on purpose: the escapes are interpreted by the regexp
  # engine once this is interpolated into a character class.
  RX_SPACE_CHARS = ' \t\u00a0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000'.freeze

  RX_URL = %r{
    (?:http:\/\/|https:\/\/|\/\/|\/|\#) # protocol
    (?:[^%#{RX_SPACE_CHARS}"!\n\r]|%[0-9a-fA-F]{2})+ # path
    [^#{RX_SPACE_CHARS}`~!@$^&"\n\r\*_+\-=\[\]\\|;:,.'?\#)] # invalid
  }x

  # Either opening form of a blockquote.
  BQ_LEFT = /\[bq="([^"]*)"\]|\[bq\]/

  # Symbol table, in operator precedence order:
  # 0. Symbol name.
  # 1. Start string for optimized matching.
  # 2. Complete match definition.
  SYMS = [
    [:raw_bracket, '[==', /\[==(.*)==\]/],
    [:bq_author, '[bq="', /\[bq="([^"]*)"\](.*)\[\/bq\]/],
    [:bq, '[bq]', /\[bq\](.*)\[\/bq\]/],
    [:spoiler, '[spoiler]', /\[spoiler\](.*)\[\/spoiler\]/],
    [:raw, '==', /==(.*)==/],

    [:link_title_bracket, '["', /\["([^"]*)\(([^\)]*)\)":(#{RX_URL})\]/],
    [:link_title, '"', /"([^"]*)\(([^\)]*)\)":(#{RX_URL})/],
    [:link_bracket, '["', /\["([^"]*)":(#{RX_URL})\]/],
    [:link, '"', /"([^"]*)":(#{RX_URL})/],

    [:image_link_title_bracket, '[!', /\[!(#{RX_URL})\(([^\)]*)\)!:(#{RX_URL})\]/],
    [:image_link_title, '!', /!(#{RX_URL})\(([^\)]*)\)!:(#{RX_URL})/],
    [:image_link_bracket, '[!', /\[!(#{RX_URL})!:(#{RX_URL})\]/],
    [:image_link, '!', /!(#{RX_URL})!:(#{RX_URL})/],
    [:image_title_bracket, '[!', /\[!(#{RX_URL})\(([^\)]*)\)!\]/],
    [:image_title, '!', /!(#{RX_URL})\(([^\)]*)\)!/],
    [:image_bracket, '[!', /\[!(#{RX_URL})!\]/],
    [:image, '!', /!(#{RX_URL})!/],

    [:dblbold_bracket, '[**', /\[\*\*((?:.|\n.|\n(?=\*\*\]))+?)\*\*\]/],
    [:dblbold, '**', /\*\*((?:.|\n.|\n(?=\*\*))+?)\*\*/],
    [:bold_bracket, '[*', /\[\*((?:.|\n.|\n(?=\*\]))+?)\*\]/],
    [:bold, '*', /\*((?:.|\n.|\n(?=\*))+?)\*/],
    [:dblitalic_bracket, '[__', /\[__((?:.|\n.|\n(?=__\]))+?)__\]/],
    [:dblitalic, '__', /__((?:.|\n.|\n(?=__))+?)__/],
    [:italic_bracket, '[_', /\[_((?:.|\n.|\n(?=_\]))+?)_\]/],
    [:italic, '_', /_((?:.|\n.|\n(?=_))+?)_/],
    [:code_bracket, '[@', /\[@((?:.|\n.|\n(?=@\]))+?)@\]/],
    [:code, '@', /@((?:.|\n.|\n(?=@))+?)@/],
    [:ins_bracket, '[+', /\[\+((?:.|\n.|\n(?=\+\]))+?)\+\]/],
    [:ins, '+', /\+((?:.|\n.|\n(?=\+))+?)\+/],
    [:sup_bracket, '[^', /\[\^((?:.|\n.|\n(?=\^\]))+?)\^\]/],
    [:sup, '^', /\^((?:.|\n.|\n(?=\^))+?)\^/],
    [:del_bracket, '[-', /\[\-((?:.|\n.|\n(?=\-\]))+?)\-\]/],
    [:del, '-', /\-((?:.|\n.|\n(?=\-))+?)\-/],
    [:sub_bracket, '[~', /\[\~((?:.|\n.|\n(?=\~\]))+?)\~\]/],
    [:sub, '~', /\~((?:.|\n.|\n(?=\~))+?)\~/],
    [:cite_bracket, '[??', /\[\?\?((?:.|\n.|\n(?=\?\?\]))+?)\?\?\]/],
    [:cite, '??', /\?\?((?:.|\n.|\n(?=\?\?))+?)\?\?/],
  ].freeze

  SYM_TO_INDEX = SYMS.map { |name, index, _re| [name, index] }.to_h.freeze
  SYM_TO_REGEX = SYMS.map { |name, _index, re| [name, re] }.to_h.freeze

  # HTML tag emitted for each simple wrapping operator, keyed by the symbol
  # name with any "_bracket" suffix removed.
  WRAPPER_TAGS = {
    dblbold: :b,
    bold: :strong,
    dblitalic: :i,
    italic: :em,
    code: :code,
    ins: :ins,
    sup: :sup,
    del: :del,
    sub: :sub,
    cite: :cite
  }.freeze

  # Parses +text+ into a MultiNode.
  #
  # text - a mutable String; it is consumed destructively and is empty when
  #        this method returns.
  #
  # Returns a MultiNode wrapping the parsed nodes.
  def parse(text)
    ary = []
    operand(ary, text) until text.empty?
    MultiNode.new(ary)
  end

  # Picks the symbol to act on next.
  #
  # Candidates are the symbols whose start string occurs in +text+, ordered
  # by the position of that start string (closer is better). Ties are broken
  # by position in SYMS so the documented precedence order always holds
  # (sort_by on its own is not a stable sort). The first candidate whose full
  # regexp matches +text+ wins.
  #
  # Returns [sym, matchdata], or nil when nothing matches.
  def find_syms(text)
    candidates = SYMS.each_with_index.map do |(sym, start, regex), rank|
      [text.index(start), rank, sym, regex]
    end
    candidates = candidates.select(&:first)
                           .sort_by { |position, rank, _sym, _regex| [position, rank] }

    candidates.each do |_position, _rank, sym, regex|
      matchdata = regex.match(text)
      return [sym, matchdata] if matchdata
    end

    nil
  end

  # Consumes one operand from the front of +text+ and appends the resulting
  # node(s) to +ary+: any plain text before the next match, then the node for
  # the match itself. When nothing matches, all of +text+ becomes a TextNode.
  #
  # ary  - Array collecting nodes.
  # text - mutable String, consumed destructively.
  #
  # Returns ary.
  def operand(ary, text)
    sym, md = find_syms(text)
    # No match, consume entire string.
    return ary << TextNode.new(text.slice!(0..text.length)) if sym.nil? || md.nil?

    # Consume string before match.
    lead = md.pre_match.size
    ary << TextNode.new(text.slice!(0...lead)) if lead > 0

    matched = md.to_s
    case sym
    when :raw_bracket
      inner = balance_markup(text, matched, '[==', '==]').match(SYM_TO_REGEX[:raw_bracket])
      ary << RawTextNode.new(inner[1])
    when :bq_author
      inner = balance_markup(text, matched, BQ_LEFT, '[/bq]').match(SYM_TO_REGEX[:bq_author])
      ary << HTMLNode.new(:blockquote, parse(inner[2]), title: inner[1])
    when :bq
      inner = balance_markup(text, matched, BQ_LEFT, '[/bq]').match(SYM_TO_REGEX[:bq])
      ary << HTMLNode.new(:blockquote, parse(inner[1]))
    when :spoiler
      inner = balance_markup(text, matched, '[spoiler]', '[/spoiler]').match(SYM_TO_REGEX[:spoiler])
      ary << HTMLNode.new(:span, parse(inner[1]), class: 'spoiler')
    else
      # Exclusive range: remove exactly the matched text and nothing after it.
      text.slice!(0...matched.size)
      ary << inline_node(sym, md)
    end

    ary
  end

  private

  # Builds the node for a non-nesting operator from its match data.
  # Bracketed and bare variants of an operator produce the same node.
  def inline_node(sym, md)
    kind = sym.to_s.sub(/_bracket\z/, '').to_sym

    case kind
    when :raw
      RawTextNode.new(md[1])
    when :link_title
      HTMLNode.new(:a, parse(md[1]), title: md[2], href: md[3])
    when :link
      HTMLNode.new(:a, parse(md[1]), href: md[2])
    when :image_link_title
      HTMLNode.new(:a, ImageNode.new(md[1]), title: md[2], href: md[3])
    when :image_link
      HTMLNode.new(:a, ImageNode.new(md[1]), href: md[2])
    when :image_title
      HTMLNode.new(:span, ImageNode.new(md[1]), title: md[2])
    when :image
      ImageNode.new(md[1])
    else
      HTMLNode.new(WRAPPER_TAGS.fetch(kind), parse(md[1]))
    end
  end

  # Trims a greedy match of a nesting operator down to a balanced prefix.
  #
  # Scans +matched+ counting +left+ (opening) and +right+ (closing)
  # delimiters and stops as soon as they balance, or at the end of the
  # string. The prefix ends just after the last closing delimiter seen; if
  # none was seen before scanning stopped, the whole of +matched+ is used.
  #
  # text    - mutable String that starts with +matched+; the returned prefix
  #           is removed from its front.
  # matched - the String matched by the operator's regexp.
  # left    - String or Regexp for the opening delimiter.
  # right   - String or Regexp for the closing delimiter.
  #
  # Returns the prefix of +matched+ that was consumed.
  def balance_markup(text, matched, left, right)
    both = Regexp.union(left, right)
    left = Regexp.union(left)
    right = Regexp.union(right)

    scanner = StringScanner.new(matched)
    depth = 0
    balanced_end = 0

    loop do
      token = scanner.scan(both)
      if token.nil?
        # Not at a delimiter: jump to the start of the next one, or give up.
        if scanner.scan_until(both)
          # pos is a byte offset, so rewind by the delimiter's byte length.
          scanner.pos -= scanner.matched.bytesize
        else
          scanner.terminate
        end
      elsif token =~ left
        depth += 1
      elsif token =~ right
        depth -= 1
        # Character offset (not byte offset): it is used to slice Strings.
        balanced_end = scanner.charpos
      end

      break if depth.zero? || scanner.eos?
    end

    cut = balanced_end.zero? ? matched.size : balanced_end
    text.slice!(0...cut)
    matched[0...cut]
  end
end
@@ -0,0 +1,3 @@
1
# Version information for the textile gem.
module Textile
  # Release number of the gem, as a frozen String.
  VERSION = "0.4.0".freeze
end
@@ -0,0 +1,15 @@
1
+ $:.push File.expand_path("../lib", __FILE__)
2
+ require 'textile/version'
3
+
4
# Packaging metadata for the textile gem.
Gem::Specification.new do |spec|
  # Identity. The version is handed over as an unfrozen copy of
  # Textile::VERSION.
  spec.name = 'textile'
  spec.version = Textile::VERSION.dup
  spec.license = "MIT"

  # Short and long descriptions (identical text).
  spec.summary = "Recursive-descent style Textile parser"
  spec.description = "Recursive-descent style Textile parser"

  # Author contact details.
  spec.authors = ["Liam P. White"]
  spec.email = 'example@example.com'

  # Package every path printed by `git ls-files`; code is loaded from lib/.
  spec.files = %x(git ls-files).split("\n")
  spec.require_paths = ["lib"]
end
metadata ADDED
@@ -0,0 +1,49 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: textile
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.0
5
+ platform: ruby
6
+ authors:
7
+ - Liam P. White
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-11-14 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Recursive-descent style Textile parser
14
+ email: example@example.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - README
20
+ - lib/textile.rb
21
+ - lib/textile/nodes.rb
22
+ - lib/textile/parser.rb
23
+ - lib/textile/version.rb
24
+ - textile.gemspec
25
+ homepage:
26
+ licenses:
27
+ - MIT
28
+ metadata: {}
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ required_rubygems_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ requirements: []
44
+ rubyforge_project:
45
+ rubygems_version: 2.5.1
46
+ signing_key:
47
+ specification_version: 4
48
+ summary: Recursive-descent style Textile parser
49
+ test_files: []