textile 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 29b8619628c3c467404af4c62c21abe0fe57315c
4
+ data.tar.gz: 58c3225030b2fc5182bb20540f738b40fd0a3ad2
5
+ SHA512:
6
+ metadata.gz: 2f0744c414b3e4f81e2c03555b5a87697b9ce41670e05cfbc1a561e88d2b55907a679e4105e22c74ac83eab556d3729302953c29d8330d3a79d768b4d9bb8730
7
+ data.tar.gz: cc332b9b6d3cb62f57e76217b9836fa6b35319a4fe52c93a98087818bacc3acbbca9ecfd934a4c8d43fe81fa7e3b050f31a299e1db6547ddfd0d7444ea648342
data/README ADDED
@@ -0,0 +1,41 @@
1
+ Recursive-descent Textile parser in Ruby
2
+
3
+ operand = operand1+ ?;
4
+
5
+ operand1 =
6
+ <text>
7
+
8
+ | '[==', operand, '==]'
9
+ | '[bq="', <author>, '"]', operand, '[/bq]'
10
+ | '[bq]', operand, '[/bq]'
11
+ | '[spoiler]', operand, '[/spoiler]'
12
+ | '==', operand, '=='
13
+
14
+ | link
15
+ | image
16
+
17
+ | '[**', operand, '**]' | '**', operand, '**'
18
+ | '[*', operand, '*]' | '*', operand, '*'
19
+ | '[__', operand, '__]' | '__', operand, '__'
20
+ | '[_', operand, '_]' | '_', operand, '_'
21
+ | '[@', operand, '@]' | '@', operand, '@'
22
+ | '[+', operand, '+]' | '+', operand, '+'
23
+ | '[^', operand, '^]' | '^', operand, '^'
24
+ | '[-', operand, '-]' | '-', operand, '-'
25
+ | '[~', operand, '~]' | '~', operand, '~'
26
+ | '[??', operand, '??]' | '??', operand, '??'
27
+ ;
28
+
29
+ link =
30
+ '["', operand, '(', <text>, ')":', <url>, ']'
31
+ | '"', operand, '(', <text>, ')":', <url>
32
+ | '["', operand, '":', <url>, ']'
33
+ | '"', operand, '":', <url>;
34
+
35
+ image =
36
+ '[!', <url>, '(', <text>, ')!:', <url>, ']'
37
+ | '!', <url>, '(', <text>, ')!:', <url>
38
+ | '[!', <url>, '(', <text>, ')!]'
39
+ | '!', <url>, '(', <text>, ')!'
40
+ | '[!', <url>, '!]'
41
+ | '!', <url>, '!';
@@ -0,0 +1,7 @@
1
+ require 'textile/parser'
2
+
3
# Public entry point of the gem.
module Textile
  class << self
    # Parse Textile markup.
    #
    # The parser consumes the string it is handed, so it is given a copy and
    # the caller's +text+ is left untouched.
    #
    # Returns whatever TextileParser.parse returns for the copied text.
    def parse(text)
      working_copy = text.dup
      TextileParser.parse(working_copy)
    end
  end
end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+ require 'cgi'
3
+
4
# A node that simply holds an ordered list of child nodes.
class MultiNode
  # nodes - Array of objects responding to #build; a nil/false value is
  #         treated as an empty list.
  def initialize(nodes)
    @nodes = nodes ? nodes : []
  end

  # Render every child in order and concatenate the results.
  def build
    rendered = @nodes.map { |child| child.build }
    rendered.join('')
  end
end
13
+
14
# A run of ordinary text.
class TextNode
  def initialize(text)
    @text = text
  end

  # Hook for booru monkeypatch
  #
  # Returns the text HTML-escaped, with each newline turned into <br>.
  def build
    escaped = CGI.escapeHTML(@text)
    escaped.gsub("\n", '<br>')
  end
end
24
+
25
# A run of text taken from a raw (== ... ==) section.
class RawTextNode
  def initialize(text)
    @text = text
  end

  # Returns the text HTML-escaped, with each newline turned into <br>.
  def build
    escaped = CGI.escapeHTML(@text)
    escaped.gsub("\n", '<br>')
  end
end
34
+
35
# An HTML element wrapping a single inner node.
class HTMLNode
  # tag_name   - element name (String or Symbol), emitted verbatim.
  # inner      - object responding to #build; rendered as the element body.
  # attributes - Hash of attribute name => value; nil is treated as empty.
  def initialize(tag_name, inner, attributes = {})
    @tag_name = tag_name
    @inner = inner
    @attributes = attributes || {}
  end

  # Render the element as an HTML string.
  #
  # Attribute values are HTML-escaped. They are converted with #to_s first so
  # that non-String values (Integer, Symbol, nil) render instead of raising
  # TypeError inside CGI.escapeHTML. Attribute names and the tag name are
  # emitted as given (not escaped).
  def build
    attribute_text = @attributes.map do |name, value|
      %( #{name}="#{CGI.escapeHTML(value.to_s)}")
    end.join('')

    "<#{@tag_name}#{attribute_text}>#{@inner.build}</#{@tag_name}>"
  end
end
61
+
62
# An <img> element for a given source URL.
class ImageNode
  def initialize(src)
    @src = src
  end

  # Render the image tag, using #transform_src for the src attribute value.
  def build
    %(<img src="#{transform_src}"/>)
  end

  # Hook for booru monkeypatch
  #
  # Returns the source URL HTML-escaped.
  def transform_src
    CGI.escapeHTML(@src)
  end
end
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
require 'strscan'
require 'textile/nodes'
3
+
4
# Recursive-descent parser that turns Textile-style markup into a tree of
# nodes (MultiNode, TextNode, RawTextNode, HTMLNode, ImageNode).
module TextileParser
  extend self

  # Parse +text+ into a MultiNode.
  #
  # NOTE: +text+ is consumed destructively (it is empty when this returns),
  # so hand in a mutable copy if the string is still needed afterwards.
  def parse(text)
    nodes = []
    operand(nodes, text) until text.empty?
    MultiNode.new(nodes)
  end

  # Pick the markup symbol to act on next.
  #
  # Every symbol whose start string occurs in +text+ is a candidate.
  # Candidates are tried by start position (closer is better); candidates
  # starting at the same position are tried in symbol-table order. The table
  # index is part of the sort key because sort_by alone is not stable, and
  # e.g. '**' must be tried before '*'.
  #
  # Returns [sym, matchdata] for the first candidate whose full regexp
  # matches, or nil when nothing matches.
  def find_syms(text)
    candidates = []
    SYMS.each_with_index do |(sym, start, regex), rank|
      position = text.index(start)
      candidates << [position, rank, sym, regex] if position
    end

    ordered = candidates.sort_by { |position, rank, _sym, _regex| [position, rank] }
    ordered.each do |_position, _rank, sym, regex|
      matchdata = regex.match(text)
      return [sym, matchdata] if matchdata
    end

    nil
  end

  # Consume one operand from the front of +text+ and append the resulting
  # node(s) to +ary+.
  #
  # Any plain text in front of the next markup match becomes a TextNode; the
  # match itself becomes the node for its symbol. When no markup matches, the
  # whole remaining string becomes a single TextNode.
  #
  # Returns +ary+.
  def operand(ary, text)
    sym, md = find_syms(text)
    if sym.nil? || md.nil?
      # No match, consume entire string.
      return ary << TextNode.new(text.slice!(0..-1))
    end

    # Consume string before match.
    lead = md.pre_match.size
    ary << TextNode.new(text.slice!(0...lead)) if lead > 0

    case sym
    when :raw_bracket
      inner = consume_balanced(text, md, sym, '[==', '==]')
      ary << RawTextNode.new(inner[1])
    when :bq_author
      inner = consume_balanced(text, md, sym, BQ_LEFT, '[/bq]')
      ary << HTMLNode.new(:blockquote, parse(inner[2]), title: inner[1])
    when :bq
      inner = consume_balanced(text, md, sym, BQ_LEFT, '[/bq]')
      ary << HTMLNode.new(:blockquote, parse(inner[1]))
    when :spoiler
      inner = consume_balanced(text, md, sym, '[spoiler]', '[/spoiler]')
      ary << HTMLNode.new(:span, parse(inner[1]), class: 'spoiler')
    else
      # Non-nesting operator: drop exactly the matched text. The range is
      # exclusive; an inclusive one would also swallow the character that
      # follows the match.
      text.slice!(0...md.to_s.size)
      node = inline_node(sym, md)
      ary << node if node
    end

    ary
  end

  private

  # Consume the balanced prefix of a nesting match from +text+ and re-match
  # it against the symbol's regexp so the captures describe only that prefix.
  def consume_balanced(text, md, sym, left, right)
    balance_markup(text, md.to_s, left, right).match(SYM_TO_REGEX[sym])
  end

  # Build the node for a non-nesting symbol from its match data.
  # Returns nil for a symbol that has no node mapping.
  def inline_node(sym, md)
    case sym
    when :raw
      RawTextNode.new(md[1])
    when :link_title_bracket, :link_title
      HTMLNode.new(:a, parse(md[1]), title: md[2], href: md[3])
    when :link_bracket, :link
      HTMLNode.new(:a, parse(md[1]), href: md[2])
    when :image_link_title_bracket, :image_link_title
      HTMLNode.new(:a, ImageNode.new(md[1]), title: md[2], href: md[3])
    when :image_link_bracket, :image_link
      HTMLNode.new(:a, ImageNode.new(md[1]), href: md[2])
    when :image_title_bracket, :image_title
      HTMLNode.new(:span, ImageNode.new(md[1]), title: md[2])
    when :image_bracket, :image
      ImageNode.new(md[1])
    when :dblbold_bracket, :dblbold
      HTMLNode.new(:b, parse(md[1]))
    when :bold_bracket, :bold
      HTMLNode.new(:strong, parse(md[1]))
    when :dblitalic_bracket, :dblitalic
      HTMLNode.new(:i, parse(md[1]))
    when :italic_bracket, :italic
      HTMLNode.new(:em, parse(md[1]))
    when :code_bracket, :code
      HTMLNode.new(:code, parse(md[1]))
    when :ins_bracket, :ins
      HTMLNode.new(:ins, parse(md[1]))
    when :sup_bracket, :sup
      HTMLNode.new(:sup, parse(md[1]))
    when :del_bracket, :del
      HTMLNode.new(:del, parse(md[1]))
    when :sub_bracket, :sub
      HTMLNode.new(:sub, parse(md[1]))
    when :cite_bracket, :cite
      HTMLNode.new(:cite, parse(md[1]))
    end
  end

  # Find the longest substring that contains balanced markup,
  # or the whole string if this is impossible.
  #
  # text    - remaining input, which starts with +matched+; the returned
  #           prefix is removed from it.
  # matched - the (greedy) regexp match to be balanced.
  # left    - opening delimiter (String or Regexp).
  # right   - closing delimiter (String or Regexp).
  #
  # Returns the balanced prefix of +matched+.
  def balance_markup(text, matched, left, right)
    either = Regexp.union(left, right)
    opener = Regexp.union(left)

    scanner = StringScanner.new(matched)
    depth = 0
    balanced_end = 0

    until scanner.eos?
      token = scanner.scan(either)
      if token.nil?
        # Not at a delimiter: jump to just before the next one, or to the
        # end of the string when there is none left.
        if scanner.skip_until(either)
          scanner.pos -= scanner.matched_size
        else
          scanner.terminate
        end
      elsif token =~ opener
        depth += 1
      else
        depth -= 1
        # Character offset, not byte offset: it is used to index strings
        # below, and the two differ once the content has multibyte chars.
        balanced_end = scanner.charpos
      end

      break if depth.zero?
    end

    # balanced_end == 0 gives -1 here, which selects the whole string.
    last = balanced_end - 1
    text.slice!(0..last)
    matched[0..last]
  end

  # Properly nesting operator pairs:
  #   [bq][/bq] [bq="author"][/bq]
  #   [spoiler][/spoiler]
  #   [== ==]

  # Non-nesting operator pairs:
  #   == " ! ** * __ _ @ + ^ - ~ ??

  # Ruby \s does not match extra unicode space characters.
  RX_SPACE_CHARS = ' \t\u00a0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000'.freeze

  RX_URL = %r{
             (?:http:\/\/|https:\/\/|\/\/|\/|\#)                                        # protocol
             (?:[^%#{RX_SPACE_CHARS}"!\n\r]|%[0-9a-fA-F]{2})+                           # path
             [^#{RX_SPACE_CHARS}`~!@$^&"\n\r\*_+\-=\[\]\\|;:,.'?\#)]                    # invalid
           }x

  BQ_LEFT = /\[bq="([^"]*)"\]|\[bq\]/

  # Symbol table, in operator precedence order:
  # 0. Symbol name.
  # 1. Start string for optimized matching.
  # 2. Complete match definition.
  SYMS = [
    [:raw_bracket,              '[==',       /\[==(.*)==\]/],
    [:bq_author,                '[bq="',     /\[bq="([^"]*)"\](.*)\[\/bq\]/],
    [:bq,                       '[bq]',      /\[bq\](.*)\[\/bq\]/],
    [:spoiler,                  '[spoiler]', /\[spoiler\](.*)\[\/spoiler\]/],
    [:raw,                      '==',        /==(.*)==/],

    # Unanchored like every other bracket form, so a bracketed titled link
    # is recognised anywhere in the text and not only at its very start.
    [:link_title_bracket,       '["',        /\["([^"]*)\(([^\)]*)\)":(#{RX_URL})\]/],
    [:link_title,               '"',         /"([^"]*)\(([^\)]*)\)":(#{RX_URL})/],
    [:link_bracket,             '["',        /\["([^"]*)":(#{RX_URL})\]/],
    [:link,                     '"',         /"([^"]*)":(#{RX_URL})/],

    [:image_link_title_bracket, '[!',        /\[!(#{RX_URL})\(([^\)]*)\)!:(#{RX_URL})\]/],
    [:image_link_title,         '!',         /!(#{RX_URL})\(([^\)]*)\)!:(#{RX_URL})/],
    [:image_link_bracket,       '[!',        /\[!(#{RX_URL})!:(#{RX_URL})\]/],
    [:image_link,               '!',         /!(#{RX_URL})!:(#{RX_URL})/],
    [:image_title_bracket,      '[!',        /\[!(#{RX_URL})\(([^\)]*)\)!\]/],
    [:image_title,              '!',         /!(#{RX_URL})\(([^\)]*)\)!/],
    [:image_bracket,            '[!',        /\[!(#{RX_URL})!\]/],
    [:image,                    '!',         /!(#{RX_URL})!/],

    [:dblbold_bracket,          '[**',       /\[\*\*((?:.|\n.|\n(?=\*\*\]))+?)\*\*\]/],
    [:dblbold,                  '**',        /\*\*((?:.|\n.|\n(?=\*\*))+?)\*\*/],
    [:bold_bracket,             '[*',        /\[\*((?:.|\n.|\n(?=\*\]))+?)\*\]/],
    # The lookahead is the bare closing '*', matching the other non-bracket
    # operators, so a newline directly before the closer is accepted.
    [:bold,                     '*',         /\*((?:.|\n.|\n(?=\*))+?)\*/],
    [:dblitalic_bracket,        '[__',       /\[__((?:.|\n.|\n(?=__\]))+?)__\]/],
    [:dblitalic,                '__',        /__((?:.|\n.|\n(?=__))+?)__/],
    [:italic_bracket,           '[_',        /\[_((?:.|\n.|\n(?=_\]))+?)_\]/],
    [:italic,                   '_',         /_((?:.|\n.|\n(?=_))+?)_/],
    [:code_bracket,             '[@',        /\[@((?:.|\n.|\n(?=@\]))+?)@\]/],
    [:code,                     '@',         /@((?:.|\n.|\n(?=@))+?)@/],
    [:ins_bracket,              '[+',        /\[\+((?:.|\n.|\n(?=\+\]))+?)\+\]/],
    [:ins,                      '+',         /\+((?:.|\n.|\n(?=\+))+?)\+/],
    [:sup_bracket,              '[^',        /\[\^((?:.|\n.|\n(?=\^\]))+?)\^\]/],
    [:sup,                      '^',         /\^((?:.|\n.|\n(?=\^))+?)\^/],
    [:del_bracket,              '[-',        /\[\-((?:.|\n.|\n(?=\-\]))+?)\-\]/],
    [:del,                      '-',         /\-((?:.|\n.|\n(?=\-))+?)\-/],
    [:sub_bracket,              '[~',        /\[\~((?:.|\n.|\n(?=\~\]))+?)\~\]/],
    [:sub,                      '~',         /\~((?:.|\n.|\n(?=\~))+?)\~/],
    [:cite_bracket,             '[??',       /\[\?\?((?:.|\n.|\n(?=\?\?\]))+?)\?\?\]/],
    [:cite,                     '??',        /\?\?((?:.|\n.|\n(?=\?\?))+?)\?\?/],
  ].freeze

  # Lookup tables derived from SYMS: symbol => start string / full regexp.
  SYM_TO_INDEX = Hash[SYMS.map { |name, index, _re| [name, index] }].freeze
  SYM_TO_REGEX = Hash[SYMS.map { |name, _index, re| [name, re] }].freeze
end
@@ -0,0 +1,3 @@
1
module Textile
  # Release version of the gem. The string is frozen, so code that needs a
  # mutable copy has to dup it.
  VERSION = '0.4.0'.freeze
end
@@ -0,0 +1,15 @@
1
# Gem specification for the textile gem.
# Puts the gem's own lib directory on the load path so the version constant
# can be required before the gem is installed.
$LOAD_PATH.push(File.expand_path("../lib", __FILE__))
require 'textile/version'

Gem::Specification.new do |spec|
  spec.name          = 'textile'
  spec.version       = Textile::VERSION.dup
  spec.license       = "MIT"
  spec.summary       = "Recursive-descent style Textile parser"
  spec.description   = "Recursive-descent style Textile parser"
  spec.authors       = ["Liam P. White"]
  spec.email         = 'example@example.com'

  # Package every file tracked by git.
  spec.files         = `git ls-files`.split("\n")
  spec.require_paths = ["lib"]
end
metadata ADDED
@@ -0,0 +1,49 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: textile
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.4.0
5
+ platform: ruby
6
+ authors:
7
+ - Liam P. White
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-11-14 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Recursive-descent style Textile parser
14
+ email: example@example.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - README
20
+ - lib/textile.rb
21
+ - lib/textile/nodes.rb
22
+ - lib/textile/parser.rb
23
+ - lib/textile/version.rb
24
+ - textile.gemspec
25
+ homepage:
26
+ licenses:
27
+ - MIT
28
+ metadata: {}
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ required_rubygems_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ requirements: []
44
+ rubyforge_project:
45
+ rubygems_version: 2.5.1
46
+ signing_key:
47
+ specification_version: 4
48
+ summary: Recursive-descent style Textile parser
49
+ test_files: []