textile 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README +41 -0
- data/lib/textile.rb +7 -0
- data/lib/textile/nodes.rb +79 -0
- data/lib/textile/parser.rb +199 -0
- data/lib/textile/version.rb +3 -0
- data/textile.gemspec +15 -0
- metadata +49 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 29b8619628c3c467404af4c62c21abe0fe57315c
|
4
|
+
data.tar.gz: 58c3225030b2fc5182bb20540f738b40fd0a3ad2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2f0744c414b3e4f81e2c03555b5a87697b9ce41670e05cfbc1a561e88d2b55907a679e4105e22c74ac83eab556d3729302953c29d8330d3a79d768b4d9bb8730
|
7
|
+
data.tar.gz: cc332b9b6d3cb62f57e76217b9836fa6b35319a4fe52c93a98087818bacc3acbbca9ecfd934a4c8d43fe81fa7e3b050f31a299e1db6547ddfd0d7444ea648342
|
data/README
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
Recursive-descent Textile parser in Ruby
|
2
|
+
|
3
|
+
operand = operand1+ ?;
|
4
|
+
|
5
|
+
operand1 =
|
6
|
+
<text>
|
7
|
+
|
8
|
+
| '[==', operand, '==]'
|
9
|
+
| '[bq="', <author>, '"]', operand, '[/bq]'
|
10
|
+
| '[bq]', operand, '[/bq]'
|
11
|
+
| '[spoiler]', operand, '[/spoiler]'
|
12
|
+
| '==', operand, '=='
|
13
|
+
|
14
|
+
| link
|
15
|
+
| image
|
16
|
+
|
17
|
+
| '[**', operand, '**]' | '**', operand, '**'
|
18
|
+
| '[*', operand, '*]' | '*', operand, '*'
|
19
|
+
| '[__', operand, '__]' | '__', operand, '__'
|
20
|
+
| '[_', operand, '_]' | '_', operand, '_'
|
21
|
+
| '[@', operand, '@]' | '@', operand, '@'
|
22
|
+
| '[+', operand, '+]' | '+', operand, '+'
|
23
|
+
| '[^', operand, '^]' | '^', operand, '^'
|
24
|
+
| '[-', operand, '-]' | '-', operand, '-'
|
25
|
+
| '[~', operand, '~]' | '~', operand, '~'
|
26
|
+
| '[??', operand, '??]' | '??', operand, '??'
|
27
|
+
;
|
28
|
+
|
29
|
+
link =
|
30
|
+
'["', operand, '(', <text>, ')":', <url>, ']'
|
31
|
+
| '"', operand, '(', <text>, ')":', <url>
|
32
|
+
| '["', operand, '":', <url>, ']'
|
33
|
+
| '"', operand, '":', <url>;
|
34
|
+
|
35
|
+
image =
|
36
|
+
'[!', <url>, '(', <text>, ')!:', <url>, ']'
|
37
|
+
| '!', <url>, '(', <text>, ')!:', <url>
|
38
|
+
| '[!', <url>, '(', <text>, ')!]'
|
39
|
+
| '!', <url>, '(', <text>, ')!'
|
40
|
+
| '[!', <url>, '!]'
|
41
|
+
| '!', <url>, '!';
|
data/lib/textile.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'cgi'
|
3
|
+
|
4
|
+
# Container node: renders a list of child nodes one after another.
class MultiNode
  # nodes - Array of objects responding to #build; a nil/false value is
  #         treated as an empty list.
  def initialize(nodes)
    @nodes = nodes ? nodes : []
  end

  # Returns the concatenation of every child's #build output, in order.
  def build
    rendered = @nodes.map { |child| child.build }
    rendered.join('')
  end
end
|
13
|
+
|
14
|
+
# Leaf node holding plain text.
class TextNode
  # text - String to render.
  def initialize(text)
    @text = text
  end

  # Hook for booru monkeypatch
  #
  # Returns the text HTML-escaped, with every newline replaced by <br>.
  def build
    escaped = CGI.escapeHTML(@text)
    escaped.gsub("\n", '<br>')
  end
end
|
24
|
+
|
25
|
+
# Leaf node for text taken from raw ("==") markup; the parser does not
# parse its content any further.
class RawTextNode
  # text - String to render.
  def initialize(text)
    @text = text
  end

  # Returns the text HTML-escaped, with every newline replaced by <br>.
  def build
    escaped = CGI.escapeHTML(@text)
    escaped.gsub("\n", '<br>')
  end
end
|
34
|
+
|
35
|
+
# Element node: wraps an inner node in an HTML tag with optional attributes.
class HTMLNode
  # tag_name   - Symbol or String naming the tag (emitted verbatim).
  # inner      - node responding to #build; rendered between the tags.
  # attributes - Hash of attribute name => value (nil is treated as empty).
  def initialize(tag_name, inner, attributes = {})
    @tag_name = tag_name
    @inner = inner
    @attributes = attributes || {}
  end

  # Returns "<tag name="value"...>inner</tag>" as a String.
  #
  # Attribute values are HTML-escaped. Values that are not Strings are
  # converted with #to_s first (CGI.escapeHTML raises TypeError otherwise),
  # and attributes whose value is nil are left out entirely. Attribute
  # names and the tag name are emitted as given, without escaping.
  def build
    output = ["<#{@tag_name}"]
    @attributes.each do |name, value|
      next if value.nil?

      output << %( #{name}="#{CGI.escapeHTML(value.to_s)}")
    end
    output << '>'
    output << @inner.build
    output << "</#{@tag_name}>"
    output.join('')
  end
end
|
61
|
+
|
62
|
+
# Leaf node rendering an <img> tag for a source URL.
class ImageNode
  # src - String image URL.
  def initialize(src)
    @src = src
  end

  # Returns the self-closing <img> tag with the transformed source.
  def build
    "<img src=\"#{transform_src}\"/>"
  end

  # Hook for booru monkeypatch
  #
  # Returns the source URL HTML-escaped.
  def transform_src
    CGI.escapeHTML(@src)
  end
end
|
@@ -0,0 +1,199 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'strscan'
require 'textile/nodes'
|
3
|
+
|
4
|
+
# Recursive-descent parser that turns Textile-style markup into a tree of
# node objects (MultiNode, TextNode, RawTextNode, HTMLNode, ImageNode).
#
# Relies on StringScanner (stdlib "strscan") being loaded.
module TextileParser
  extend self

  # Parses +text+ and returns a MultiNode wrapping the parsed nodes.
  #
  # The input is copied first: parsing consumes its working string with
  # String#slice!, which would otherwise empty the caller's string and
  # raise on frozen input.
  def parse(text)
    remaining = text.dup
    ary = []
    operand(ary, remaining) until remaining.empty?
    MultiNode.new(ary)
  end

  # Finds the markup symbol to act on next.
  #
  # Candidates are the symbols whose start string occurs in +text+, ordered
  # by the position of that start string (closer is better). Ties are broken
  # by the symbol's position in SYMS, i.e. by operator precedence; the
  # explicit tie-breaker is needed because sort_by is not a stable sort.
  # The first candidate whose full regexp matches wins.
  #
  # Returns [sym, matchdata], or nil when nothing matches.
  def find_syms(text)
    candidates = []
    SYMS.each_with_index do |(sym, start, regex), precedence|
      position = text.index(start)
      candidates << [position, precedence, sym, regex] if position
    end

    ordered = candidates.sort_by { |position, precedence, _sym, _regex| [position, precedence] }
    ordered.each do |_position, _precedence, sym, regex|
      matchdata = regex.match(text)
      return [sym, matchdata] if matchdata
    end

    nil
  end

  # Consumes one operand from the front of +text+ (mutating it) and appends
  # the resulting node(s) to +ary+: any plain text preceding the next markup
  # match, then the node for the match itself. When no markup matches, the
  # whole remaining text becomes a single TextNode.
  def operand(ary, text)
    sym, md = find_syms(text)
    # No match, consume entire string.
    return ary << TextNode.new(text.slice!(0..-1)) if sym.nil? || md.nil?

    # Consume string before match.
    lead = md.pre_match
    ary << TextNode.new(text.slice!(0...lead.size)) unless lead.empty?

    matched = md.to_s
    case sym
    when :raw_bracket
      inner = balance_markup(text, matched, '[==', '==]').match(SYM_TO_REGEX[:raw_bracket])
      ary << RawTextNode.new(inner[1])
    when :bq_author
      inner = balance_markup(text, matched, BQ_LEFT, '[/bq]').match(SYM_TO_REGEX[:bq_author])
      ary << HTMLNode.new(:blockquote, parse(inner[2]), title: inner[1])
    when :bq
      inner = balance_markup(text, matched, BQ_LEFT, '[/bq]').match(SYM_TO_REGEX[:bq])
      ary << HTMLNode.new(:blockquote, parse(inner[1]))
    when :spoiler
      inner = balance_markup(text, matched, '[spoiler]', '[/spoiler]').match(SYM_TO_REGEX[:spoiler])
      ary << HTMLNode.new(:span, parse(inner[1]), class: 'spoiler')
    else
      # Exclusive range: consume exactly the matched markup and leave the
      # character that follows it in place.
      text.slice!(0...matched.size)
      node = inline_node(sym, md)
      ary << node if node
    end

    ary
  end

  private

  # Builds the node for a non-nesting symbol from its match data.
  # Returns nil for a symbol it does not know.
  def inline_node(sym, md)
    case sym
    when :raw
      RawTextNode.new(md[1])
    when :link_title_bracket, :link_title
      HTMLNode.new(:a, parse(md[1]), title: md[2], href: md[3])
    when :link_bracket, :link
      HTMLNode.new(:a, parse(md[1]), href: md[2])
    when :image_link_title_bracket, :image_link_title
      HTMLNode.new(:a, ImageNode.new(md[1]), title: md[2], href: md[3])
    when :image_link_bracket, :image_link
      HTMLNode.new(:a, ImageNode.new(md[1]), href: md[2])
    when :image_title_bracket, :image_title
      HTMLNode.new(:span, ImageNode.new(md[1]), title: md[2])
    when :image_bracket, :image
      ImageNode.new(md[1])
    when :dblbold_bracket, :dblbold
      HTMLNode.new(:b, parse(md[1]))
    when :bold_bracket, :bold
      HTMLNode.new(:strong, parse(md[1]))
    when :dblitalic_bracket, :dblitalic
      HTMLNode.new(:i, parse(md[1]))
    when :italic_bracket, :italic
      HTMLNode.new(:em, parse(md[1]))
    when :code_bracket, :code
      HTMLNode.new(:code, parse(md[1]))
    when :ins_bracket, :ins
      HTMLNode.new(:ins, parse(md[1]))
    when :sup_bracket, :sup
      HTMLNode.new(:sup, parse(md[1]))
    when :del_bracket, :del
      HTMLNode.new(:del, parse(md[1]))
    when :sub_bracket, :sub
      HTMLNode.new(:sub, parse(md[1]))
    when :cite_bracket, :cite
      HTMLNode.new(:cite, parse(md[1]))
    end
  end

  # Find the longest substring that contains balanced markup,
  # or the whole string if this is impossible.
  #
  # text    - working string that starts with +matched+; the chosen prefix is
  #           removed from it.
  # matched - the regexp match to balance.
  # left    - opening delimiter (String or Regexp).
  # right   - closing delimiter (String or Regexp).
  #
  # Returns the chosen prefix of +matched+.
  def balance_markup(text, matched, left, right)
    both = Regexp.union(left, right)
    left = Regexp.union(left)
    right = Regexp.union(right)

    scanner = StringScanner.new(matched)
    depth = 0
    lowest_pos = 0
    loop do
      token = scanner.scan(both)
      if token.nil?
        # Not at a delimiter: jump to the next one and step back onto it so
        # the next iteration scans it. pos and matched_size are both byte
        # offsets, so this is safe for multibyte text.
        if scanner.scan_until(both)
          scanner.pos -= scanner.matched_size
        else
          scanner.terminate
        end
      elsif token =~ left
        depth += 1
      elsif token =~ right
        depth -= 1
        # Character offset (not byte offset): it is used below to index
        # into Strings.
        lowest_pos = scanner.charpos
      end

      break if depth.zero? || scanner.eos?
    end

    # When no closing delimiter was seen this is -1, i.e. the whole string.
    last = lowest_pos.pred
    text.slice!(0..last)
    matched[0..last]
  end

  # Properly nesting operator pairs:
  #   [bq][/bq] [bq="author"][/bq]
  #   [spoiler][/spoiler]
  #   [== ==]

  # Non-nesting operator pairs:
  #   == " ! ** * __ _ @ + ^ - ~ ??

  # Ruby \s does not match extra unicode space characters.
  # Single-quoted on purpose: the escapes are interpreted by the regexp
  # engine once this is interpolated into a character class.
  RX_SPACE_CHARS = ' \t\u00a0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000'

  RX_URL = %r{
    (?:http:\/\/|https:\/\/|\/\/|\/|\#)                      # protocol
    (?:[^%#{RX_SPACE_CHARS}"!\n\r]|%[0-9a-fA-F]{2})+         # path
    [^#{RX_SPACE_CHARS}`~!@$^&"\n\r\*_+\-=\[\]\\|;:,.'?\#)]  # final char must not be one of these
  }x

  BQ_LEFT = /\[bq="([^"]*)"\]|\[bq\]/

  # Symbol table, in operator precedence order:
  # 0. Symbol name.
  # 1. Start string for optimized matching.
  # 2. Complete match definition.
  SYMS = [
    [:raw_bracket,              '[==',       /\[==(.*)==\]/],
    [:bq_author,                '[bq="',     /\[bq="([^"]*)"\](.*)\[\/bq\]/],
    [:bq,                       '[bq]',      /\[bq\](.*)\[\/bq\]/],
    [:spoiler,                  '[spoiler]', /\[spoiler\](.*)\[\/spoiler\]/],
    [:raw,                      '==',        /==(.*)==/],

    # No \A anchor here: like every other entry, this form may appear
    # anywhere in the text, not only at its very start.
    [:link_title_bracket,       '["',        /\["([^"]*)\(([^\)]*)\)":(#{RX_URL})\]/],
    [:link_title,               '"',         /"([^"]*)\(([^\)]*)\)":(#{RX_URL})/],
    [:link_bracket,             '["',        /\["([^"]*)":(#{RX_URL})\]/],
    [:link,                     '"',         /"([^"]*)":(#{RX_URL})/],

    [:image_link_title_bracket, '[!',        /\[!(#{RX_URL})\(([^\)]*)\)!:(#{RX_URL})\]/],
    [:image_link_title,         '!',         /!(#{RX_URL})\(([^\)]*)\)!:(#{RX_URL})/],
    [:image_link_bracket,       '[!',        /\[!(#{RX_URL})!:(#{RX_URL})\]/],
    [:image_link,               '!',         /!(#{RX_URL})!:(#{RX_URL})/],
    [:image_title_bracket,      '[!',        /\[!(#{RX_URL})\(([^\)]*)\)!\]/],
    [:image_title,              '!',         /!(#{RX_URL})\(([^\)]*)\)!/],
    [:image_bracket,            '[!',        /\[!(#{RX_URL})!\]/],
    [:image,                    '!',         /!(#{RX_URL})!/],

    [:dblbold_bracket,          '[**',       /\[\*\*((?:.|\n.|\n(?=\*\*\]))+?)\*\*\]/],
    [:dblbold,                  '**',        /\*\*((?:.|\n.|\n(?=\*\*))+?)\*\*/],
    [:bold_bracket,             '[*',        /\[\*((?:.|\n.|\n(?=\*\]))+?)\*\]/],
    # Lookahead is the bare closing marker, matching the other unbracketed
    # forms (the bracketed "*]" lookahead belongs to :bold_bracket only).
    [:bold,                     '*',         /\*((?:.|\n.|\n(?=\*))+?)\*/],
    [:dblitalic_bracket,        '[__',       /\[__((?:.|\n.|\n(?=__\]))+?)__\]/],
    [:dblitalic,                '__',        /__((?:.|\n.|\n(?=__))+?)__/],
    [:italic_bracket,           '[_',        /\[_((?:.|\n.|\n(?=_\]))+?)_\]/],
    [:italic,                   '_',         /_((?:.|\n.|\n(?=_))+?)_/],
    [:code_bracket,             '[@',        /\[@((?:.|\n.|\n(?=@\]))+?)@\]/],
    [:code,                     '@',         /@((?:.|\n.|\n(?=@))+?)@/],
    [:ins_bracket,              '[+',        /\[\+((?:.|\n.|\n(?=\+\]))+?)\+\]/],
    [:ins,                      '+',         /\+((?:.|\n.|\n(?=\+))+?)\+/],
    [:sup_bracket,              '[^',        /\[\^((?:.|\n.|\n(?=\^\]))+?)\^\]/],
    [:sup,                      '^',         /\^((?:.|\n.|\n(?=\^))+?)\^/],
    [:del_bracket,              '[-',        /\[\-((?:.|\n.|\n(?=\-\]))+?)\-\]/],
    [:del,                      '-',         /\-((?:.|\n.|\n(?=\-))+?)\-/],
    [:sub_bracket,              '[~',        /\[\~((?:.|\n.|\n(?=\~\]))+?)\~\]/],
    [:sub,                      '~',         /\~((?:.|\n.|\n(?=\~))+?)\~/],
    [:cite_bracket,             '[??',       /\[\?\?((?:.|\n.|\n(?=\?\?\]))+?)\?\?\]/],
    [:cite,                     '??',        /\?\?((?:.|\n.|\n(?=\?\?))+?)\?\?/],
  ]

  # Lookup tables derived from SYMS: symbol name => start string / regexp.
  SYM_TO_INDEX = Hash[SYMS.map { |name, index, _re| [name, index] }]
  SYM_TO_REGEX = Hash[SYMS.map { |name, _index, re| [name, re] }]
end
|
data/textile.gemspec
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
$:.push File.expand_path("../lib", __FILE__)
|
2
|
+
require 'textile/version'
|
3
|
+
|
4
|
+
# Gem packaging metadata for the textile parser.
Gem::Specification.new do |spec|
  # Identity. The version is a copy (dup) of Textile::VERSION.
  spec.name        = 'textile'
  spec.version     = Textile::VERSION.dup
  spec.license     = "MIT"

  # Summary and description carry the same text.
  spec.summary     = "Recursive-descent style Textile parser"
  spec.description = "Recursive-descent style Textile parser"

  # Authorship.
  spec.authors     = ["Liam P. White"]
  spec.email       = 'example@example.com'

  # Package every file listed by `git ls-files` (one path per line);
  # code is loaded from lib/.
  tracked_files = `git ls-files`
  spec.files         = tracked_files.split("\n")
  spec.require_paths = ["lib"]
end
|
metadata
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: textile
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Liam P. White
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-11-14 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Recursive-descent style Textile parser
|
14
|
+
email: example@example.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- README
|
20
|
+
- lib/textile.rb
|
21
|
+
- lib/textile/nodes.rb
|
22
|
+
- lib/textile/parser.rb
|
23
|
+
- lib/textile/version.rb
|
24
|
+
- textile.gemspec
|
25
|
+
homepage:
|
26
|
+
licenses:
|
27
|
+
- MIT
|
28
|
+
metadata: {}
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
require_paths:
|
32
|
+
- lib
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0'
|
43
|
+
requirements: []
|
44
|
+
rubyforge_project:
|
45
|
+
rubygems_version: 2.5.1
|
46
|
+
signing_key:
|
47
|
+
specification_version: 4
|
48
|
+
summary: Recursive-descent style Textile parser
|
49
|
+
test_files: []
|