textile 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README +41 -0
- data/lib/textile.rb +7 -0
- data/lib/textile/nodes.rb +79 -0
- data/lib/textile/parser.rb +199 -0
- data/lib/textile/version.rb +3 -0
- data/textile.gemspec +15 -0
- metadata +49 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 29b8619628c3c467404af4c62c21abe0fe57315c
|
4
|
+
data.tar.gz: 58c3225030b2fc5182bb20540f738b40fd0a3ad2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2f0744c414b3e4f81e2c03555b5a87697b9ce41670e05cfbc1a561e88d2b55907a679e4105e22c74ac83eab556d3729302953c29d8330d3a79d768b4d9bb8730
|
7
|
+
data.tar.gz: cc332b9b6d3cb62f57e76217b9836fa6b35319a4fe52c93a98087818bacc3acbbca9ecfd934a4c8d43fe81fa7e3b050f31a299e1db6547ddfd0d7444ea648342
|
data/README
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
Recursive-descent Textile parser in Ruby
|
2
|
+
|
3
|
+
operand = operand1+ ?;
|
4
|
+
|
5
|
+
operand1 =
|
6
|
+
<text>
|
7
|
+
|
8
|
+
| '[==', operand, '==]'
|
9
|
+
| '[bq="', <author>, '"]', operand, '[/bq]'
|
10
|
+
| '[bq]', operand, '[/bq]'
|
11
|
+
| '[spoiler]', operand, '[/spoiler]'
|
12
|
+
| '==', operand, '=='
|
13
|
+
|
14
|
+
| link
|
15
|
+
| image
|
16
|
+
|
17
|
+
| '[**', operand, '**]' | '**', operand, '**'
|
18
|
+
| '[*', operand, '*]' | '*', operand, '*'
|
19
|
+
| '[__', operand, '__]' | '__', operand, '__'
|
20
|
+
| '[_', operand, '_]' | '_', operand, '_'
|
21
|
+
| '[@', operand, '@]' | '@', operand, '@'
|
22
|
+
| '[+', operand, '+]' | '+', operand, '+'
|
23
|
+
| '[^', operand, '^]' | '^', operand, '^'
|
24
|
+
| '[-', operand, '-]' | '-', operand, '-'
|
25
|
+
| '[~', operand, '~]' | '~', operand, '~'
|
26
|
+
| '[??', operand, '??]' | '??', operand, '??'
|
27
|
+
;
|
28
|
+
|
29
|
+
link =
|
30
|
+
'["', operand, '(', <text>, ')":', <url>, ']'
|
31
|
+
| '"', operand, '(', <text>, ')":', <url>
|
32
|
+
| '["', operand, '":', <url>, ']'
|
33
|
+
| '"', operand, '":', <url>;
|
34
|
+
|
35
|
+
image =
|
36
|
+
'[!', <url>, '(', <text>, ')!:', <url>, ']'
|
37
|
+
| '!', <url>, '(', <text>, ')!:', <url>
|
38
|
+
| '[!', <url>, '(', <text>, ')!]'
|
39
|
+
| '!', <url>, '(', <text>, ')!'
|
40
|
+
| '[!', <url>, '!]'
|
41
|
+
| '!', <url>, '!';
|
data/lib/textile.rb
ADDED
data/lib/textile/nodes.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'cgi'
|
3
|
+
|
4
|
+
# Container node: renders a list of child nodes one after another.
class MultiNode
  # nodes - Array of objects responding to #build. A nil (or false) value is
  #         treated as an empty list.
  def initialize(nodes)
    @nodes = nodes || []
  end

  # Returns the output of every child's #build, concatenated in order.
  def build
    rendered = @nodes.each_with_object([]) { |node, parts| parts << node.build }
    rendered.join('')
  end
end
|
13
|
+
|
14
|
+
# Leaf node holding plain text.
class TextNode
  # text - String to render.
  def initialize(text)
    @text = text
  end

  # Hook for booru monkeypatch
  #
  # Returns the text HTML-escaped, with each newline replaced by <br>.
  def build
    escaped = CGI.escapeHTML(@text)
    escaped.gsub("\n", '<br>')
  end
end
|
24
|
+
|
25
|
+
# Leaf node for text that must not be interpreted as markup.
class RawTextNode
  # text - String to render.
  def initialize(text)
    @text = text
  end

  # Returns the text HTML-escaped, with each newline replaced by <br>.
  def build
    CGI.escapeHTML(@text).gsub(/\n/, '<br>')
  end
end
|
34
|
+
|
35
|
+
# Element node: wraps an inner node in an HTML tag with optional attributes.
class HTMLNode
  # tag_name   - Tag name (String or Symbol), emitted verbatim.
  # inner      - Object responding to #build; its output becomes the element body.
  # attributes - Hash of attribute name => value (nil is treated as no attributes).
  def initialize(tag_name, inner, attributes = {})
    @tag_name = tag_name
    @inner = inner
    @attributes = attributes || {}
  end

  # Returns the opening tag, the inner node's output and the closing tag.
  #
  # Attribute values are HTML-escaped. Values are stringified first so that
  # Symbols or numbers do not raise inside CGI.escapeHTML, and attributes whose
  # value is nil are omitted instead of raising TypeError.
  def build
    output = ["<#{@tag_name}"]
    @attributes.each do |name, value|
      next if value.nil?

      output << %( #{name}="#{CGI.escapeHTML(value.to_s)}")
    end
    output << '>' << @inner.build << "</#{@tag_name}>"
    output.join('')
  end
end
|
61
|
+
|
62
|
+
# Leaf node rendering an <img> element.
class ImageNode
  # src - String image URL.
  def initialize(src)
    @src = src
  end

  # Returns a self-closing <img> tag whose src is the result of #transform_src.
  def build
    ['<img src="', transform_src, '"/>'].join('')
  end

  # Hook for booru monkeypatch
  #
  # Returns the HTML-escaped source URL.
  def transform_src
    CGI.escapeHTML(@src)
  end
end
|
data/lib/textile/parser.rb
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'textile/nodes'
|
3
|
+
|
4
|
+
module TextileParser
|
5
|
+
extend self
|
6
|
+
|
7
|
+
# Parses Textile markup into a node tree.
#
# text - String of Textile source. It is not modified: parsing consumes a
#        private copy, so frozen strings (e.g. literals under
#        `frozen_string_literal: true`) are accepted and the caller's string
#        is left intact.
#
# Returns a MultiNode wrapping the parsed nodes.
def parse(text)
  ary = []
  # operand consumes its input destructively with slice!, so work on a copy.
  remaining = text.dup
  operand(ary, remaining) until remaining.empty?
  MultiNode.new(ary)
end
|
12
|
+
|
13
|
+
# Picks the markup symbol to act on next.
#
# Candidates are the symbols whose start string occurs in text. They are
# tried nearest start string first; symbols whose start strings sit at the
# same position are tried in symbol-table (precedence) order. The table
# position is part of the sort key because sort_by alone is not stable and
# could reorder such ties.
#
# text - String still to be parsed.
#
# Returns [sym, matchdata] for the first candidate whose full regexp matches
# text, or nil when no candidate matches.
def find_syms(text)
  candidates = []
  SYM_TO_INDEX.each_with_index do |(sym, start), order|
    pos = text.index(start)
    candidates << [pos, order, sym] if pos
  end

  # `order` is unique, so the comparison never reaches the symbol itself.
  candidates.sort.each do |_pos, _order, sym|
    matchdata = SYM_TO_REGEX[sym].match(text)
    return [sym, matchdata] if matchdata
  end

  nil
end
|
29
|
+
|
30
|
+
# Consumes one operand from the front of text and appends the resulting
# node(s) to ary.
#
# ary  - Array collecting the parsed nodes (appended to in place).
# text - String being parsed; the consumed prefix is removed in place.
#
# When no markup is found the whole remainder becomes a single TextNode.
# Otherwise any text before the match becomes a TextNode, the match itself is
# removed from text, and the node for the matched symbol is appended.
def operand(ary, text)
  sym, md = find_syms(text)

  # No match, consume entire string.
  return ary << TextNode.new(text.slice!(0..-1)) if sym.nil? || md.nil?

  # Consume string before match.
  lead = md.pre_match.size
  ary << TextNode.new(text.slice!(0...lead)) if lead > 0

  # Consume the match itself. Nesting pairs are cut at the point where their
  # delimiters balance (balance_markup removes that part from text); every
  # other symbol consumes exactly the matched substring.
  case sym
  when :raw_bracket
    inner = balance_markup(text, md.to_s, '[==', '==]').match(SYM_TO_REGEX[:raw_bracket])
    ary << RawTextNode.new(inner[1])
  when :bq_author
    inner = balance_markup(text, md.to_s, BQ_LEFT, '[/bq]').match(SYM_TO_REGEX[:bq_author])
    ary << HTMLNode.new(:blockquote, parse(inner[2]), title: inner[1])
  when :bq
    inner = balance_markup(text, md.to_s, BQ_LEFT, '[/bq]').match(SYM_TO_REGEX[:bq])
    ary << HTMLNode.new(:blockquote, parse(inner[1]))
  when :spoiler
    inner = balance_markup(text, md.to_s, '[spoiler]', '[/spoiler]').match(SYM_TO_REGEX[:spoiler])
    ary << HTMLNode.new(:span, parse(inner[1]), class: 'spoiler')
  else
    # Exclusive range: an inclusive `0 .. size` would also swallow the
    # character that follows the match.
    text.slice!(0...md.to_s.size)
  end

  # Build the node for the non-nesting symbols.
  case sym
  when :raw
    ary << RawTextNode.new(md[1])
  when :link_title_bracket, :link_title
    ary << HTMLNode.new(:a, parse(md[1]), title: md[2], href: md[3])
  when :link_bracket, :link
    ary << HTMLNode.new(:a, parse(md[1]), href: md[2])
  when :image_link_title_bracket, :image_link_title
    ary << HTMLNode.new(:a, ImageNode.new(md[1]), title: md[2], href: md[3])
  when :image_link_bracket, :image_link
    ary << HTMLNode.new(:a, ImageNode.new(md[1]), href: md[2])
  when :image_title_bracket, :image_title
    ary << HTMLNode.new(:span, ImageNode.new(md[1]), title: md[2])
  when :image_bracket, :image
    ary << ImageNode.new(md[1])
  when :dblbold_bracket, :dblbold
    ary << HTMLNode.new(:b, parse(md[1]))
  when :bold_bracket, :bold
    ary << HTMLNode.new(:strong, parse(md[1]))
  when :dblitalic_bracket, :dblitalic
    ary << HTMLNode.new(:i, parse(md[1]))
  when :italic_bracket, :italic
    ary << HTMLNode.new(:em, parse(md[1]))
  when :code_bracket, :code
    ary << HTMLNode.new(:code, parse(md[1]))
  when :ins_bracket, :ins
    ary << HTMLNode.new(:ins, parse(md[1]))
  when :sup_bracket, :sup
    ary << HTMLNode.new(:sup, parse(md[1]))
  when :del_bracket, :del
    ary << HTMLNode.new(:del, parse(md[1]))
  when :sub_bracket, :sub
    ary << HTMLNode.new(:sub, parse(md[1]))
  when :cite_bracket, :cite
    ary << HTMLNode.new(:cite, parse(md[1]))
  end
end
|
98
|
+
|
99
|
+
private
|
100
|
+
|
101
|
+
# Finds the prefix of matched in which the left/right delimiters balance,
# removes that many characters from the front of text, and returns the prefix.
#
# text    - String being parsed (starts with matched); mutated in place.
# matched - String covered by the regexp match for the nesting symbol.
# left    - String or Regexp for the opening delimiter.
# right   - String or Regexp for the closing delimiter.
#
# Scanning stops when the nesting depth returns to zero or the input ends.
# The cut is made just after the last closing delimiter seen; if none was
# seen, the whole string is taken.
#
# StringScanner#pos counts bytes while String#slice! and String#[] count
# characters, so the cut position is tracked with #charpos and the rewind uses
# #matched_size (bytes). This keeps multibyte text from shifting the cut.
def balance_markup(text, matched, left, right)
  both = Regexp.union(left, right)
  left = Regexp.union(left)
  right = Regexp.union(right)

  s = StringScanner.new(matched)
  depth = 0
  balanced_end = 0 # character offset just past the last closing delimiter

  loop do
    token = s.scan(both)
    if token.nil?
      # Plain text: jump to the next delimiter and step back onto its start
      # so the next iteration scans it; without one, finish the scan.
      if s.skip_until(both)
        s.pos -= s.matched_size
      else
        s.terminate
      end
    elsif token =~ left
      depth += 1
    elsif token =~ right
      depth -= 1
      balanced_end = s.charpos
    end

    break if depth.zero? || s.eos?
  end

  # balanced_end is 0 when no closer was seen; the range 0..-1 then selects
  # the whole string.
  last = balanced_end - 1
  text.slice!(0..last)
  matched[0..last]
end
|
130
|
+
|
131
|
+
# Properly nesting operator pairs:
|
132
|
+
# [bq][/bq] [bq="author"][/bq]
|
133
|
+
# [spoiler][/spoiler]
|
134
|
+
# [== ==]
|
135
|
+
|
136
|
+
# Non-nesting operator pairs:
|
137
|
+
# == " ! ** * __ _ @ + ^ - ~ ??
|
138
|
+
|
139
|
+
# Ruby \s does not match extra unicode space characters.
# Kept as raw escape text: it is interpolated into regexp character classes.
RX_SPACE_CHARS = ' \t\u00a0\u1680\u180E\u2000-\u200A\u202F\u205F\u3000'

RX_URL = %r{
  (?:http:\/\/|https:\/\/|\/\/|\/|\#)                      # protocol
  (?:[^%#{RX_SPACE_CHARS}"!\n\r]|%[0-9a-fA-F]{2})+         # path
  [^#{RX_SPACE_CHARS}`~!@$^&"\n\r\*_+\-=\[\]\\|;:,.'?\#)]  # invalid
}x

# Opening delimiter of either blockquote form, used when balancing nesting.
BQ_LEFT = /\[bq="([^"]*)"\]|\[bq\]/

# Symbol table, in operator precedence order:
#   0. Symbol name.
#   1. Start string for optimized matching.
#   2. Complete match definition.
#
# Every non-bracket inline entry allows a newline directly before its closing
# delimiter via a lookahead on that delimiter; :bold previously looked ahead
# for the bracket form `*]` and so never matched that case.
# :link_title_bracket previously carried a `\A` anchor that no other entry
# has, which restricted it to matches at the very start of the text.
SYMS = [
  [:raw_bracket,              '[==',       /\[==(.*)==\]/],
  [:bq_author,                '[bq="',     /\[bq="([^"]*)"\](.*)\[\/bq\]/],
  [:bq,                       '[bq]',      /\[bq\](.*)\[\/bq\]/],
  [:spoiler,                  '[spoiler]', /\[spoiler\](.*)\[\/spoiler\]/],
  [:raw,                      '==',        /==(.*)==/],

  [:link_title_bracket,       '["',        /\["([^"]*)\(([^\)]*)\)":(#{RX_URL})\]/],
  [:link_title,               '"',         /"([^"]*)\(([^\)]*)\)":(#{RX_URL})/],
  [:link_bracket,             '["',        /\["([^"]*)":(#{RX_URL})\]/],
  [:link,                     '"',         /"([^"]*)":(#{RX_URL})/],

  [:image_link_title_bracket, '[!',        /\[!(#{RX_URL})\(([^\)]*)\)!:(#{RX_URL})\]/],
  [:image_link_title,         '!',         /!(#{RX_URL})\(([^\)]*)\)!:(#{RX_URL})/],
  [:image_link_bracket,       '[!',        /\[!(#{RX_URL})!:(#{RX_URL})\]/],
  [:image_link,               '!',         /!(#{RX_URL})!:(#{RX_URL})/],
  [:image_title_bracket,      '[!',        /\[!(#{RX_URL})\(([^\)]*)\)!\]/],
  [:image_title,              '!',         /!(#{RX_URL})\(([^\)]*)\)!/],
  [:image_bracket,            '[!',        /\[!(#{RX_URL})!\]/],
  [:image,                    '!',         /!(#{RX_URL})!/],

  [:dblbold_bracket,          '[**',       /\[\*\*((?:.|\n.|\n(?=\*\*\]))+?)\*\*\]/],
  [:dblbold,                  '**',        /\*\*((?:.|\n.|\n(?=\*\*))+?)\*\*/],
  [:bold_bracket,             '[*',        /\[\*((?:.|\n.|\n(?=\*\]))+?)\*\]/],
  [:bold,                     '*',         /\*((?:.|\n.|\n(?=\*))+?)\*/],
  [:dblitalic_bracket,        '[__',       /\[__((?:.|\n.|\n(?=__\]))+?)__\]/],
  [:dblitalic,                '__',        /__((?:.|\n.|\n(?=__))+?)__/],
  [:italic_bracket,           '[_',        /\[_((?:.|\n.|\n(?=_\]))+?)_\]/],
  [:italic,                   '_',         /_((?:.|\n.|\n(?=_))+?)_/],
  [:code_bracket,             '[@',        /\[@((?:.|\n.|\n(?=@\]))+?)@\]/],
  [:code,                     '@',         /@((?:.|\n.|\n(?=@))+?)@/],
  [:ins_bracket,              '[+',        /\[\+((?:.|\n.|\n(?=\+\]))+?)\+\]/],
  [:ins,                      '+',         /\+((?:.|\n.|\n(?=\+))+?)\+/],
  [:sup_bracket,              '[^',        /\[\^((?:.|\n.|\n(?=\^\]))+?)\^\]/],
  [:sup,                      '^',         /\^((?:.|\n.|\n(?=\^))+?)\^/],
  [:del_bracket,              '[-',        /\[\-((?:.|\n.|\n(?=\-\]))+?)\-\]/],
  [:del,                      '-',         /\-((?:.|\n.|\n(?=\-))+?)\-/],
  [:sub_bracket,              '[~',        /\[\~((?:.|\n.|\n(?=\~\]))+?)\~\]/],
  [:sub,                      '~',         /\~((?:.|\n.|\n(?=\~))+?)\~/],
  [:cite_bracket,             '[??',       /\[\?\?((?:.|\n.|\n(?=\?\?\]))+?)\?\?\]/],
  [:cite,                     '??',        /\?\?((?:.|\n.|\n(?=\?\?))+?)\?\?/],
]

# Lookup tables derived from SYMS: symbol => start string / symbol => regexp.
SYM_TO_INDEX = Hash[SYMS.map { |name, index, re| [name, index] }]
SYM_TO_REGEX = Hash[SYMS.map { |name, index, re| [name, re] }]
|
199
|
+
end
|
data/textile.gemspec
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Make lib/ loadable so the version constant can be read below.
$LOAD_PATH.push(File.expand_path("../lib", __FILE__))
require 'textile/version'

# Gem specification for the textile package.
Gem::Specification.new do |spec|
  spec.name        = 'textile'
  spec.version     = Textile::VERSION.dup
  spec.license     = "MIT"
  spec.summary     = "Recursive-descent style Textile parser"
  spec.description = "Recursive-descent style Textile parser"
  spec.authors     = ["Liam P. White"]
  spec.email       = 'example@example.com'

  # Package every file tracked by git.
  spec.files         = `git ls-files`.split("\n")
  spec.require_paths = ["lib"]
end
|
metadata
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: textile
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Liam P. White
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-11-14 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Recursive-descent style Textile parser
|
14
|
+
email: example@example.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- README
|
20
|
+
- lib/textile.rb
|
21
|
+
- lib/textile/nodes.rb
|
22
|
+
- lib/textile/parser.rb
|
23
|
+
- lib/textile/version.rb
|
24
|
+
- textile.gemspec
|
25
|
+
homepage:
|
26
|
+
licenses:
|
27
|
+
- MIT
|
28
|
+
metadata: {}
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
require_paths:
|
32
|
+
- lib
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0'
|
43
|
+
requirements: []
|
44
|
+
rubyforge_project:
|
45
|
+
rubygems_version: 2.5.1
|
46
|
+
signing_key:
|
47
|
+
specification_version: 4
|
48
|
+
summary: Recursive-descent style Textile parser
|
49
|
+
test_files: []
|