linkify-it-rb 1.2.0 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -14
- data/lib/linkify-it-rb.rb +7 -7
- data/lib/linkify-it-rb/index.rb +46 -23
- data/lib/linkify-it-rb/re.rb +113 -58
- data/lib/linkify-it-rb/version.rb +1 -1
- data/spec/linkify-it-rb/test_spec.rb +19 -0
- data/spec/spec_helper.rb +1 -1
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 176768c4d108c26b13260d1e62c1cb30cd0491f5
|
4
|
+
data.tar.gz: a47bd97acdf25582ce67d65551a1d94678d56aa3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1cbcc4a1e42a24df990484e927b1049a81ff87a8bd0a53f43d32f9d0687ec2bcfb2ec81778e1fbd4a33e11c9e41796e4787d84b8f3f55461ed853dfe4b40c05e
|
7
|
+
data.tar.gz: 5b25b74826e53b4257fdc2b4ea1b0422659d44c9ec69e877ee37b396d9597654db71811d2593193552785cdb8a71258485d4a57a6447b2036cbf324386d0380f
|
data/README.md
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
# linkify-it-rb
|
2
2
|
|
3
3
|
[](http://badge.fury.io/rb/linkify-it-rb)
|
4
|
-
|
5
|
-
Links recognition library with full unicode support. Focused on high quality link pattern detection in plain text. For use with both Ruby and RubyMotion.
|
4
|
+
[](https://travis-ci.org/digitalmoksha/linkify-it-rb)
|
6
5
|
|
7
6
|
This gem is a port of the [linkify-it javascript package](https://github.com/markdown-it/linkify-it) by Vitaly Puzrin, that is used for the [markdown-it](https://github.com/markdown-it/markdown-it) package.
|
8
7
|
|
8
|
+
_Currently synced with linkify-it 2.0.3_
|
9
|
+
|
10
|
+
---
|
11
|
+
|
12
|
+
Links recognition library with full unicode support. Focused on high quality link pattern detection in plain text. For use with both Ruby and RubyMotion.
|
13
|
+
|
9
14
|
__[Javascript Demo](http://markdown-it.github.io/linkify-it/)__
|
10
15
|
|
11
16
|
Features:
|
@@ -46,8 +51,8 @@ Usage examples
|
|
46
51
|
```ruby
|
47
52
|
linkify = Linkify.new
|
48
53
|
|
49
|
-
# add
|
50
|
-
linkify.tlds('
|
54
|
+
# Reload full tlds list & add unofficial `.onion` domain.
|
55
|
+
linkify.tlds('onion', true) # Add unofficial `.onion` domain
|
51
56
|
linkify.add('git:', 'http:') # Add `git:` protocol as "alias"
|
52
57
|
linkify.add('ftp:', nil) # Disable `ftp:` protocol
|
53
58
|
linkify.set({fuzzyIP: true}) # Enable IPs in fuzzy links (without schema)
|
@@ -59,7 +64,7 @@ linkify.match('Site github.com!'))
|
|
59
64
|
=> [#<Linkify::Match @schema="", @index=5, @lastIndex=15, @raw="github.com", @text="github.com", @url="github.com">]
|
60
65
|
```
|
61
66
|
|
62
|
-
#####
|
67
|
+
##### Example 2. Add twitter mentions handler
|
63
68
|
|
64
69
|
```ruby
|
65
70
|
linkify.add('@', {
|
@@ -96,7 +101,7 @@ By default understands:
|
|
96
101
|
`schemas` is a Hash, where each key/value describes protocol/rule:
|
97
102
|
|
98
103
|
- __key__ - link prefix (usually, protocol name with `:` at the end, `skype:`
|
99
|
-
for example). `linkify-it-rb` makes
|
104
|
+
for example). `linkify-it-rb` makes sure that prefix is not preceded with
|
100
105
|
alphanumeric char.
|
101
106
|
- __value__ - rule to check tail after link prefix
|
102
107
|
- _String_ - just alias to existing rule
|
@@ -108,10 +113,11 @@ By default understands:
|
|
108
113
|
|
109
114
|
`options`:
|
110
115
|
|
111
|
-
- __fuzzyLink__ -
|
116
|
+
- __fuzzyLink__ - recognize URL-s without `http(s)://` head. Default `true`.
|
112
117
|
- __fuzzyIP__ - allow IPs in fuzzy links above. Can conflict with some texts
|
113
118
|
like version numbers. Default `false`.
|
114
|
-
- __fuzzyEmail__ - recognize emails without `mailto:` prefix.
|
119
|
+
- __fuzzyEmail__ - recognize emails without `mailto:` prefix. Default `true`.
|
120
|
+
- __---__ - set `true` to terminate link with `---` (if it's considered as long dash).
|
115
121
|
|
116
122
|
|
117
123
|
### .test(text)
|
@@ -149,16 +155,14 @@ Each match has:
|
|
149
155
|
|
150
156
|
### .tlds(list[, keepOld])
|
151
157
|
|
152
|
-
Load (or merge) new tlds list. These are
|
158
|
+
Load (or merge) new tlds list. These are needed for fuzzy links (without schema)
|
153
159
|
to avoid false positives. By default this algorithm uses:
|
154
160
|
|
155
|
-
-
|
156
|
-
- biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф
|
157
|
-
are ok.
|
161
|
+
- 2-letter root zones are ok.
|
162
|
+
- biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф are ok.
|
158
163
|
- encoded (`xn--...`) root zones are ok.
|
159
164
|
|
160
|
-
If
|
161
|
-
|
165
|
+
If that's not enough, you can reload defaults with a more detailed zones list.
|
162
166
|
|
163
167
|
### .add(schema, definition)
|
164
168
|
|
data/lib/linkify-it-rb.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
if defined?(Motion::Project::Config)
|
4
|
-
|
2
|
+
|
5
3
|
lib_dir_path = File.dirname(File.expand_path(__FILE__))
|
6
4
|
Motion::Project::App.setup do |app|
|
7
|
-
app.files.unshift(Dir.glob(File.join(lib_dir_path,
|
5
|
+
app.files.unshift(Dir.glob(File.join(lib_dir_path, 'linkify-it-rb/**/*.rb')))
|
6
|
+
|
7
|
+
app.files_dependencies File.join(lib_dir_path, 'linkify-it-rb/index.rb') => File.join(lib_dir_path, 'linkify-it-rb/re.rb')
|
8
8
|
end
|
9
|
-
|
9
|
+
|
10
10
|
require 'uc.micro-rb'
|
11
11
|
|
12
12
|
else
|
13
|
-
|
13
|
+
|
14
14
|
require 'uc.micro-rb'
|
15
15
|
require 'linkify-it-rb/re'
|
16
16
|
require 'linkify-it-rb/index'
|
17
|
-
|
17
|
+
|
18
18
|
end
|
data/lib/linkify-it-rb/index.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
class Linkify
|
2
2
|
include ::LinkifyRe
|
3
|
-
|
3
|
+
|
4
4
|
attr_accessor :__index__, :__last_index__, :__text_cache__, :__schema__, :__compiled__
|
5
5
|
attr_accessor :re, :bypass_normalizer
|
6
|
-
|
6
|
+
|
7
7
|
# RE pattern for 2-character tlds (autogenerated by ./support/tlds_2char_gen.js)
|
8
8
|
TLDS_2CH_SRC_RE = 'a[cdefgilmnoqrstuwxz]|b[abdefghijmnorstvwyz]|c[acdfghiklmnoruvwxyz]|d[ejkmoz]|e[cegrstu]|f[ijkmor]|g[abdefghilmnpqrstuwy]|h[kmnrtu]|i[delmnoqrst]|j[emop]|k[eghimnprwyz]|l[abcikrstuvy]|m[acdeghklmnopqrstuvwxyz]|n[acefgilopruz]|om|p[aefghklmnrstwy]|qa|r[eosuw]|s[abcdeghijklmnortuvxyz]|t[cdfghjklmnortvwz]|u[agksyz]|v[aceginu]|w[fs]|y[et]|z[amw]'
|
9
9
|
|
10
10
|
# DON'T try to make PRs with changes. Extend TLDs with LinkifyIt.tlds() instead
|
11
11
|
TLDS_DEFAULT = 'biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф'.split('|')
|
12
|
-
|
12
|
+
|
13
13
|
DEFAULT_OPTIONS = {
|
14
14
|
fuzzyLink: true,
|
15
15
|
fuzzyEmail: true,
|
@@ -23,7 +23,7 @@ class Linkify
|
|
23
23
|
|
24
24
|
if (!obj.re[:http])
|
25
25
|
# compile lazily, because "host"-containing variables can change on tlds update.
|
26
|
-
obj.re[:http] = Regexp.new('^\\/\\/' +
|
26
|
+
obj.re[:http] = Regexp.new('^\\/\\/' + obj.re[:src_auth] + obj.re[:src_host_port_strict] + obj.re[:src_path], 'i')
|
27
27
|
end
|
28
28
|
if obj.re[:http] =~ tail
|
29
29
|
return tail.match(obj.re[:http])[0].length
|
@@ -38,13 +38,24 @@ class Linkify
|
|
38
38
|
tail = text.slice(pos..-1)
|
39
39
|
|
40
40
|
if (!obj.re[:no_http])
|
41
|
-
# compile lazily,
|
42
|
-
obj.re[:no_http] = Regexp.new(
|
41
|
+
# compile lazily, because "host"-containing variables can change on tlds update.
|
42
|
+
obj.re[:no_http] = Regexp.new(
|
43
|
+
'^' +
|
44
|
+
obj.re[:src_auth] +
|
45
|
+
# Don't allow single-level domains, because of false positives like '//test'
|
46
|
+
# with code comments
|
47
|
+
'(?:localhost|(?:(?:' + obj.re[:src_domain] + ')\\.)+' + obj.re[:src_domain_root] + ')' +
|
48
|
+
obj.re[:src_port] +
|
49
|
+
obj.re[:src_host_terminator] +
|
50
|
+
obj.re[:src_path],
|
51
|
+
'i'
|
52
|
+
)
|
43
53
|
end
|
44
54
|
|
45
55
|
if (obj.re[:no_http] =~ tail)
|
46
|
-
# should not be
|
56
|
+
# should not be `://` & `///`, that protects from errors in protocol name
|
47
57
|
return 0 if (pos >= 3 && text[pos - 3] == ':')
|
58
|
+
return 0 if (pos >= 3 && text[pos - 3] == '/')
|
48
59
|
return tail.match(obj.re[:no_http])[0].length
|
49
60
|
end
|
50
61
|
return 0
|
@@ -55,7 +66,7 @@ class Linkify
|
|
55
66
|
tail = text.slice(pos..-1)
|
56
67
|
|
57
68
|
if (!obj.re[:mailto])
|
58
|
-
obj.re[:mailto] = Regexp.new('^' +
|
69
|
+
obj.re[:mailto] = Regexp.new('^' + obj.re[:src_email_name] + '@' + obj.re[:src_host_strict], 'i')
|
59
70
|
end
|
60
71
|
if (obj.re[:mailto] =~ tail)
|
61
72
|
return tail.match(obj.re[:mailto])[0].length
|
@@ -104,18 +115,21 @@ class Linkify
|
|
104
115
|
#
|
105
116
|
#------------------------------------------------------------------------------
|
106
117
|
def compile
|
107
|
-
@re =
|
118
|
+
@re = build_re(@__opts__)
|
108
119
|
|
109
120
|
# Define dynamic patterns
|
110
121
|
tlds = @__tlds__.dup
|
122
|
+
|
123
|
+
onCompile
|
124
|
+
|
111
125
|
tlds.push(TLDS_2CH_SRC_RE) if (!@__tlds_replaced__)
|
112
126
|
tlds.push(@re[:src_xn])
|
113
127
|
|
114
|
-
@re[:src_tlds]
|
115
|
-
@re[:email_fuzzy] = Regexp.new(
|
116
|
-
@re[:link_fuzzy] = Regexp.new(
|
117
|
-
@re[:link_no_ip_fuzzy] = Regexp.new(
|
118
|
-
@re[:host_fuzzy_test] = Regexp.new(
|
128
|
+
@re[:src_tlds] = tlds.join('|')
|
129
|
+
@re[:email_fuzzy] = Regexp.new(@re[:tpl_email_fuzzy].gsub('%TLDS%', @re[:src_tlds]), true)
|
130
|
+
@re[:link_fuzzy] = Regexp.new(@re[:tpl_link_fuzzy].gsub('%TLDS%', @re[:src_tlds]), true)
|
131
|
+
@re[:link_no_ip_fuzzy] = Regexp.new(@re[:tpl_link_no_ip_fuzzy].gsub('%TLDS%', @re[:src_tlds]), true)
|
132
|
+
@re[:host_fuzzy_test] = Regexp.new(@re[:tpl_host_fuzzy_test].gsub('%TLDS%', @re[:src_tlds]), true)
|
119
133
|
|
120
134
|
#
|
121
135
|
# Compile each schema
|
@@ -190,8 +204,8 @@ class Linkify
|
|
190
204
|
slist = @__compiled__.select {|name, val| name.length > 0 && !val.nil? }.keys.map {|str| escapeRE(str)}.join('|')
|
191
205
|
|
192
206
|
# (?!_) cause 1.5x slowdown
|
193
|
-
@re[:schema_test] = Regexp.new('(^|(?!_)(
|
194
|
-
@re[:schema_search] = Regexp.new('(^|(?!_)(
|
207
|
+
@re[:schema_test] = Regexp.new('(^|(?!_)(?:[><\uff5c]|' + @re[:src_XPCc] + '))(' + slist + ')', 'i')
|
208
|
+
@re[:schema_search] = Regexp.new('(^|(?!_)(?:[><\uff5c]|' + @re[:src_XPCc] + '))(' + slist + ')', 'ig')
|
195
209
|
|
196
210
|
@re[:pretest] = Regexp.new(
|
197
211
|
'(' + @re[:schema_test].source + ')|' +
|
@@ -203,12 +217,12 @@ class Linkify
|
|
203
217
|
|
204
218
|
resetScanCache
|
205
219
|
end
|
206
|
-
|
220
|
+
|
207
221
|
# Match result. Single element of array, returned by [[LinkifyIt#match]]
|
208
222
|
#------------------------------------------------------------------------------
|
209
223
|
class Match
|
210
224
|
attr_accessor :schema, :index, :lastIndex, :raw, :text, :url
|
211
|
-
|
225
|
+
|
212
226
|
def initialize(obj, shift)
|
213
227
|
start = obj.__index__
|
214
228
|
endt = obj.__last_index__
|
@@ -288,11 +302,14 @@ class Linkify
|
|
288
302
|
#
|
289
303
|
#------------------------------------------------------------------------------
|
290
304
|
def initialize(schemas = {}, options = {})
|
305
|
+
schemas = {} unless schemas
|
306
|
+
|
307
|
+
# not needed
|
291
308
|
# if (!(this instanceof LinkifyIt)) {
|
292
309
|
# return new LinkifyIt(schemas, options);
|
293
310
|
# }
|
294
311
|
|
295
|
-
|
312
|
+
# not needed: if you want to pass options, you must also pass schemas
|
296
313
|
# if options.empty?
|
297
314
|
# if (isOptionsObj(schemas)) {
|
298
315
|
# options = schemas;
|
@@ -321,7 +338,6 @@ class Linkify
|
|
321
338
|
compile
|
322
339
|
end
|
323
340
|
|
324
|
-
|
325
341
|
# chainable
|
326
342
|
# LinkifyIt#add(schema, definition)
|
327
343
|
# - schema (String): rule name (fixed pattern prefix)
|
@@ -356,7 +372,7 @@ class Linkify
|
|
356
372
|
@__index__ = -1
|
357
373
|
|
358
374
|
return false if (!text.length)
|
359
|
-
|
375
|
+
|
360
376
|
# try to scan for link with schema - that's the most simple rule
|
361
377
|
if @re[:schema_test] =~ text
|
362
378
|
re = @re[:schema_search]
|
@@ -449,7 +465,7 @@ class Linkify
|
|
449
465
|
# LinkifyIt#match(text) -> Array|null
|
450
466
|
#
|
451
467
|
# Returns array of found link descriptions or `null` on fail. We strongly suggest
|
452
|
-
# to use [[LinkifyIt#test]] first, for best speed.
|
468
|
+
# that you use [[LinkifyIt#test]] first, for best speed.
|
453
469
|
#
|
454
470
|
# ##### Result match description
|
455
471
|
#
|
@@ -527,7 +543,7 @@ class Linkify
|
|
527
543
|
#------------------------------------------------------------------------------
|
528
544
|
def normalize(match)
|
529
545
|
return if @bypass_normalizer
|
530
|
-
|
546
|
+
|
531
547
|
# Do minimal possible changes by default. Need to collect feedback prior
|
532
548
|
# to move forward https://github.com/markdown-it/linkify-it/issues/1
|
533
549
|
|
@@ -538,4 +554,11 @@ class Linkify
|
|
538
554
|
end
|
539
555
|
end
|
540
556
|
|
557
|
+
# LinkifyIt#onCompile()
|
558
|
+
#
|
559
|
+
# Override to modify basic RegExp-s.
|
560
|
+
#------------------------------------------------------------------------------
|
561
|
+
def onCompile
|
562
|
+
end
|
563
|
+
|
541
564
|
end
|
data/lib/linkify-it-rb/re.rb
CHANGED
@@ -1,57 +1,40 @@
|
|
1
1
|
module LinkifyRe
|
2
|
-
|
2
|
+
|
3
3
|
# Use direct extract instead of `regenerate` to reduce size
|
4
4
|
SRC_ANY = UCMicro::Properties::Any::REGEX.source
|
5
5
|
SRC_CC = UCMicro::Categories::Cc::REGEX.source
|
6
6
|
SRC_Z = UCMicro::Categories::Z::REGEX.source
|
7
7
|
SRC_P = UCMicro::Categories::P::REGEX.source
|
8
8
|
|
9
|
-
# \p{\Z\P\Cc} (white spaces + control + punctuation)
|
9
|
+
# \p{\Z\P\Cc} (white spaces + control + punctuation); NOTE: \Cf (format) is not part of the join below
|
10
10
|
SRC_Z_P_CC = [ SRC_Z, SRC_P, SRC_CC ].join('|')
|
11
11
|
|
12
12
|
# \p{\Z\Cc} (white spaces + control)
|
13
13
|
SRC_Z_CC = [ SRC_Z, SRC_CC ].join('|')
|
14
14
|
|
15
|
+
# Experimental. List of chars, completely prohibited in links
|
16
|
+
# because can separate it from other part of text
|
17
|
+
TEXT_SEPARATORS = '[><\uff5c]'
|
18
|
+
|
15
19
|
# All possible word characters (everything without punctuation, spaces & controls)
|
16
20
|
# Defined via punctuation & spaces to save space
|
17
21
|
# Should be something like \p{\L\N\S\M} (\w but without `_`)
|
18
|
-
SRC_PSEUDO_LETTER = '(?:(?!' + SRC_Z_P_CC + ')' + SRC_ANY + ')'
|
22
|
+
SRC_PSEUDO_LETTER = '(?:(?!' + TEXT_SEPARATORS + '|' + SRC_Z_P_CC + ')' + SRC_ANY + ')'
|
19
23
|
# The same as above but without [0-9]
|
20
|
-
SRC_PSEUDO_LETTER_NON_D = '(?:(?![0-9]|' + SRC_Z_P_CC + ')' + SRC_ANY + ')'
|
24
|
+
# SRC_PSEUDO_LETTER_NON_D = '(?:(?![0-9]|' + SRC_Z_P_CC + ')' + SRC_ANY + ')'
|
21
25
|
|
22
26
|
#------------------------------------------------------------------------------
|
23
27
|
|
24
28
|
SRC_IP4 = '(?:(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
|
25
|
-
|
29
|
+
|
30
|
+
# Prohibit any of "@/[]()" in user/pass to avoid wrong domain fetch.
|
31
|
+
SRC_AUTH = '(?:(?:(?!' + SRC_Z_CC + '|[@/\\[\\]()]).)+@)?'
|
26
32
|
|
27
33
|
SRC_PORT = '(?::(?:6(?:[0-4]\\d{3}|5(?:[0-4]\\d{2}|5(?:[0-2]\\d|3[0-5])))|[1-5]?\\d{1,4}))?'
|
28
34
|
|
29
|
-
SRC_HOST_TERMINATOR = '(?=$|' + SRC_Z_P_CC + ')(?!-|_|:\\d|\\.-|\\.(?!$|' + SRC_Z_P_CC + '))'
|
35
|
+
SRC_HOST_TERMINATOR = '(?=$|' + TEXT_SEPARATORS + '|' + SRC_Z_P_CC + ')(?!-|_|:\\d|\\.-|\\.(?!$|' + SRC_Z_P_CC + '))'
|
30
36
|
|
31
|
-
SRC_PATH
|
32
|
-
'(?:' +
|
33
|
-
'[/?#]' +
|
34
|
-
'(?:' +
|
35
|
-
'(?!' + SRC_Z_CC + '|[()\\[\\]{}.,"\'?!\\-]).|' +
|
36
|
-
'\\[(?:(?!' + SRC_Z_CC + '|\\]).)*\\]|' +
|
37
|
-
'\\((?:(?!' + SRC_Z_CC + '|[)]).)*\\)|' +
|
38
|
-
'\\{(?:(?!' + SRC_Z_CC + '|[}]).)*\\}|' +
|
39
|
-
'\\"(?:(?!' + SRC_Z_CC + '|["]).)+\\"|' +
|
40
|
-
"\\'(?:(?!" + SRC_Z_CC + "|[']).)+\\'|" +
|
41
|
-
"\\'(?=" + SRC_PSEUDO_LETTER + ').|' + # allow `I'm_king` if no pair found
|
42
|
-
'\\.{2,3}[a-zA-Z0-9%/]|' + # github has ... in commit range links. Restrict to
|
43
|
-
# - english
|
44
|
-
# - percent-encoded
|
45
|
-
# - parts of file path
|
46
|
-
# until more examples found.
|
47
|
-
'\\.(?!' + SRC_Z_CC + '|[.]).|' +
|
48
|
-
'\\-(?!--(?:[^-]|$))(?:-*)|' + # `---` => long dash, terminate
|
49
|
-
'\\,(?!' + SRC_Z_CC + ').|' + # allow `,,,` in paths
|
50
|
-
'\\!(?!' + SRC_Z_CC + '|[!]).|' +
|
51
|
-
'\\?(?!' + SRC_Z_CC + '|[?]).' +
|
52
|
-
')+' +
|
53
|
-
'|\\/' +
|
54
|
-
')?'
|
37
|
+
# moved SRC_PATH into re_src_path
|
55
38
|
|
56
39
|
SRC_EMAIL_NAME = '[\\-;:&=\\+\\$,\\"\\.a-zA-Z0-9_]+'
|
57
40
|
SRC_XN = 'xn--[a-z0-9\\-]{1,59}'
|
@@ -59,15 +42,15 @@ module LinkifyRe
|
|
59
42
|
# More to read about domain names
|
60
43
|
# http://serverfault.com/questions/638260/
|
61
44
|
|
62
|
-
SRC_DOMAIN_ROOT =
|
63
|
-
#
|
45
|
+
SRC_DOMAIN_ROOT =
|
46
|
+
# Allow letters & digits (http://test1)
|
64
47
|
'(?:' +
|
65
48
|
SRC_XN +
|
66
49
|
'|' +
|
67
|
-
|
50
|
+
SRC_PSEUDO_LETTER + '{1,63}' +
|
68
51
|
')'
|
69
52
|
|
70
|
-
SRC_DOMAIN =
|
53
|
+
SRC_DOMAIN =
|
71
54
|
'(?:' +
|
72
55
|
SRC_XN +
|
73
56
|
'|' +
|
@@ -79,14 +62,15 @@ module LinkifyRe
|
|
79
62
|
'(?:' + SRC_PSEUDO_LETTER + '(?:-(?!-)|' + SRC_PSEUDO_LETTER + '){0,61}' + SRC_PSEUDO_LETTER + ')' +
|
80
63
|
')'
|
81
64
|
|
82
|
-
SRC_HOST =
|
65
|
+
SRC_HOST =
|
83
66
|
'(?:' +
|
84
|
-
|
85
|
-
|
86
|
-
|
67
|
+
# Don't need IP check, because digits are already allowed in normal domain names
|
68
|
+
# SRC_IP4 +
|
69
|
+
# '|' +
|
70
|
+
'(?:(?:(?:' + SRC_DOMAIN + ')\\.)*' + SRC_DOMAIN + ')' +
|
87
71
|
')'
|
88
72
|
|
89
|
-
TPL_HOST_FUZZY =
|
73
|
+
TPL_HOST_FUZZY =
|
90
74
|
'(?:' +
|
91
75
|
SRC_IP4 +
|
92
76
|
'|' +
|
@@ -96,27 +80,98 @@ module LinkifyRe
|
|
96
80
|
TPL_HOST_NO_IP_FUZZY =
|
97
81
|
'(?:(?:(?:' + SRC_DOMAIN + ')\\.)+(?:%TLDS%))'
|
98
82
|
|
99
|
-
SRC_HOST_STRICT
|
100
|
-
TPL_HOST_FUZZY_STRICT
|
101
|
-
SRC_HOST_PORT_STRICT
|
102
|
-
TPL_HOST_PORT_FUZZY_STRICT
|
103
|
-
TPL_HOST_PORT_NO_IP_FUZZY_STRICT
|
104
|
-
|
83
|
+
SRC_HOST_STRICT = SRC_HOST + SRC_HOST_TERMINATOR
|
84
|
+
TPL_HOST_FUZZY_STRICT = TPL_HOST_FUZZY + SRC_HOST_TERMINATOR
|
85
|
+
SRC_HOST_PORT_STRICT = SRC_HOST + SRC_PORT + SRC_HOST_TERMINATOR
|
86
|
+
TPL_HOST_PORT_FUZZY_STRICT = TPL_HOST_FUZZY + SRC_PORT + SRC_HOST_TERMINATOR
|
87
|
+
TPL_HOST_PORT_NO_IP_FUZZY_STRICT = TPL_HOST_NO_IP_FUZZY + SRC_PORT + SRC_HOST_TERMINATOR
|
88
|
+
|
105
89
|
#------------------------------------------------------------------------------
|
106
90
|
# Main rules
|
107
91
|
|
108
92
|
# Rude test fuzzy links by host, for quick deny
|
109
|
-
TPL_HOST_FUZZY_TEST = 'localhost
|
110
|
-
TPL_EMAIL_FUZZY = '(
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
93
|
+
TPL_HOST_FUZZY_TEST = 'localhost|www\\.|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:' + SRC_Z_P_CC + '|>|$))'
|
94
|
+
TPL_EMAIL_FUZZY = '(^|' + TEXT_SEPARATORS + '|\\(|' +SRC_Z_CC + ')(' + SRC_EMAIL_NAME + '@' + TPL_HOST_FUZZY_STRICT + ')'
|
95
|
+
|
96
|
+
# moved TPL_LINK_FUZZY and TPL_LINK_NO_IP_FUZZY into build_re
|
97
|
+
|
98
|
+
#------------------------------------------------------------------------------
|
99
|
+
def build_re(opts)
|
100
|
+
re = {
|
101
|
+
src_Any: SRC_ANY,
|
102
|
+
src_Cc: SRC_CC,
|
103
|
+
src_Z: SRC_Z,
|
104
|
+
src_P: SRC_P,
|
105
|
+
src_XPCc: SRC_Z_P_CC,
|
106
|
+
src_ZCc: SRC_Z_CC,
|
107
|
+
src_pseudo_letter: SRC_PSEUDO_LETTER,
|
108
|
+
src_ip4: SRC_IP4,
|
109
|
+
src_auth: SRC_AUTH,
|
110
|
+
src_port: SRC_PORT,
|
111
|
+
src_host_terminator: SRC_HOST_TERMINATOR,
|
112
|
+
src_path: re_src_path(opts),
|
113
|
+
src_email_name: SRC_EMAIL_NAME,
|
114
|
+
src_xn: SRC_XN,
|
115
|
+
src_domain_root: SRC_DOMAIN_ROOT,
|
116
|
+
src_domain: SRC_DOMAIN,
|
117
|
+
src_host: SRC_HOST,
|
118
|
+
|
119
|
+
tpl_host_fuzzy: TPL_HOST_FUZZY,
|
120
|
+
tpl_host_no_ip_fuzzy: TPL_HOST_NO_IP_FUZZY,
|
121
|
+
src_host_strict: SRC_HOST_STRICT,
|
122
|
+
tpl_host_fuzzy_strict: TPL_HOST_FUZZY_STRICT,
|
123
|
+
src_host_port_strict: SRC_HOST_PORT_STRICT,
|
124
|
+
tpl_host_port_fuzzy_strict: TPL_HOST_PORT_FUZZY_STRICT,
|
125
|
+
tpl_host_port_no_ip_fuzzy_strict: TPL_HOST_PORT_NO_IP_FUZZY_STRICT,
|
126
|
+
|
127
|
+
tpl_host_fuzzy_test: TPL_HOST_FUZZY_TEST,
|
128
|
+
tpl_email_fuzzy: TPL_EMAIL_FUZZY
|
129
|
+
}
|
130
|
+
|
131
|
+
# Fuzzy link can't be prepended with .:/\- and non punctuation.
|
132
|
+
# but can start with > (markdown blockquote)
|
133
|
+
re[:tpl_link_fuzzy] =
|
134
|
+
'(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|' + SRC_Z_P_CC + '))' +
|
135
|
+
'((?![$+<=>^`|\uff5c])' + TPL_HOST_PORT_FUZZY_STRICT + re[:src_path] + ')'
|
136
|
+
|
137
|
+
# Fuzzy link can't be prepended with .:/\- and non punctuation.
|
138
|
+
# but can start with > (markdown blockquote)
|
139
|
+
re[:tpl_link_no_ip_fuzzy] =
|
140
|
+
'(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|' + SRC_Z_P_CC + '))' +
|
141
|
+
'((?![$+<=>^`|\uff5c])' + TPL_HOST_PORT_NO_IP_FUZZY_STRICT + re[:src_path] + ')'
|
142
|
+
|
143
|
+
return re
|
144
|
+
end
|
145
|
+
|
146
|
+
#------------------------------------------------------------------------------
|
147
|
+
def re_src_path(opts = nil)
|
148
|
+
'(?:' +
|
149
|
+
'[/?#]' +
|
150
|
+
'(?:' +
|
151
|
+
'(?!' + SRC_Z_CC + '|' + TEXT_SEPARATORS + '|[()\\[\\]{}.,"\'?!\\-]).|' +
|
152
|
+
'\\[(?:(?!' + SRC_Z_CC + '|\\]).)*\\]|' +
|
153
|
+
'\\((?:(?!' + SRC_Z_CC + '|[)]).)*\\)|' +
|
154
|
+
'\\{(?:(?!' + SRC_Z_CC + '|[}]).)*\\}|' +
|
155
|
+
'\\"(?:(?!' + SRC_Z_CC + '|["]).)+\\"|' +
|
156
|
+
"\\'(?:(?!" + SRC_Z_CC + "|[']).)+\\'|" +
|
157
|
+
"\\'(?=" + SRC_PSEUDO_LETTER + '|[-]).|' + # allow `I'm_king` if no pair found
|
158
|
+
'\\.{2,3}[a-zA-Z0-9%/]|' + # github has ... in commit range links. Restrict to
|
159
|
+
# - english
|
160
|
+
# - percent-encoded
|
161
|
+
# - parts of file path
|
162
|
+
# until more examples found.
|
163
|
+
'\\.(?!' + SRC_Z_CC + '|[.]).|' +
|
164
|
+
(opts && opts[:'---'] ?
|
165
|
+
'\\-(?!--(?:[^-]|$))(?:-*)|' # `---` => long dash, terminate
|
166
|
+
:
|
167
|
+
'\\-+|'
|
168
|
+
) +
|
169
|
+
'\\,(?!' + SRC_Z_CC + ').|' + # allow `,,,` in paths
|
170
|
+
'\\!(?!' + SRC_Z_CC + '|[!]).|' +
|
171
|
+
'\\?(?!' + SRC_Z_CC + '|[?]).' +
|
172
|
+
')+' +
|
173
|
+
'|\\/' +
|
174
|
+
')?'
|
175
|
+
end
|
176
|
+
|
122
177
|
end
|
@@ -255,4 +255,23 @@ describe 'API' do
|
|
255
255
|
expect(l.match('1.1.1.1.')[0].text).to eq '1.1.1.1'
|
256
256
|
end
|
257
257
|
|
258
|
+
#------------------------------------------------------------------------------
|
259
|
+
it 'should not hang in fuzzy mode with sequences of astrals' do
|
260
|
+
l = Linkify.new
|
261
|
+
|
262
|
+
l.set({ fuzzyLink: true })
|
263
|
+
|
264
|
+
expect(l.match('😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡 .com')).to eq []
|
265
|
+
end
|
266
|
+
|
267
|
+
#------------------------------------------------------------------------------
|
268
|
+
it 'should accept `---` if enabled' do
|
269
|
+
l = Linkify.new
|
270
|
+
|
271
|
+
expect(l.match('http://e.com/foo---bar')[0].text).to eq 'http://e.com/foo---bar'
|
272
|
+
|
273
|
+
l = Linkify.new(nil, { '---': true })
|
274
|
+
|
275
|
+
expect(l.match('http://e.com/foo---bar')[0].text).to eq 'http://e.com/foo'
|
276
|
+
end
|
258
277
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
require 'byebug'
|
1
|
+
require 'pry-byebug'
|
2
2
|
require 'linkify-it-rb'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkify-it-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Walker
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2018-04-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: uc.micro-rb
|
@@ -25,6 +25,20 @@ dependencies:
|
|
25
25
|
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '1.0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: bacon-expect
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '1.0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '1.0'
|
28
42
|
description: Ruby version of linkify-it for motion-markdown-it, for Ruby and RubyMotion
|
29
43
|
email: github@digitalmoksha.com
|
30
44
|
executables: []
|
@@ -58,10 +72,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
72
|
version: '0'
|
59
73
|
requirements: []
|
60
74
|
rubyforge_project:
|
61
|
-
rubygems_version: 2.
|
75
|
+
rubygems_version: 2.6.8
|
62
76
|
signing_key:
|
63
77
|
specification_version: 4
|
64
78
|
summary: linkify-it for motion-markdown-it in Ruby
|
65
79
|
test_files:
|
66
|
-
- spec/linkify-it-rb/test_spec.rb
|
67
80
|
- spec/spec_helper.rb
|
81
|
+
- spec/linkify-it-rb/test_spec.rb
|