linkify-it-rb 1.2.0 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +18 -14
- data/lib/linkify-it-rb.rb +7 -7
- data/lib/linkify-it-rb/index.rb +46 -23
- data/lib/linkify-it-rb/re.rb +113 -58
- data/lib/linkify-it-rb/version.rb +1 -1
- data/spec/linkify-it-rb/test_spec.rb +19 -0
- data/spec/spec_helper.rb +1 -1
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 176768c4d108c26b13260d1e62c1cb30cd0491f5
|
4
|
+
data.tar.gz: a47bd97acdf25582ce67d65551a1d94678d56aa3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1cbcc4a1e42a24df990484e927b1049a81ff87a8bd0a53f43d32f9d0687ec2bcfb2ec81778e1fbd4a33e11c9e41796e4787d84b8f3f55461ed853dfe4b40c05e
|
7
|
+
data.tar.gz: 5b25b74826e53b4257fdc2b4ea1b0422659d44c9ec69e877ee37b396d9597654db71811d2593193552785cdb8a71258485d4a57a6447b2036cbf324386d0380f
|
data/README.md
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
# linkify-it-rb
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/linkify-it-rb.svg)](http://badge.fury.io/rb/linkify-it-rb)
|
4
|
-
|
5
|
-
Links recognition library with full unicode support. Focused on high quality link pattern detection in plain text. For use with both Ruby and RubyMotion.
|
4
|
+
[![Build Status](https://travis-ci.org/digitalmoksha/linkify-it-rb.svg?branch=master)](https://travis-ci.org/digitalmoksha/linkify-it-rb)
|
6
5
|
|
7
6
|
This gem is a port of the [linkify-it javascript package](https://github.com/markdown-it/linkify-it) by Vitaly Puzrin, that is used for the [markdown-it](https://github.com/markdown-it/markdown-it) package.
|
8
7
|
|
8
|
+
_Currently synced with linkify-it 2.0.3_
|
9
|
+
|
10
|
+
---
|
11
|
+
|
12
|
+
Links recognition library with full unicode support. Focused on high quality link pattern detection in plain text. For use with both Ruby and RubyMotion.
|
13
|
+
|
9
14
|
__[Javascript Demo](http://markdown-it.github.io/linkify-it/)__
|
10
15
|
|
11
16
|
Features:
|
@@ -46,8 +51,8 @@ Usage examples
|
|
46
51
|
```ruby
|
47
52
|
linkify = Linkify.new
|
48
53
|
|
49
|
-
# add
|
50
|
-
linkify.tlds('
|
54
|
+
# Reload full tlds list & add unofficial `.onion` domain.
|
55
|
+
linkify.tlds('onion', true) # Add unofficial `.onion` domain
|
51
56
|
linkify.add('git:', 'http:') # Add `git:` protocol as "alias"
|
52
57
|
linkify.add('ftp:', nil) # Disable `ftp:` protocol
|
53
58
|
linkify.set({fuzzyIP: true}) # Enable IPs in fuzzy links (without schema)
|
@@ -59,7 +64,7 @@ linkify.match('Site github.com!'))
|
|
59
64
|
=> [#<Linkify::Match @schema="", @index=5, @lastIndex=15, @raw="github.com", @text="github.com", @url="github.com">]
|
60
65
|
```
|
61
66
|
|
62
|
-
#####
|
67
|
+
##### Example 2. Add twitter mentions handler
|
63
68
|
|
64
69
|
```ruby
|
65
70
|
linkify.add('@', {
|
@@ -96,7 +101,7 @@ By default understands:
|
|
96
101
|
`schemas` is a Hash, where each key/value describes protocol/rule:
|
97
102
|
|
98
103
|
- __key__ - link prefix (usually, protocol name with `:` at the end, `skype:`
|
99
|
-
for example). `linkify-it-rb` makes
|
104
|
+
for example). `linkify-it-rb` makes sure that prefix is not preceded with
|
100
105
|
alphanumeric char.
|
101
106
|
- __value__ - rule to check tail after link prefix
|
102
107
|
- _String_ - just alias to existing rule
|
@@ -108,10 +113,11 @@ By default understands:
|
|
108
113
|
|
109
114
|
`options`:
|
110
115
|
|
111
|
-
- __fuzzyLink__ -
|
116
|
+
- __fuzzyLink__ - recognize URL-s without `http(s)://` head. Default `true`.
|
112
117
|
- __fuzzyIP__ - allow IPs in fuzzy links above. Can conflict with some texts
|
113
118
|
like version numbers. Default `false`.
|
114
|
-
- __fuzzyEmail__ - recognize emails without `mailto:` prefix.
|
119
|
+
- __fuzzyEmail__ - recognize emails without `mailto:` prefix. Default `true`.
|
120
|
+
- __---__ - set `true` to terminate link with `---` (if it should be treated as a long dash).
|
115
121
|
|
116
122
|
|
117
123
|
### .test(text)
|
@@ -149,16 +155,14 @@ Each match has:
|
|
149
155
|
|
150
156
|
### .tlds(list[, keepOld])
|
151
157
|
|
152
|
-
Load (or merge) new tlds list. These are
|
158
|
+
Load (or merge) new tlds list. These are needed for fuzzy links (without schema)
|
153
159
|
to avoid false positives. By default this algorithm uses:
|
154
160
|
|
155
|
-
-
|
156
|
-
- biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф
|
157
|
-
are ok.
|
161
|
+
- 2-letter root zones are ok.
|
162
|
+
- biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф are ok.
|
158
163
|
- encoded (`xn--...`) root zones are ok.
|
159
164
|
|
160
|
-
If
|
161
|
-
|
165
|
+
If that's not enough, you can reload defaults with a more detailed zones list.
|
162
166
|
|
163
167
|
### .add(schema, definition)
|
164
168
|
|
data/lib/linkify-it-rb.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
if defined?(Motion::Project::Config)
|
4
|
-
|
2
|
+
|
5
3
|
lib_dir_path = File.dirname(File.expand_path(__FILE__))
|
6
4
|
Motion::Project::App.setup do |app|
|
7
|
-
app.files.unshift(Dir.glob(File.join(lib_dir_path,
|
5
|
+
app.files.unshift(Dir.glob(File.join(lib_dir_path, 'linkify-it-rb/**/*.rb')))
|
6
|
+
|
7
|
+
app.files_dependencies File.join(lib_dir_path, 'linkify-it-rb/index.rb') => File.join(lib_dir_path, 'linkify-it-rb/re.rb')
|
8
8
|
end
|
9
|
-
|
9
|
+
|
10
10
|
require 'uc.micro-rb'
|
11
11
|
|
12
12
|
else
|
13
|
-
|
13
|
+
|
14
14
|
require 'uc.micro-rb'
|
15
15
|
require 'linkify-it-rb/re'
|
16
16
|
require 'linkify-it-rb/index'
|
17
|
-
|
17
|
+
|
18
18
|
end
|
data/lib/linkify-it-rb/index.rb
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
class Linkify
|
2
2
|
include ::LinkifyRe
|
3
|
-
|
3
|
+
|
4
4
|
attr_accessor :__index__, :__last_index__, :__text_cache__, :__schema__, :__compiled__
|
5
5
|
attr_accessor :re, :bypass_normalizer
|
6
|
-
|
6
|
+
|
7
7
|
# RE pattern for 2-character tlds (autogenerated by ./support/tlds_2char_gen.js)
|
8
8
|
TLDS_2CH_SRC_RE = 'a[cdefgilmnoqrstuwxz]|b[abdefghijmnorstvwyz]|c[acdfghiklmnoruvwxyz]|d[ejkmoz]|e[cegrstu]|f[ijkmor]|g[abdefghilmnpqrstuwy]|h[kmnrtu]|i[delmnoqrst]|j[emop]|k[eghimnprwyz]|l[abcikrstuvy]|m[acdeghklmnopqrstuvwxyz]|n[acefgilopruz]|om|p[aefghklmnrstwy]|qa|r[eosuw]|s[abcdeghijklmnortuvxyz]|t[cdfghjklmnortvwz]|u[agksyz]|v[aceginu]|w[fs]|y[et]|z[amw]'
|
9
9
|
|
10
10
|
# DON'T try to make PRs with changes. Extend TLDs with LinkifyIt.tlds() instead
|
11
11
|
TLDS_DEFAULT = 'biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф'.split('|')
|
12
|
-
|
12
|
+
|
13
13
|
DEFAULT_OPTIONS = {
|
14
14
|
fuzzyLink: true,
|
15
15
|
fuzzyEmail: true,
|
@@ -23,7 +23,7 @@ class Linkify
|
|
23
23
|
|
24
24
|
if (!obj.re[:http])
|
25
25
|
# compile lazily, because "host"-containing variables can change on tlds update.
|
26
|
-
obj.re[:http] = Regexp.new('^\\/\\/' +
|
26
|
+
obj.re[:http] = Regexp.new('^\\/\\/' + obj.re[:src_auth] + obj.re[:src_host_port_strict] + obj.re[:src_path], 'i')
|
27
27
|
end
|
28
28
|
if obj.re[:http] =~ tail
|
29
29
|
return tail.match(obj.re[:http])[0].length
|
@@ -38,13 +38,24 @@ class Linkify
|
|
38
38
|
tail = text.slice(pos..-1)
|
39
39
|
|
40
40
|
if (!obj.re[:no_http])
|
41
|
-
# compile lazily,
|
42
|
-
obj.re[:no_http] = Regexp.new(
|
41
|
+
# compile lazily, because "host"-containing variables can change on tlds update.
|
42
|
+
obj.re[:no_http] = Regexp.new(
|
43
|
+
'^' +
|
44
|
+
obj.re[:src_auth] +
|
45
|
+
# Don't allow single-level domains, because of false positives like '//test'
|
46
|
+
# with code comments
|
47
|
+
'(?:localhost|(?:(?:' + obj.re[:src_domain] + ')\\.)+' + obj.re[:src_domain_root] + ')' +
|
48
|
+
obj.re[:src_port] +
|
49
|
+
obj.re[:src_host_terminator] +
|
50
|
+
obj.re[:src_path],
|
51
|
+
'i'
|
52
|
+
)
|
43
53
|
end
|
44
54
|
|
45
55
|
if (obj.re[:no_http] =~ tail)
|
46
|
-
# should not be
|
56
|
+
# should not be `://` & `///`, that protects from errors in protocol name
|
47
57
|
return 0 if (pos >= 3 && text[pos - 3] == ':')
|
58
|
+
return 0 if (pos >= 3 && text[pos - 3] == '/')
|
48
59
|
return tail.match(obj.re[:no_http])[0].length
|
49
60
|
end
|
50
61
|
return 0
|
@@ -55,7 +66,7 @@ class Linkify
|
|
55
66
|
tail = text.slice(pos..-1)
|
56
67
|
|
57
68
|
if (!obj.re[:mailto])
|
58
|
-
obj.re[:mailto] = Regexp.new('^' +
|
69
|
+
obj.re[:mailto] = Regexp.new('^' + obj.re[:src_email_name] + '@' + obj.re[:src_host_strict], 'i')
|
59
70
|
end
|
60
71
|
if (obj.re[:mailto] =~ tail)
|
61
72
|
return tail.match(obj.re[:mailto])[0].length
|
@@ -104,18 +115,21 @@ class Linkify
|
|
104
115
|
#
|
105
116
|
#------------------------------------------------------------------------------
|
106
117
|
def compile
|
107
|
-
@re =
|
118
|
+
@re = build_re(@__opts__)
|
108
119
|
|
109
120
|
# Define dynamic patterns
|
110
121
|
tlds = @__tlds__.dup
|
122
|
+
|
123
|
+
onCompile
|
124
|
+
|
111
125
|
tlds.push(TLDS_2CH_SRC_RE) if (!@__tlds_replaced__)
|
112
126
|
tlds.push(@re[:src_xn])
|
113
127
|
|
114
|
-
@re[:src_tlds]
|
115
|
-
@re[:email_fuzzy] = Regexp.new(
|
116
|
-
@re[:link_fuzzy] = Regexp.new(
|
117
|
-
@re[:link_no_ip_fuzzy] = Regexp.new(
|
118
|
-
@re[:host_fuzzy_test] = Regexp.new(
|
128
|
+
@re[:src_tlds] = tlds.join('|')
|
129
|
+
@re[:email_fuzzy] = Regexp.new(@re[:tpl_email_fuzzy].gsub('%TLDS%', @re[:src_tlds]), true)
|
130
|
+
@re[:link_fuzzy] = Regexp.new(@re[:tpl_link_fuzzy].gsub('%TLDS%', @re[:src_tlds]), true)
|
131
|
+
@re[:link_no_ip_fuzzy] = Regexp.new(@re[:tpl_link_no_ip_fuzzy].gsub('%TLDS%', @re[:src_tlds]), true)
|
132
|
+
@re[:host_fuzzy_test] = Regexp.new(@re[:tpl_host_fuzzy_test].gsub('%TLDS%', @re[:src_tlds]), true)
|
119
133
|
|
120
134
|
#
|
121
135
|
# Compile each schema
|
@@ -190,8 +204,8 @@ class Linkify
|
|
190
204
|
slist = @__compiled__.select {|name, val| name.length > 0 && !val.nil? }.keys.map {|str| escapeRE(str)}.join('|')
|
191
205
|
|
192
206
|
# (?!_) cause 1.5x slowdown
|
193
|
-
@re[:schema_test] = Regexp.new('(^|(?!_)(
|
194
|
-
@re[:schema_search] = Regexp.new('(^|(?!_)(
|
207
|
+
@re[:schema_test] = Regexp.new('(^|(?!_)(?:[><\uff5c]|' + @re[:src_XPCc] + '))(' + slist + ')', 'i')
|
208
|
+
@re[:schema_search] = Regexp.new('(^|(?!_)(?:[><\uff5c]|' + @re[:src_XPCc] + '))(' + slist + ')', 'ig')
|
195
209
|
|
196
210
|
@re[:pretest] = Regexp.new(
|
197
211
|
'(' + @re[:schema_test].source + ')|' +
|
@@ -203,12 +217,12 @@ class Linkify
|
|
203
217
|
|
204
218
|
resetScanCache
|
205
219
|
end
|
206
|
-
|
220
|
+
|
207
221
|
# Match result. Single element of array, returned by [[LinkifyIt#match]]
|
208
222
|
#------------------------------------------------------------------------------
|
209
223
|
class Match
|
210
224
|
attr_accessor :schema, :index, :lastIndex, :raw, :text, :url
|
211
|
-
|
225
|
+
|
212
226
|
def initialize(obj, shift)
|
213
227
|
start = obj.__index__
|
214
228
|
endt = obj.__last_index__
|
@@ -288,11 +302,14 @@ class Linkify
|
|
288
302
|
#
|
289
303
|
#------------------------------------------------------------------------------
|
290
304
|
def initialize(schemas = {}, options = {})
|
305
|
+
schemas = {} unless schemas
|
306
|
+
|
307
|
+
# not needed
|
291
308
|
# if (!(this instanceof LinkifyIt)) {
|
292
309
|
# return new LinkifyIt(schemas, options);
|
293
310
|
# }
|
294
311
|
|
295
|
-
|
312
|
+
# not needed, if you want to pass options, then must also pass schemas
|
296
313
|
# if options.empty?
|
297
314
|
# if (isOptionsObj(schemas)) {
|
298
315
|
# options = schemas;
|
@@ -321,7 +338,6 @@ class Linkify
|
|
321
338
|
compile
|
322
339
|
end
|
323
340
|
|
324
|
-
|
325
341
|
# chainable
|
326
342
|
# LinkifyIt#add(schema, definition)
|
327
343
|
# - schema (String): rule name (fixed pattern prefix)
|
@@ -356,7 +372,7 @@ class Linkify
|
|
356
372
|
@__index__ = -1
|
357
373
|
|
358
374
|
return false if (!text.length)
|
359
|
-
|
375
|
+
|
360
376
|
# try to scan for link with schema - that's the most simple rule
|
361
377
|
if @re[:schema_test] =~ text
|
362
378
|
re = @re[:schema_search]
|
@@ -449,7 +465,7 @@ class Linkify
|
|
449
465
|
# LinkifyIt#match(text) -> Array|null
|
450
466
|
#
|
451
467
|
# Returns array of found link descriptions or `null` on fail. We strongly suggest
|
452
|
-
# to use [[LinkifyIt#test]] first, for best speed.
|
468
|
+
# using [[LinkifyIt#test]] first, for best speed.
|
453
469
|
#
|
454
470
|
# ##### Result match description
|
455
471
|
#
|
@@ -527,7 +543,7 @@ class Linkify
|
|
527
543
|
#------------------------------------------------------------------------------
|
528
544
|
def normalize(match)
|
529
545
|
return if @bypass_normalizer
|
530
|
-
|
546
|
+
|
531
547
|
# Do minimal possible changes by default. Need to collect feedback prior
|
532
548
|
# to move forward https://github.com/markdown-it/linkify-it/issues/1
|
533
549
|
|
@@ -538,4 +554,11 @@ class Linkify
|
|
538
554
|
end
|
539
555
|
end
|
540
556
|
|
557
|
+
# LinkifyIt#onCompile()
|
558
|
+
#
|
559
|
+
# Override to modify basic RegExp-s.
|
560
|
+
#------------------------------------------------------------------------------
|
561
|
+
def onCompile
|
562
|
+
end
|
563
|
+
|
541
564
|
end
|
data/lib/linkify-it-rb/re.rb
CHANGED
@@ -1,57 +1,40 @@
|
|
1
1
|
module LinkifyRe
|
2
|
-
|
2
|
+
|
3
3
|
# Use direct extract instead of `regenerate` to reduce size
|
4
4
|
SRC_ANY = UCMicro::Properties::Any::REGEX.source
|
5
5
|
SRC_CC = UCMicro::Categories::Cc::REGEX.source
|
6
6
|
SRC_Z = UCMicro::Categories::Z::REGEX.source
|
7
7
|
SRC_P = UCMicro::Categories::P::REGEX.source
|
8
8
|
|
9
|
-
# \p{\Z\P\Cc} (white spaces + control + punctuation)
|
9
|
+
# \p{\Z\P\Cc\Cf} (white spaces + control + format + punctuation)
|
10
10
|
SRC_Z_P_CC = [ SRC_Z, SRC_P, SRC_CC ].join('|')
|
11
11
|
|
12
12
|
# \p{\Z\Cc} (white spaces + control)
|
13
13
|
SRC_Z_CC = [ SRC_Z, SRC_CC ].join('|')
|
14
14
|
|
15
|
+
# Experimental. List of chars, completely prohibited in links
|
16
|
+
# because can separate it from other part of text
|
17
|
+
TEXT_SEPARATORS = '[><\uff5c]'
|
18
|
+
|
15
19
|
# All possible word characters (everything without punctuation, spaces & controls)
|
16
20
|
# Defined via punctuation & spaces to save space
|
17
21
|
# Should be something like \p{\L\N\S\M} (\w but without `_`)
|
18
|
-
SRC_PSEUDO_LETTER = '(?:(?!' + SRC_Z_P_CC + ')' + SRC_ANY + ')'
|
22
|
+
SRC_PSEUDO_LETTER = '(?:(?!' + TEXT_SEPARATORS + '|' + SRC_Z_P_CC + ')' + SRC_ANY + ')'
|
19
23
|
# The same as above but without [0-9]
|
20
|
-
SRC_PSEUDO_LETTER_NON_D = '(?:(?![0-9]|' + SRC_Z_P_CC + ')' + SRC_ANY + ')'
|
24
|
+
# SRC_PSEUDO_LETTER_NON_D = '(?:(?![0-9]|' + SRC_Z_P_CC + ')' + SRC_ANY + ')'
|
21
25
|
|
22
26
|
#------------------------------------------------------------------------------
|
23
27
|
|
24
28
|
SRC_IP4 = '(?:(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
|
25
|
-
|
29
|
+
|
30
|
+
# Prohibit any of "@/[]()" in user/pass to avoid wrong domain fetch.
|
31
|
+
SRC_AUTH = '(?:(?:(?!' + SRC_Z_CC + '|[@/\\[\\]()]).)+@)?'
|
26
32
|
|
27
33
|
SRC_PORT = '(?::(?:6(?:[0-4]\\d{3}|5(?:[0-4]\\d{2}|5(?:[0-2]\\d|3[0-5])))|[1-5]?\\d{1,4}))?'
|
28
34
|
|
29
|
-
SRC_HOST_TERMINATOR = '(?=$|' + SRC_Z_P_CC + ')(?!-|_|:\\d|\\.-|\\.(?!$|' + SRC_Z_P_CC + '))'
|
35
|
+
SRC_HOST_TERMINATOR = '(?=$|' + TEXT_SEPARATORS + '|' + SRC_Z_P_CC + ')(?!-|_|:\\d|\\.-|\\.(?!$|' + SRC_Z_P_CC + '))'
|
30
36
|
|
31
|
-
SRC_PATH
|
32
|
-
'(?:' +
|
33
|
-
'[/?#]' +
|
34
|
-
'(?:' +
|
35
|
-
'(?!' + SRC_Z_CC + '|[()\\[\\]{}.,"\'?!\\-]).|' +
|
36
|
-
'\\[(?:(?!' + SRC_Z_CC + '|\\]).)*\\]|' +
|
37
|
-
'\\((?:(?!' + SRC_Z_CC + '|[)]).)*\\)|' +
|
38
|
-
'\\{(?:(?!' + SRC_Z_CC + '|[}]).)*\\}|' +
|
39
|
-
'\\"(?:(?!' + SRC_Z_CC + '|["]).)+\\"|' +
|
40
|
-
"\\'(?:(?!" + SRC_Z_CC + "|[']).)+\\'|" +
|
41
|
-
"\\'(?=" + SRC_PSEUDO_LETTER + ').|' + # allow `I'm_king` if no pair found
|
42
|
-
'\\.{2,3}[a-zA-Z0-9%/]|' + # github has ... in commit range links. Restrict to
|
43
|
-
# - english
|
44
|
-
# - percent-encoded
|
45
|
-
# - parts of file path
|
46
|
-
# until more examples found.
|
47
|
-
'\\.(?!' + SRC_Z_CC + '|[.]).|' +
|
48
|
-
'\\-(?!--(?:[^-]|$))(?:-*)|' + # `---` => long dash, terminate
|
49
|
-
'\\,(?!' + SRC_Z_CC + ').|' + # allow `,,,` in paths
|
50
|
-
'\\!(?!' + SRC_Z_CC + '|[!]).|' +
|
51
|
-
'\\?(?!' + SRC_Z_CC + '|[?]).' +
|
52
|
-
')+' +
|
53
|
-
'|\\/' +
|
54
|
-
')?'
|
37
|
+
# moved SRC_PATH into re_src_path
|
55
38
|
|
56
39
|
SRC_EMAIL_NAME = '[\\-;:&=\\+\\$,\\"\\.a-zA-Z0-9_]+'
|
57
40
|
SRC_XN = 'xn--[a-z0-9\\-]{1,59}'
|
@@ -59,15 +42,15 @@ module LinkifyRe
|
|
59
42
|
# More to read about domain names
|
60
43
|
# http://serverfault.com/questions/638260/
|
61
44
|
|
62
|
-
SRC_DOMAIN_ROOT =
|
63
|
-
#
|
45
|
+
SRC_DOMAIN_ROOT =
|
46
|
+
# Allow letters & digits (http://test1)
|
64
47
|
'(?:' +
|
65
48
|
SRC_XN +
|
66
49
|
'|' +
|
67
|
-
|
50
|
+
SRC_PSEUDO_LETTER + '{1,63}' +
|
68
51
|
')'
|
69
52
|
|
70
|
-
SRC_DOMAIN =
|
53
|
+
SRC_DOMAIN =
|
71
54
|
'(?:' +
|
72
55
|
SRC_XN +
|
73
56
|
'|' +
|
@@ -79,14 +62,15 @@ module LinkifyRe
|
|
79
62
|
'(?:' + SRC_PSEUDO_LETTER + '(?:-(?!-)|' + SRC_PSEUDO_LETTER + '){0,61}' + SRC_PSEUDO_LETTER + ')' +
|
80
63
|
')'
|
81
64
|
|
82
|
-
SRC_HOST =
|
65
|
+
SRC_HOST =
|
83
66
|
'(?:' +
|
84
|
-
|
85
|
-
|
86
|
-
|
67
|
+
# Don't need IP check, because digits are already allowed in normal domain names
|
68
|
+
# SRC_IP4 +
|
69
|
+
# '|' +
|
70
|
+
'(?:(?:(?:' + SRC_DOMAIN + ')\\.)*' + SRC_DOMAIN + ')' +
|
87
71
|
')'
|
88
72
|
|
89
|
-
TPL_HOST_FUZZY =
|
73
|
+
TPL_HOST_FUZZY =
|
90
74
|
'(?:' +
|
91
75
|
SRC_IP4 +
|
92
76
|
'|' +
|
@@ -96,27 +80,98 @@ module LinkifyRe
|
|
96
80
|
TPL_HOST_NO_IP_FUZZY =
|
97
81
|
'(?:(?:(?:' + SRC_DOMAIN + ')\\.)+(?:%TLDS%))'
|
98
82
|
|
99
|
-
SRC_HOST_STRICT
|
100
|
-
TPL_HOST_FUZZY_STRICT
|
101
|
-
SRC_HOST_PORT_STRICT
|
102
|
-
TPL_HOST_PORT_FUZZY_STRICT
|
103
|
-
TPL_HOST_PORT_NO_IP_FUZZY_STRICT
|
104
|
-
|
83
|
+
SRC_HOST_STRICT = SRC_HOST + SRC_HOST_TERMINATOR
|
84
|
+
TPL_HOST_FUZZY_STRICT = TPL_HOST_FUZZY + SRC_HOST_TERMINATOR
|
85
|
+
SRC_HOST_PORT_STRICT = SRC_HOST + SRC_PORT + SRC_HOST_TERMINATOR
|
86
|
+
TPL_HOST_PORT_FUZZY_STRICT = TPL_HOST_FUZZY + SRC_PORT + SRC_HOST_TERMINATOR
|
87
|
+
TPL_HOST_PORT_NO_IP_FUZZY_STRICT = TPL_HOST_NO_IP_FUZZY + SRC_PORT + SRC_HOST_TERMINATOR
|
88
|
+
|
105
89
|
#------------------------------------------------------------------------------
|
106
90
|
# Main rules
|
107
91
|
|
108
92
|
# Rude test fuzzy links by host, for quick deny
|
109
|
-
TPL_HOST_FUZZY_TEST = 'localhost
|
110
|
-
TPL_EMAIL_FUZZY = '(
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
93
|
+
TPL_HOST_FUZZY_TEST = 'localhost|www\\.|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:' + SRC_Z_P_CC + '|>|$))'
|
94
|
+
TPL_EMAIL_FUZZY = '(^|' + TEXT_SEPARATORS + '|\\(|' +SRC_Z_CC + ')(' + SRC_EMAIL_NAME + '@' + TPL_HOST_FUZZY_STRICT + ')'
|
95
|
+
|
96
|
+
# moved TPL_LINK_FUZZY and TPL_LINK_NO_IP_FUZZY into build_re
|
97
|
+
|
98
|
+
#------------------------------------------------------------------------------
|
99
|
+
def build_re(opts)
|
100
|
+
re = {
|
101
|
+
src_Any: SRC_ANY,
|
102
|
+
src_Cc: SRC_CC,
|
103
|
+
src_Z: SRC_Z,
|
104
|
+
src_P: SRC_P,
|
105
|
+
src_XPCc: SRC_Z_P_CC,
|
106
|
+
src_ZCc: SRC_Z_CC,
|
107
|
+
src_pseudo_letter: SRC_PSEUDO_LETTER,
|
108
|
+
src_ip4: SRC_IP4,
|
109
|
+
src_auth: SRC_AUTH,
|
110
|
+
src_port: SRC_PORT,
|
111
|
+
src_host_terminator: SRC_HOST_TERMINATOR,
|
112
|
+
src_path: re_src_path(opts),
|
113
|
+
src_email_name: SRC_EMAIL_NAME,
|
114
|
+
src_xn: SRC_XN,
|
115
|
+
src_domain_root: SRC_DOMAIN_ROOT,
|
116
|
+
src_domain: SRC_DOMAIN,
|
117
|
+
src_host: SRC_HOST,
|
118
|
+
|
119
|
+
tpl_host_fuzzy: TPL_HOST_FUZZY,
|
120
|
+
tpl_host_no_ip_fuzzy: TPL_HOST_NO_IP_FUZZY,
|
121
|
+
src_host_strict: SRC_HOST_STRICT,
|
122
|
+
tpl_host_fuzzy_strict: TPL_HOST_FUZZY_STRICT,
|
123
|
+
src_host_port_strict: SRC_HOST_PORT_STRICT,
|
124
|
+
tpl_host_port_fuzzy_strict: TPL_HOST_PORT_FUZZY_STRICT,
|
125
|
+
tpl_host_port_no_ip_fuzzy_strict: TPL_HOST_PORT_NO_IP_FUZZY_STRICT,
|
126
|
+
|
127
|
+
tpl_host_fuzzy_test: TPL_HOST_FUZZY_TEST,
|
128
|
+
tpl_email_fuzzy: TPL_EMAIL_FUZZY
|
129
|
+
}
|
130
|
+
|
131
|
+
# Fuzzy link can't be prepended with .:/\- and non punctuation.
|
132
|
+
# but can start with > (markdown blockquote)
|
133
|
+
re[:tpl_link_fuzzy] =
|
134
|
+
'(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|' + SRC_Z_P_CC + '))' +
|
135
|
+
'((?![$+<=>^`|\uff5c])' + TPL_HOST_PORT_FUZZY_STRICT + re[:src_path] + ')'
|
136
|
+
|
137
|
+
# Fuzzy link can't be prepended with .:/\- and non punctuation.
|
138
|
+
# but can start with > (markdown blockquote)
|
139
|
+
re[:tpl_link_no_ip_fuzzy] =
|
140
|
+
'(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|' + SRC_Z_P_CC + '))' +
|
141
|
+
'((?![$+<=>^`|\uff5c])' + TPL_HOST_PORT_NO_IP_FUZZY_STRICT + re[:src_path] + ')'
|
142
|
+
|
143
|
+
return re
|
144
|
+
end
|
145
|
+
|
146
|
+
#------------------------------------------------------------------------------
|
147
|
+
def re_src_path(opts = nil)
|
148
|
+
'(?:' +
|
149
|
+
'[/?#]' +
|
150
|
+
'(?:' +
|
151
|
+
'(?!' + SRC_Z_CC + '|' + TEXT_SEPARATORS + '|[()\\[\\]{}.,"\'?!\\-]).|' +
|
152
|
+
'\\[(?:(?!' + SRC_Z_CC + '|\\]).)*\\]|' +
|
153
|
+
'\\((?:(?!' + SRC_Z_CC + '|[)]).)*\\)|' +
|
154
|
+
'\\{(?:(?!' + SRC_Z_CC + '|[}]).)*\\}|' +
|
155
|
+
'\\"(?:(?!' + SRC_Z_CC + '|["]).)+\\"|' +
|
156
|
+
"\\'(?:(?!" + SRC_Z_CC + "|[']).)+\\'|" +
|
157
|
+
"\\'(?=" + SRC_PSEUDO_LETTER + '|[-]).|' + # allow `I'm_king` if no pair found
|
158
|
+
'\\.{2,3}[a-zA-Z0-9%/]|' + # github has ... in commit range links. Restrict to
|
159
|
+
# - english
|
160
|
+
# - percent-encoded
|
161
|
+
# - parts of file path
|
162
|
+
# until more examples found.
|
163
|
+
'\\.(?!' + SRC_Z_CC + '|[.]).|' +
|
164
|
+
(opts && opts[:'---'] ?
|
165
|
+
'\\-(?!--(?:[^-]|$))(?:-*)|' # `---` => long dash, terminate
|
166
|
+
:
|
167
|
+
'\\-+|'
|
168
|
+
) +
|
169
|
+
'\\,(?!' + SRC_Z_CC + ').|' + # allow `,,,` in paths
|
170
|
+
'\\!(?!' + SRC_Z_CC + '|[!]).|' +
|
171
|
+
'\\?(?!' + SRC_Z_CC + '|[?]).' +
|
172
|
+
')+' +
|
173
|
+
'|\\/' +
|
174
|
+
')?'
|
175
|
+
end
|
176
|
+
|
122
177
|
end
|
@@ -255,4 +255,23 @@ describe 'API' do
|
|
255
255
|
expect(l.match('1.1.1.1.')[0].text).to eq '1.1.1.1'
|
256
256
|
end
|
257
257
|
|
258
|
+
#------------------------------------------------------------------------------
|
259
|
+
it 'should not hang in fuzzy mode with sequences of astrals' do
|
260
|
+
l = Linkify.new
|
261
|
+
|
262
|
+
l.set({ fuzzyLink: true })
|
263
|
+
|
264
|
+
expect(l.match('😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡 .com')).to eq []
|
265
|
+
end
|
266
|
+
|
267
|
+
#------------------------------------------------------------------------------
|
268
|
+
it 'should accept `---` if enabled' do
|
269
|
+
l = Linkify.new
|
270
|
+
|
271
|
+
expect(l.match('http://e.com/foo---bar')[0].text).to eq 'http://e.com/foo---bar'
|
272
|
+
|
273
|
+
l = Linkify.new(nil, { '---': true })
|
274
|
+
|
275
|
+
expect(l.match('http://e.com/foo---bar')[0].text).to eq 'http://e.com/foo'
|
276
|
+
end
|
258
277
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
require 'byebug'
|
1
|
+
require 'pry-byebug'
|
2
2
|
require 'linkify-it-rb'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkify-it-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Walker
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2018-04-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: uc.micro-rb
|
@@ -25,6 +25,20 @@ dependencies:
|
|
25
25
|
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '1.0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: bacon-expect
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '1.0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '1.0'
|
28
42
|
description: Ruby version of linkify-it for motion-markdown-it, for Ruby and RubyMotion
|
29
43
|
email: github@digitalmoksha.com
|
30
44
|
executables: []
|
@@ -58,10 +72,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
72
|
version: '0'
|
59
73
|
requirements: []
|
60
74
|
rubyforge_project:
|
61
|
-
rubygems_version: 2.
|
75
|
+
rubygems_version: 2.6.8
|
62
76
|
signing_key:
|
63
77
|
specification_version: 4
|
64
78
|
summary: linkify-it for motion-markdown-it in Ruby
|
65
79
|
test_files:
|
66
|
-
- spec/linkify-it-rb/test_spec.rb
|
67
80
|
- spec/spec_helper.rb
|
81
|
+
- spec/linkify-it-rb/test_spec.rb
|