linkify-it-rb 0.1.0.0 → 1.0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +66 -73
- data/lib/linkify-it-rb/index.rb +18 -26
- data/lib/linkify-it-rb/re.rb +28 -29
- data/lib/linkify-it-rb/version.rb +1 -1
- data/spec/linkify-it-rb/test_spec.rb +153 -166
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6c47469712f3d6de315f05caeca61b4c02c52a32
|
4
|
+
data.tar.gz: 3b51dd1d0651a17efb07cacb27df11615f17683e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ace911d1c5873013966ad4c0438e6d933270d126adf686d2544205b906bd2364fd4290b5585967fa06716798061943161af029d3ece148809eef4f47261fa54
|
7
|
+
data.tar.gz: 3c3fdf523803ce1641a40ae659fd8d40508469f44843c2d435ef4fc775c7bc6423638a8c32c66cd03f149b3df35f48ea46671e799b0f2c42af61dbfc6299451f
|
data/README.md
CHANGED
@@ -1,115 +1,108 @@
|
|
1
1
|
# linkify-it-rb
|
2
2
|
|
3
|
-
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/linkify-it-rb.svg)](http://badge.fury.io/rb/linkify-it-rb)
|
4
|
+
|
5
|
+
Links recognition library with full unicode support. Focused on high quality link pattern detection in plain text. For use with both Ruby and RubyMotion.
|
4
6
|
|
5
7
|
This gem is a port of the [linkify-it javascript package](https://github.com/markdown-it/linkify-it) by Vitaly Puzrin, that is used for the [markdown-it](https://github.com/markdown-it/markdown-it) package.
|
6
8
|
|
7
9
|
__[Javascript Demo](http://markdown-it.github.io/linkify-it/)__
|
8
10
|
|
9
|
-
|
11
|
+
Features:
|
10
12
|
|
13
|
+
- Full unicode support, with astral characters
|
14
|
+
- International domain support
|
15
|
+
- Allows rules extension & custom normalizers
|
11
16
|
|
12
|
-
## To be updated: Original Javascript package documentation
|
13
17
|
|
14
|
-
|
18
|
+
Install
|
19
|
+
-------
|
15
20
|
|
16
|
-
|
17
|
-
- International domains support.
|
18
|
-
- Allows rules extension & custom normalizers.
|
21
|
+
### Ruby
|
19
22
|
|
23
|
+
Add it to your project's `Gemfile`
|
20
24
|
|
21
|
-
|
22
|
-
-------
|
25
|
+
gem 'linkify-it-rb'
|
23
26
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
+
and run `bundle install`
|
28
|
+
|
29
|
+
### RubyMotion
|
27
30
|
|
28
|
-
|
31
|
+
Add it to your project's `Gemfile`
|
29
32
|
|
33
|
+
gem 'linkify-it-rb'
|
34
|
+
|
35
|
+
Edit your `Rakefile` and add
|
36
|
+
|
37
|
+
require 'linkify-it-rb'
|
38
|
+
|
39
|
+
and run `bundle install`
|
30
40
|
|
31
41
|
Usage examples
|
32
42
|
--------------
|
33
43
|
|
34
44
|
##### Example 1
|
35
45
|
|
36
|
-
```
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
linkify
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
// schema: "",
|
50
|
-
// index: 5,
|
51
|
-
// lastIndex: 15,
|
52
|
-
// raw: "github.com",
|
53
|
-
// text: "github.com",
|
54
|
-
// url: "http://github.com",
|
55
|
-
// } ]
|
46
|
+
```ruby
|
47
|
+
linkify = Linkify.new
|
48
|
+
|
49
|
+
# add unofficial `.mydomain` domain.
|
50
|
+
linkify.tlds('.mydomain', true) # Add unofficial `.mydomain` domain
|
51
|
+
linkify.add('git:', 'http:') # Add `git:` protocol as "alias"
|
52
|
+
linkify.add('ftp:', nil) # Disable `ftp:` protocol
|
53
|
+
|
54
|
+
linkify.test('Site github.com!')
|
55
|
+
=> true
|
56
|
+
|
57
|
+
linkify.match('Site github.com!')
|
58
|
+
=> [#<Linkify::Match @schema="", @index=5, @lastIndex=15, @raw="github.com", @text="github.com", @url="github.com">]
|
56
59
|
```
|
57
60
|
|
58
61
|
##### Example 2. Add twitter mentions handler
|
59
62
|
|
60
|
-
```
|
63
|
+
```ruby
|
61
64
|
linkify.add('@', {
|
62
|
-
validate:
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
)
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
}
|
78
|
-
return 0;
|
79
|
-
},
|
80
|
-
normalize: function (match) {
|
81
|
-
match.url = 'https://twitter.com/' + match.url.replace(/^@/, '');
|
82
|
-
}
|
83
|
-
});
|
65
|
+
validate: lambda do |text, pos, obj|
|
66
|
+
tail = text.slice(pos..-1)
|
67
|
+
if (!obj.re[:twitter])
|
68
|
+
obj.re[:twitter] = Regexp.new('^([a-zA-Z0-9_]){1,15}(?!_)(?=$|' + LinkifyRe::SRC_Z_P_CC + ')')
|
69
|
+
end
|
70
|
+
if (obj.re[:twitter] =~ tail)
|
71
|
+
return 0 if (pos >= 2 && text[pos - 2] == '@')
|
72
|
+
return tail.match(obj.re[:twitter])[0].length
|
73
|
+
end
|
74
|
+
return 0
|
75
|
+
end,
|
76
|
+
normalize: lambda do |m, obj|
|
77
|
+
m.url = 'https://twitter.com/' + m.url.sub(/^@/, '')
|
78
|
+
end
|
79
|
+
})
|
84
80
|
```
|
85
81
|
|
86
82
|
|
87
83
|
API
|
88
84
|
---
|
89
85
|
|
90
|
-
|
91
|
-
|
92
|
-
### new LinkifyIt(schemas)
|
86
|
+
### LinkifyIt.new(schemas)
|
93
87
|
|
94
88
|
Creates new linkifier instance with optional additional schemas.
|
95
|
-
Can be called without `new` keyword for convenience.
|
96
89
|
|
97
90
|
By default understands:
|
98
91
|
|
99
92
|
- `http(s)://...` , `ftp://...`, `mailto:...` & `//...` links
|
100
93
|
- "fuzzy" links and emails (google.com, foo@bar.com).
|
101
94
|
|
102
|
-
`schemas` is
|
95
|
+
`schemas` is a Hash, where each key/value describes protocol/rule:
|
103
96
|
|
104
97
|
- __key__ - link prefix (usually, protocol name with `:` at the end, `skype:`
|
105
|
-
for example). `linkify-it` makes shure that prefix is not preceeded with
|
98
|
+
for example). `linkify-it-rb` makes sure that prefix is not preceded with
|
106
99
|
alphanumeric char.
|
107
100
|
- __value__ - rule to check tail after link prefix
|
108
101
|
- _String_ - just alias to existing rule
|
109
|
-
-
|
110
|
-
- _validate_ - validator
|
102
|
+
- _Hash_
|
103
|
+
- _validate_ - validator block (should return matched length on success),
|
111
104
|
or `RegExp`.
|
112
|
-
- _normalize_ - optional
|
105
|
+
- _normalize_ - optional block to normalize text & url of matched result
|
113
106
|
(for example, for twitter mentions).
|
114
107
|
|
115
108
|
|
@@ -120,20 +113,20 @@ Searches linkifiable pattern and returns `true` on success or `false` on fail.
|
|
120
113
|
|
121
114
|
### .pretest(text)
|
122
115
|
|
123
|
-
Quick check if link
|
124
|
-
`.test
|
116
|
+
Quick check if link MAYBE can exist. Can be used to optimize more expensive
|
117
|
+
`.test` calls. Return `false` if link can not be found, `true` - if `.test`
|
125
118
|
call needed to know exactly.
|
126
119
|
|
127
120
|
|
128
121
|
### .testSchemaAt(text, name, offset)
|
129
122
|
|
130
|
-
Similar to `.test
|
123
|
+
Similar to `.test` but checks only specific protocol tail exactly at given
|
131
124
|
position. Returns length of found pattern (0 on fail).
|
132
125
|
|
133
126
|
|
134
127
|
### .match(text)
|
135
128
|
|
136
|
-
Returns `Array` of found link matches or
|
129
|
+
Returns `Array` of found link matches or nil if nothing found.
|
137
130
|
|
138
131
|
Each match has:
|
139
132
|
|
@@ -148,8 +141,8 @@ Each match has:
|
|
148
141
|
|
149
142
|
### .tlds(list[, keepOld])
|
150
143
|
|
151
|
-
Load (or merge) new tlds list.
|
152
|
-
to avoid false positives. By default this
|
144
|
+
Load (or merge) new tlds list. These are used for fuzzy links (without prefix)
|
145
|
+
to avoid false positives. By default this algorithm uses:
|
153
146
|
|
154
147
|
- hostname with any 2-letter root zones are ok.
|
155
148
|
- biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф
|
@@ -162,9 +155,9 @@ If list is replaced, then exact match for 2-chars root zones will be checked.
|
|
162
155
|
### .add(schema, definition)
|
163
156
|
|
164
157
|
Add new rule with `schema` prefix. For definition details see constructor
|
165
|
-
description. To disable existing rule use `.add(name,
|
158
|
+
description. To disable existing rule use `.add(name, nil)`
|
166
159
|
|
167
160
|
|
168
161
|
## License
|
169
162
|
|
170
|
-
[MIT](https://github.com/
|
163
|
+
[MIT](https://github.com/digitalmoksha/linkify-it-rb/blob/master/LICENSE)
|
data/lib/linkify-it-rb/index.rb
CHANGED
@@ -87,25 +87,17 @@ class Linkify
|
|
87
87
|
#
|
88
88
|
#------------------------------------------------------------------------------
|
89
89
|
def compile
|
90
|
-
|
91
|
-
# Load & clone RE patterns.
|
92
|
-
re = @re = {} #.merge!(require('./lib/re'))
|
90
|
+
@re = { src_xn: LinkifyRe::SRC_XN }
|
93
91
|
|
94
92
|
# Define dynamic patterns
|
95
93
|
tlds = @__tlds__.dup
|
94
|
+
tlds.push('[a-z]{2}') if (!@__tlds_replaced__)
|
95
|
+
tlds.push(@re[:src_xn])
|
96
96
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
re[:src_tlds] = tlds.join('|')
|
103
|
-
|
104
|
-
untpl = lambda { |tpl| tpl.gsub('%TLDS%', re[:src_tlds]) }
|
105
|
-
|
106
|
-
re[:email_fuzzy] = Regexp.new(LinkifyRe::TPL_EMAIL_FUZZY.gsub('%TLDS%', re[:src_tlds]), true)
|
107
|
-
re[:link_fuzzy] = Regexp.new(LinkifyRe::TPL_LINK_FUZZY.gsub('%TLDS%', re[:src_tlds]), true)
|
108
|
-
re[:host_fuzzy_test] = Regexp.new(LinkifyRe::TPL_HOST_FUZZY_TEST.gsub('%TLDS%', re[:src_tlds]), true)
|
97
|
+
@re[:src_tlds] = tlds.join('|')
|
98
|
+
@re[:email_fuzzy] = Regexp.new(LinkifyRe::TPL_EMAIL_FUZZY.gsub('%TLDS%', @re[:src_tlds]), true)
|
99
|
+
@re[:link_fuzzy] = Regexp.new(LinkifyRe::TPL_LINK_FUZZY.gsub('%TLDS%', @re[:src_tlds]), true)
|
100
|
+
@re[:host_fuzzy_test] = Regexp.new(LinkifyRe::TPL_HOST_FUZZY_TEST.gsub('%TLDS%', @re[:src_tlds]), true)
|
109
101
|
|
110
102
|
#
|
111
103
|
# Compile each schema
|
@@ -180,8 +172,8 @@ class Linkify
|
|
180
172
|
slist = @__compiled__.select {|name, val| name.length > 0 && !val.nil? }.keys.map {|str| escapeRE(str)}.join('|')
|
181
173
|
|
182
174
|
# (?!_) cause 1.5x slowdown
|
183
|
-
@re[:schema_test] = Regexp.new('(^|(?!_)(?:>|' + LinkifyRe::
|
184
|
-
@re[:schema_search] = Regexp.new('(^|(?!_)(?:>|' + LinkifyRe::
|
175
|
+
@re[:schema_test] = Regexp.new('(^|(?!_)(?:>|' + LinkifyRe::SRC_Z_P_CC + '))(' + slist + ')', 'i')
|
176
|
+
@re[:schema_search] = Regexp.new('(^|(?!_)(?:>|' + LinkifyRe::SRC_Z_P_CC + '))(' + slist + ')', 'ig')
|
185
177
|
|
186
178
|
@re[:pretest] = Regexp.new(
|
187
179
|
'(' + @re[:schema_test].source + ')|' +
|
@@ -318,14 +310,15 @@ class Linkify
|
|
318
310
|
@__index__ = -1
|
319
311
|
|
320
312
|
return false if (!text.length)
|
321
|
-
|
313
|
+
|
322
314
|
# try to scan for link with schema - that's the most simple rule
|
323
315
|
if @re[:schema_test] =~ text
|
324
316
|
re = @re[:schema_search]
|
325
|
-
|
326
|
-
while ((m = re.match(text)) != nil)
|
327
|
-
|
328
|
-
|
317
|
+
lastIndex = 0
|
318
|
+
while ((m = re.match(text, lastIndex)) != nil)
|
319
|
+
lastIndex = m.end(0)
|
320
|
+
len = testSchemaAt(text, m[2], lastIndex)
|
321
|
+
if len > 0
|
329
322
|
@__schema__ = m[2]
|
330
323
|
@__index__ = m.begin(0) + m[1].length
|
331
324
|
@__last_index__ = m.begin(0) + m[0].length + len
|
@@ -334,9 +327,8 @@ class Linkify
|
|
334
327
|
end
|
335
328
|
end
|
336
329
|
|
330
|
+
# guess schemaless links
|
337
331
|
if (@__compiled__['http:'])
|
338
|
-
# guess schemaless links
|
339
|
-
|
340
332
|
tld_pos = text.index(@re[:host_fuzzy_test])
|
341
333
|
if !tld_pos.nil?
|
342
334
|
# if tld is located after found link - no need to check fuzzy pattern
|
@@ -355,8 +347,8 @@ class Linkify
|
|
355
347
|
end
|
356
348
|
end
|
357
349
|
|
350
|
+
# guess schemaless emails
|
358
351
|
if (@__compiled__['mailto:'])
|
359
|
-
# guess schemaless emails
|
360
352
|
at_pos = text.index('@')
|
361
353
|
if !at_pos.nil?
|
362
354
|
# We can't skip this check, because these cases are possible:
|
@@ -410,7 +402,7 @@ class Linkify
|
|
410
402
|
|
411
403
|
# LinkifyIt#match(text) -> Array|null
|
412
404
|
#
|
413
|
-
# Returns array of found link descriptions or `null` on fail. We strongly
|
405
|
+
# Returns array of found link descriptions or `null` on fail. We strongly suggest
|
414
406
|
# to use [[LinkifyIt#test]] first, for best speed.
|
415
407
|
#
|
416
408
|
# ##### Result match description
|
data/lib/linkify-it-rb/re.rb
CHANGED
@@ -1,58 +1,57 @@
|
|
1
1
|
module LinkifyRe
|
2
2
|
|
3
3
|
# Use direct extract instead of `regenerate` to reduce size
|
4
|
-
SRC_ANY = UCMicro::Properties::Any::REGEX
|
5
|
-
SRC_CC = UCMicro::Categories::Cc::REGEX
|
6
|
-
|
7
|
-
|
8
|
-
SRC_P = UCMicro::Categories::P::REGEX
|
4
|
+
SRC_ANY = UCMicro::Properties::Any::REGEX.source
|
5
|
+
SRC_CC = UCMicro::Categories::Cc::REGEX.source
|
6
|
+
SRC_Z = UCMicro::Categories::Z::REGEX.source
|
7
|
+
SRC_P = UCMicro::Categories::P::REGEX.source
|
9
8
|
|
10
|
-
# \p{\Z\P\Cc
|
11
|
-
|
9
|
+
# \p{\Z\P\Cc} (white spaces + control + punctuation)
|
10
|
+
SRC_Z_P_CC = [ SRC_Z, SRC_P, SRC_CC ].join('|')
|
12
11
|
|
13
|
-
# \p{\Z\Cc
|
14
|
-
|
12
|
+
# \p{\Z\Cc} (white spaces + control)
|
13
|
+
SRC_Z_CC = [ SRC_Z, SRC_CC ].join('|')
|
15
14
|
|
16
15
|
# All possible word characters (everything without punctuation, spaces & controls)
|
17
16
|
# Defined via punctuation & spaces to save space
|
18
17
|
# Should be something like \p{\L\N\S\M} (\w but without `_`)
|
19
|
-
SRC_PSEUDO_LETTER = '(?:(?!' +
|
18
|
+
SRC_PSEUDO_LETTER = '(?:(?!' + SRC_Z_P_CC + ')' + SRC_ANY + ')'
|
20
19
|
# The same as above but without [0-9]
|
21
|
-
SRC_PSEUDO_LETTER_NON_D = '(?:(?![0-9]|' +
|
20
|
+
SRC_PSEUDO_LETTER_NON_D = '(?:(?![0-9]|' + SRC_Z_P_CC + ')' + SRC_ANY + ')'
|
22
21
|
|
23
22
|
#------------------------------------------------------------------------------
|
24
23
|
|
25
24
|
SRC_IP4 = '(?:(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
|
26
|
-
SRC_AUTH = '(?:(?:(?!' +
|
25
|
+
SRC_AUTH = '(?:(?:(?!' + SRC_Z_CC + ').)+@)?'
|
27
26
|
|
28
27
|
SRC_PORT = '(?::(?:6(?:[0-4]\\d{3}|5(?:[0-4]\\d{2}|5(?:[0-2]\\d|3[0-5])))|[1-5]?\\d{1,4}))?'
|
29
28
|
|
30
|
-
SRC_HOST_TERMINATOR = '(?=$|' +
|
29
|
+
SRC_HOST_TERMINATOR = '(?=$|' + SRC_Z_P_CC + ')(?!-|_|:\\d|\\.-|\\.(?!$|' + SRC_Z_P_CC + '))'
|
31
30
|
|
32
31
|
SRC_PATH =
|
33
32
|
'(?:' +
|
34
33
|
'[/?#]' +
|
35
34
|
'(?:' +
|
36
|
-
'(?!' +
|
37
|
-
'\\[(?:(?!' +
|
38
|
-
'\\((?:(?!' +
|
39
|
-
'\\{(?:(?!' +
|
40
|
-
'\\"(?:(?!' +
|
41
|
-
"\\'(?:(?!" +
|
35
|
+
'(?!' + SRC_Z_CC + '|[()\\[\\]{}.,"\'?!\\-]).|' +
|
36
|
+
'\\[(?:(?!' + SRC_Z_CC + '|\\]).)*\\]|' +
|
37
|
+
'\\((?:(?!' + SRC_Z_CC + '|[)]).)*\\)|' +
|
38
|
+
'\\{(?:(?!' + SRC_Z_CC + '|[}]).)*\\}|' +
|
39
|
+
'\\"(?:(?!' + SRC_Z_CC + '|["]).)+\\"|' +
|
40
|
+
"\\'(?:(?!" + SRC_Z_CC + "|[']).)+\\'|" +
|
42
41
|
"\\'(?=" + SRC_PSEUDO_LETTER + ').|' + # allow `I'm_king` if no pair found
|
43
42
|
'\\.{2,3}[a-zA-Z0-9%]|' + # github has ... in commit range links. Restrict to
|
44
43
|
# english & percent-encoded only, until more examples found.
|
45
|
-
'\\.(?!' +
|
46
|
-
'\\-(?!' +
|
47
|
-
'\\,(?!' +
|
48
|
-
'\\!(?!' +
|
49
|
-
'\\?(?!' +
|
44
|
+
'\\.(?!' + SRC_Z_CC + '|[.]).|' +
|
45
|
+
'\\-(?!' + SRC_Z_CC + '|--(?:[^-]|$))(?:[-]+|.)|' + # `---` => long dash, terminate
|
46
|
+
'\\,(?!' + SRC_Z_CC + ').|' + # allow `,,,` in paths
|
47
|
+
'\\!(?!' + SRC_Z_CC + '|[!]).|' +
|
48
|
+
'\\?(?!' + SRC_Z_CC + '|[?]).' +
|
50
49
|
')+' +
|
51
50
|
'|\\/' +
|
52
51
|
')?'
|
53
52
|
|
54
53
|
SRC_EMAIL_NAME = '[\\-;:&=\\+\\$,\\"\\.a-zA-Z0-9_]+'
|
55
|
-
SRC_XN = 'xn--[a-z0-9\\-]{1,59}'
|
54
|
+
SRC_XN = 'xn--[a-z0-9\\-]{1,59}'
|
56
55
|
|
57
56
|
# More to read about domain names
|
58
57
|
# http://serverfault.com/questions/638260/
|
@@ -89,7 +88,7 @@ module LinkifyRe
|
|
89
88
|
SRC_IP4 +
|
90
89
|
'|' +
|
91
90
|
'(?:(?:(?:' + SRC_DOMAIN + ')\\.)+(?:%TLDS%))' +
|
92
|
-
')'
|
91
|
+
')'
|
93
92
|
|
94
93
|
SRC_HOST_STRICT = SRC_HOST + SRC_HOST_TERMINATOR
|
95
94
|
TPL_HOST_FUZZY_STRICT = TPL_HOST_FUZZY + SRC_HOST_TERMINATOR
|
@@ -100,12 +99,12 @@ module LinkifyRe
|
|
100
99
|
# Main rules
|
101
100
|
|
102
101
|
# Rude test fuzzy links by host, for quick deny
|
103
|
-
TPL_HOST_FUZZY_TEST = 'localhost|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:' +
|
104
|
-
TPL_EMAIL_FUZZY = '(^|>|' +
|
102
|
+
TPL_HOST_FUZZY_TEST = 'localhost|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:' + SRC_Z_P_CC + '|$))'
|
103
|
+
TPL_EMAIL_FUZZY = '(^|>|' + SRC_Z_CC + ')(' + SRC_EMAIL_NAME + '@' + TPL_HOST_FUZZY_STRICT + ')'
|
105
104
|
TPL_LINK_FUZZY =
|
106
105
|
# Fuzzy link can't be prepended with .:/\- and non punctuation.
|
107
106
|
# but can start with > (markdown blockquote)
|
108
|
-
'(^|(?![.:/\\-_@])(?:[$+<=>^`|]|' +
|
107
|
+
'(^|(?![.:/\\-_@])(?:[$+<=>^`|]|' + SRC_Z_P_CC + '))' +
|
109
108
|
'((?![$+<=>^`|])' + TPL_HOST_PORT_FUZZY_STRICT + SRC_PATH + ')'
|
110
109
|
|
111
110
|
end
|
@@ -1,49 +1,43 @@
|
|
1
|
+
fixture_dir = File.join(File.dirname(__FILE__), 'fixtures')
|
2
|
+
|
1
3
|
#------------------------------------------------------------------------------
|
2
4
|
describe 'links' do
|
3
5
|
|
4
|
-
# TODO tests which can't seem to get passing at the moment, so skip them
|
5
|
-
failing_test = [
|
6
|
-
95, # GOOGLE.COM. unable to get final . to be removed
|
7
|
-
214 # xn--d1abbgf6aiiy.xn--p1ai
|
8
|
-
]
|
9
|
-
|
10
6
|
l = Linkify.new
|
11
7
|
l.bypass_normalizer = true # kill the normalizer
|
12
|
-
|
8
|
+
|
13
9
|
skipNext = false
|
14
|
-
linkfile = File.join(
|
10
|
+
linkfile = File.join(fixture_dir, 'links.txt')
|
15
11
|
lines = File.read(linkfile).split(/\r?\n/)
|
16
12
|
lines.each_with_index do |line, idx|
|
17
13
|
if skipNext
|
18
14
|
skipNext = false
|
19
15
|
next
|
20
16
|
end
|
21
|
-
|
17
|
+
|
22
18
|
line = line.sub(/^%.*/, '')
|
23
19
|
next_line = (lines[idx + 1] || '').sub(/^%.*/, '')
|
24
20
|
|
25
21
|
next if line.strip.empty?
|
26
22
|
|
27
|
-
|
28
|
-
if !next_line.strip.empty?
|
23
|
+
if !next_line.strip.empty?
|
29
24
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
25
|
+
it "line #{idx + 1}" do
|
26
|
+
expect(l.pretest(line)).to eq true # "(pretest failed in `#{line}`)"
|
27
|
+
expect(l.test("\n#{line}\n")).to eq true # "(link not found in `\n#{line}\n`)"
|
28
|
+
expect(l.test(line)).to eq true # "(link not found in `#{line}`)"
|
29
|
+
expect(l.match(line)[0].url).to eq next_line
|
30
|
+
end
|
36
31
|
|
37
|
-
|
38
|
-
|
39
|
-
else
|
32
|
+
skipNext = true
|
40
33
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
34
|
+
else
|
35
|
+
|
36
|
+
it "line #{idx + 1}" do
|
37
|
+
expect(l.pretest(line)).to eq true # "(pretest failed in `#{line}`)"
|
38
|
+
expect(l.test("\n#{line}\n")).to eq true # "(link not found in `\n#{line}\n`)"
|
39
|
+
expect(l.test(line)).to eq true # "(link not found in `#{line}`)"
|
40
|
+
expect(l.match(line)[0].url).to eq line
|
47
41
|
end
|
48
42
|
end
|
49
43
|
end
|
@@ -54,26 +48,18 @@ end
|
|
54
48
|
#------------------------------------------------------------------------------
|
55
49
|
describe 'not links' do
|
56
50
|
|
57
|
-
# TODO tests which can't seem to get passing at the moment, so skip them
|
58
|
-
failing_test = [ 6, 7, 8, 12, 16, 19, 22, 23, 24, 25, 26, 27, 28, 29, 48 ]
|
59
|
-
|
60
51
|
l = Linkify.new
|
61
52
|
l.bypass_normalizer = true # kill the normalizer
|
62
53
|
|
63
|
-
linkfile = File.join(
|
54
|
+
linkfile = File.join(fixture_dir, 'not_links.txt')
|
64
55
|
lines = File.read(linkfile).split(/\r?\n/)
|
65
56
|
lines.each_with_index do |line, idx|
|
66
57
|
line = line.sub(/^%.*/, '')
|
67
58
|
|
68
59
|
next if line.strip.empty?
|
69
60
|
|
70
|
-
|
71
|
-
|
72
|
-
# assert.notOk(l.test(line),
|
73
|
-
# '(should not find link in `' + line + '`, but found `' +
|
74
|
-
# JSON.stringify((l.match(line) || [])[0]) + '`)');
|
75
|
-
expect(l.test(line)).not_to eq true
|
76
|
-
end
|
61
|
+
it "line #{idx + 1}" do
|
62
|
+
expect(l.test(line)).not_to eq true
|
77
63
|
end
|
78
64
|
end
|
79
65
|
|
@@ -93,142 +79,143 @@ describe 'API' do
|
|
93
79
|
expect(l.test('google.myroot')).to eq true
|
94
80
|
expect(l.test('google.xyz')).to_not eq true
|
95
81
|
|
96
|
-
# this is some other package of tlds which we don't have
|
82
|
+
# TODO this is some other package of tlds which we don't have
|
83
|
+
# https://github.com/stephenmathieson/node-tlds
|
84
|
+
# instead we should be using Public Suffix List
|
85
|
+
# https://github.com/weppos/publicsuffix-ruby
|
97
86
|
# l.tlds(require('tlds'));
|
98
87
|
# assert.ok(l.test('google.xyz'));
|
99
88
|
# assert.notOk(l.test('google.myroot'));
|
100
89
|
end
|
101
90
|
|
102
91
|
|
103
|
-
# TODO Tests not passing
|
104
92
|
#------------------------------------------------------------------------------
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
93
|
+
it 'add rule as regexp, with default normalizer' do
|
94
|
+
l = Linkify.new.add('my:', {validate: /^\/\/[a-z]+/} )
|
95
|
+
|
96
|
+
match = l.match('google.com. my:// my://asdf!')
|
97
|
+
|
98
|
+
expect(match[0].text).to eq 'google.com'
|
99
|
+
expect(match[1].text).to eq 'my://asdf'
|
100
|
+
end
|
101
|
+
|
102
|
+
#------------------------------------------------------------------------------
|
103
|
+
it 'add rule with normalizer' do
|
104
|
+
l = Linkify.new.add('my:', {
|
105
|
+
validate: /^\/\/[a-z]+/,
|
106
|
+
normalize: lambda do |m, obj|
|
107
|
+
m.text = m.text.sub(/^my:\/\//, '').upcase
|
108
|
+
m.url = m.url.upcase
|
109
|
+
end
|
110
|
+
})
|
111
|
+
|
112
|
+
match = l.match('google.com. my:// my://asdf!')
|
113
|
+
|
114
|
+
expect(match[1].text).to eq 'ASDF'
|
115
|
+
expect(match[1].url).to eq 'MY://ASDF'
|
116
|
+
end
|
117
|
+
|
115
118
|
#------------------------------------------------------------------------------
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
#
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
#
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
# return false;
|
218
|
-
# }
|
219
|
-
# return tail.match(self.re.twitter)[0].length;
|
220
|
-
# }
|
221
|
-
# return 0;
|
222
|
-
# },
|
223
|
-
# normalize: function (m) {
|
224
|
-
# m.url = 'https://twitter.com/' + m.url.replace(/^@/, '');
|
225
|
-
# }
|
226
|
-
# });
|
227
|
-
#
|
228
|
-
# assert.equal(l.match('hello, @gamajoba_!')[0].text, '@gamajoba_');
|
229
|
-
# assert.equal(l.match(':@givi')[0].text, '@givi');
|
230
|
-
# assert.equal(l.match(':@givi')[0].url, 'https://twitter.com/givi');
|
231
|
-
# assert.notOk(l.test('@@invalid'));
|
232
|
-
# });
|
119
|
+
it 'disable rule' do
|
120
|
+
l = Linkify.new
|
121
|
+
|
122
|
+
expect(l.test('http://google.com')).to eq true
|
123
|
+
expect(l.test('foo@bar.com')).to eq true
|
124
|
+
l.add('http:', nil)
|
125
|
+
l.add('mailto:', nil)
|
126
|
+
expect(l.test('http://google.com')).to eq false
|
127
|
+
expect(l.test('foo@bar.com')).to eq false
|
128
|
+
end
|
129
|
+
|
130
|
+
#------------------------------------------------------------------------------
|
131
|
+
it 'add bad definition' do
|
132
|
+
l = Linkify.new
|
133
|
+
|
134
|
+
expect {
|
135
|
+
l.add('test:', [])
|
136
|
+
}.to raise_error(StandardError)
|
137
|
+
|
138
|
+
l = Linkify.new
|
139
|
+
|
140
|
+
expect {
|
141
|
+
l.add('test:', {validate: []})
|
142
|
+
}.to raise_error(StandardError)
|
143
|
+
|
144
|
+
l = Linkify.new
|
145
|
+
|
146
|
+
expect {
|
147
|
+
l.add('test:', {validate: []})
|
148
|
+
}.to raise_error(StandardError)
|
149
|
+
|
150
|
+
expect {
|
151
|
+
l.add('test:', {
|
152
|
+
validate: lambda { return false },
|
153
|
+
normalize: 'bad'
|
154
|
+
})
|
155
|
+
}.to raise_error(StandardError)
|
156
|
+
end
|
157
|
+
|
158
|
+
|
159
|
+
#------------------------------------------------------------------------------
|
160
|
+
it 'test at position' do
|
161
|
+
l = Linkify.new
|
162
|
+
expect(l.testSchemaAt('http://google.com', 'http:', 5) > 0).to eq true
|
163
|
+
expect(l.testSchemaAt('http://google.com', 'HTTP:', 5) > 0).to eq true
|
164
|
+
expect(l.testSchemaAt('http://google.com', 'http:', 6) > 0).to eq false
|
165
|
+
expect(l.testSchemaAt('http://google.com', 'bad_schema:', 6) > 0).to eq false
|
166
|
+
end
|
167
|
+
|
168
|
+
#------------------------------------------------------------------------------
|
169
|
+
it 'correct cache value' do
|
170
|
+
l = Linkify.new
|
171
|
+
match = l.match('.com. http://google.com google.com ftp://google.com')
|
172
|
+
|
173
|
+
expect(match[0].text).to eq 'http://google.com'
|
174
|
+
expect(match[1].text).to eq 'google.com'
|
175
|
+
expect(match[2].text).to eq 'ftp://google.com'
|
176
|
+
end
|
177
|
+
|
178
|
+
#------------------------------------------------------------------------------
|
179
|
+
it 'normalize' do
|
180
|
+
l = Linkify.new
|
181
|
+
m = l.match('mailto:foo@bar.com')[0]
|
182
|
+
|
183
|
+
# assert.equal(m.text, 'foo@bar.com');
|
184
|
+
expect(m.url).to eq 'mailto:foo@bar.com'
|
185
|
+
|
186
|
+
m = l.match('foo@bar.com')[0]
|
187
|
+
|
188
|
+
# assert.equal(m.text, 'foo@bar.com');
|
189
|
+
expect(m.url).to eq 'mailto:foo@bar.com'
|
190
|
+
end
|
191
|
+
|
192
|
+
#------------------------------------------------------------------------------
|
193
|
+
it 'test @twitter rule' do
|
194
|
+
l = Linkify.new.add('@', {
|
195
|
+
validate: lambda do |text, pos, obj|
|
196
|
+
tail = text.slice(pos..-1)
|
197
|
+
if (!obj.re[:twitter])
|
198
|
+
obj.re[:twitter] = Regexp.new(
|
199
|
+
'^([a-zA-Z0-9_]){1,15}(?!_)(?=$|' + LinkifyRe::SRC_Z_P_CC + ')'
|
200
|
+
)
|
201
|
+
end
|
202
|
+
if (obj.re[:twitter] =~ tail)
|
203
|
+
if (pos >= 2 && text[pos - 2] == '@')
|
204
|
+
return 0
|
205
|
+
end
|
206
|
+
return tail.match(obj.re[:twitter])[0].length
|
207
|
+
end
|
208
|
+
return 0
|
209
|
+
end,
|
210
|
+
normalize: lambda do |m, obj|
|
211
|
+
m.url = 'https://twitter.com/' + m.url.sub(/^@/, '')
|
212
|
+
end
|
213
|
+
})
|
214
|
+
|
215
|
+
expect(l.match('hello, @gamajoba_!')[0].text).to eq '@gamajoba_'
|
216
|
+
expect(l.match(':@givi')[0].text).to eq '@givi'
|
217
|
+
expect(l.match(':@givi')[0].url).to eq 'https://twitter.com/givi'
|
218
|
+
expect(l.test('@@invalid')).to eq false
|
219
|
+
end
|
233
220
|
|
234
221
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkify-it-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Walker
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-04-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: uc.micro-rb
|