twitter-text-js-rails 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +22 -0
- data/README.md +23 -0
- data/lib/twitter-text-js-rails.rb +8 -0
- data/lib/twitter-text-js-rails/version.rb +5 -0
- data/vendor/assets/javascripts/twitter-text.js +1328 -0
- metadata +50 -0
data/MIT-LICENSE
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Angular.JS RubyGem Copyright (c) 2012 Hirav Gandhi
|
|
2
|
+
|
|
3
|
+
Angular.JS and related components Copyright (c) 2010-2012 Google Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
6
|
+
a copy of this software and associated documentation files (the
|
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
11
|
+
the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be
|
|
14
|
+
included in all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# twitter-text-js-rails
|
|
2
|
+
|
|
3
|
+
twitter-text-js-rails is a simple wrapper for the
|
|
4
|
+
[twitter-text-js](https://github.com/twitter/twitter-text-js) library for use in Rails 3.1 and later.
|
|
5
|
+
|
|
6
|
+
## Usage
|
|
7
|
+
|
|
8
|
+
Add the following to your Gemfile:
|
|
9
|
+
|
|
10
|
+
gem 'twitter-text-js-rails'
|
|
11
|
+
|
|
12
|
+
Add the following directive to your JavaScript manifest file (application.js):
|
|
13
|
+
|
|
14
|
+
//= require twitter-text
|
|
15
|
+
|
|
16
|
+
## Versioning
|
|
17
|
+
We will attempt to mirror the versioning of twitter-text-js to the best
|
|
18
|
+
of our ability. The major, minor, and patch version numbers mirror the wrapped
|
|
19
|
+
twitter-text-js version number.
|
|
20
|
+
|
|
21
|
+
## Credits
|
|
22
|
+
This gem blatantly copies from the
|
|
23
|
+
[angularjs-rails](https://github.com/hiravgandhi/angularjs-rails) gem.
|
|
@@ -0,0 +1,1328 @@
|
|
|
1
|
+
(function() {
|
|
2
|
+
if (typeof twttr === "undefined" || twttr === null) {
|
|
3
|
+
var twttr = {};
|
|
4
|
+
}
|
|
5
|
+
|
|
6
|
+
twttr.txt = {};
|
|
7
|
+
twttr.txt.regexen = {};
|
|
8
|
+
|
|
9
|
+
// Replacement table used by twttr.txt.htmlEscape: each HTML-significant
// character maps to the entity that is emitted in its place.
var HTML_ENTITIES = {
  '&': '&amp;',
  '>': '&gt;',
  '<': '&lt;',
  '"': '&quot;',
  "'": '&#39;'
};
|
|
16
|
+
|
|
17
|
+
// HTML escaping: every &, ", ', > and < in `text` is replaced by its entry in
// HTML_ENTITIES. A falsy `text` (null, undefined, "") is returned as-is.
twttr.txt.htmlEscape = function(text) {
  if (!text) {
    return text;
  }
  return text.replace(/[&"'><]/g, function(ch) {
    return HTML_ENTITIES[ch];
  });
};
|
|
23
|
+
|
|
24
|
+
// Builds a RegExp from a template.
//
// `regex` is either a pattern string or a RegExp; `flags` is an optional flag
// string. When a RegExp is given, its g/i/m flags are added to `flags` (if not
// already there) and its source is used as the template. Each "#{name}"
// placeholder in the template is replaced by twttr.txt.regexen[name]: the
// value itself when it is a string, its .source otherwise, and "" when the
// entry is missing or falsy.
function regexSupplant(regex, flags) {
  var pattern = regex;
  var allFlags = flags || "";

  // Appends `letter` to the flag string when the source RegExp had it set.
  function inheritFlag(enabled, letter) {
    if (enabled && allFlags.indexOf(letter) < 0) {
      allFlags += letter;
    }
  }

  if (typeof regex !== "string") {
    inheritFlag(regex.global, "g");
    inheritFlag(regex.ignoreCase, "i");
    inheritFlag(regex.multiline, "m");
    pattern = regex.source;
  }

  var expanded = pattern.replace(/#\{(\w+)\}/g, function(match, name) {
    var known = twttr.txt.regexen[name] || "";
    return typeof known === "string" ? known : known.source;
  });

  return new RegExp(expanded, allFlags);
}
|
|
49
|
+
|
|
50
|
+
twttr.txt.regexSupplant = regexSupplant;
|
|
51
|
+
|
|
52
|
+
// Simple string interpolation: each "#{name}" in `str` is replaced with
// values[name]; a missing or falsy value becomes the empty string.
function stringSupplant(str, values) {
  var placeholder = /#\{(\w+)\}/g;

  function lookup(match, name) {
    var replacement = values[name];
    return replacement ? replacement : "";
  }

  return str.replace(placeholder, lookup);
}
|
|
58
|
+
|
|
59
|
+
twttr.txt.stringSupplant = stringSupplant;
|
|
60
|
+
|
|
61
|
+
// Appends one character-class fragment to the `charClass` array and returns
// that same array: a single character when `start` and `end` are identical,
// otherwise a "start-end" range. Both bounds go through String.fromCharCode.
function addCharsToCharClass(charClass, start, end) {
  var first = String.fromCharCode(start);
  charClass.push(end === start ? first : first + "-" + String.fromCharCode(end));
  return charClass;
}
|
|
69
|
+
|
|
70
|
+
twttr.txt.addCharsToCharClass = addCharsToCharClass;
|
|
71
|
+
|
|
72
|
+
// Whitespace is more than %20; U+3000, for example, is the full-width space
// used with Kanji. UNICODE_SPACES holds the character-class fragments for all
// of them, suitable for joining into a pattern for use with String#split.
// Taken from: ActiveSupport::Multibyte::Handlers::UTF8Handler::UNICODE_WHITESPACE
var fromCode = String.fromCharCode;
var UNICODE_SPACES = [];
// A call with identical bounds appends just that one character.
addCharsToCharClass(UNICODE_SPACES, 0x0020, 0x0020); // White_Space # Zs       SPACE
addCharsToCharClass(UNICODE_SPACES, 0x0085, 0x0085); // White_Space # Cc       <control-0085>
addCharsToCharClass(UNICODE_SPACES, 0x00A0, 0x00A0); // White_Space # Zs       NO-BREAK SPACE
addCharsToCharClass(UNICODE_SPACES, 0x1680, 0x1680); // White_Space # Zs       OGHAM SPACE MARK
addCharsToCharClass(UNICODE_SPACES, 0x180E, 0x180E); // White_Space # Zs       MONGOLIAN VOWEL SEPARATOR
addCharsToCharClass(UNICODE_SPACES, 0x2028, 0x2028); // White_Space # Zl       LINE SEPARATOR
addCharsToCharClass(UNICODE_SPACES, 0x2029, 0x2029); // White_Space # Zp       PARAGRAPH SEPARATOR
addCharsToCharClass(UNICODE_SPACES, 0x202F, 0x202F); // White_Space # Zs       NARROW NO-BREAK SPACE
addCharsToCharClass(UNICODE_SPACES, 0x205F, 0x205F); // White_Space # Zs       MEDIUM MATHEMATICAL SPACE
addCharsToCharClass(UNICODE_SPACES, 0x3000, 0x3000); // White_Space # Zs       IDEOGRAPHIC SPACE
addCharsToCharClass(UNICODE_SPACES, 0x009, 0x00D);   // White_Space # Cc   [5] <control-0009>..<control-000D>
addCharsToCharClass(UNICODE_SPACES, 0x2000, 0x200A); // White_Space # Zs  [11] EN QUAD..HAIR SPACE
|
|
90
|
+
|
|
91
|
+
// Character-class fragments for characters treated as invalid.
var INVALID_CHARS = [];
addCharsToCharClass(INVALID_CHARS, 0xFFFE, 0xFFFE);
addCharsToCharClass(INVALID_CHARS, 0xFEFF, 0xFEFF); // BOM
addCharsToCharClass(INVALID_CHARS, 0xFFFF, 0xFFFF); // Special
addCharsToCharClass(INVALID_CHARS, 0x202A, 0x202E); // Directional change
|
|
97
|
+
|
|
98
|
+
twttr.txt.regexen.spaces_group = regexSupplant(UNICODE_SPACES.join(""));
|
|
99
|
+
twttr.txt.regexen.spaces = regexSupplant("[" + UNICODE_SPACES.join("") + "]");
|
|
100
|
+
twttr.txt.regexen.invalid_chars_group = regexSupplant(INVALID_CHARS.join(""));
|
|
101
|
+
twttr.txt.regexen.punct = /\!'#%&'\(\)*\+,\\\-\.\/:;<=>\?@\[\]\^_{|}~\$/;
|
|
102
|
+
twttr.txt.regexen.rtl_chars = /[\u0600-\u06FF]|[\u0750-\u077F]|[\u0590-\u05FF]|[\uFE70-\uFEFF]/mg;
|
|
103
|
+
twttr.txt.regexen.non_bmp_code_pairs = /[\uD800-\uDBFF][\uDC00-\uDFFF]/mg;
|
|
104
|
+
|
|
105
|
+
// Character-class fragments for the non-Latin characters allowed in hashtags.
// Each [start, end] pair below is fed to addCharsToCharClass in order.
var nonLatinHashtagChars = (function(ranges) {
  var charClass = [];
  for (var idx = 0; idx < ranges.length; idx++) {
    addCharsToCharClass(charClass, ranges[idx][0], ranges[idx][1]);
  }
  return charClass;
})([
  // Cyrillic
  [0x0400, 0x04ff], // Cyrillic
  [0x0500, 0x0527], // Cyrillic Supplement
  [0x2de0, 0x2dff], // Cyrillic Extended A
  [0xa640, 0xa69f], // Cyrillic Extended B
  // Hebrew
  [0x0591, 0x05bf], // Hebrew
  [0x05c1, 0x05c2],
  [0x05c4, 0x05c5],
  [0x05c7, 0x05c7],
  [0x05d0, 0x05ea],
  [0x05f0, 0x05f4],
  [0xfb12, 0xfb28], // Hebrew Presentation Forms
  [0xfb2a, 0xfb36],
  [0xfb38, 0xfb3c],
  [0xfb3e, 0xfb3e],
  [0xfb40, 0xfb41],
  [0xfb43, 0xfb44],
  [0xfb46, 0xfb4f],
  // Arabic
  [0x0610, 0x061a], // Arabic
  [0x0620, 0x065f],
  [0x066e, 0x06d3],
  [0x06d5, 0x06dc],
  [0x06de, 0x06e8],
  [0x06ea, 0x06ef],
  [0x06fa, 0x06fc],
  [0x06ff, 0x06ff],
  [0x0750, 0x077f], // Arabic Supplement
  [0x08a0, 0x08a0], // Arabic Extended A
  [0x08a2, 0x08ac],
  [0x08e4, 0x08fe],
  [0xfb50, 0xfbb1], // Arabic Pres. Forms A
  [0xfbd3, 0xfd3d],
  [0xfd50, 0xfd8f],
  [0xfd92, 0xfdc7],
  [0xfdf0, 0xfdfb],
  [0xfe70, 0xfe74], // Arabic Pres. Forms B
  [0xfe76, 0xfefc],
  [0x200c, 0x200c], // Zero-Width Non-Joiner
  // Thai
  [0x0e01, 0x0e3a],
  [0x0e40, 0x0e4e],
  // Hangul (Korean)
  [0x1100, 0x11ff], // Hangul Jamo
  [0x3130, 0x3185], // Hangul Compatibility Jamo
  [0xA960, 0xA97F], // Hangul Jamo Extended-A
  [0xAC00, 0xD7AF], // Hangul Syllables
  [0xD7B0, 0xD7FF], // Hangul Jamo Extended-B
  [0xFFA1, 0xFFDC], // half-width Hangul
  // Japanese and Chinese
  [0x30A1, 0x30FA], // Katakana (full-width)
  [0x30FC, 0x30FE], // Katakana Chouon and iteration marks (full-width)
  [0xFF66, 0xFF9F], // Katakana (half-width)
  [0xFF70, 0xFF70], // Katakana Chouon (half-width)
  [0xFF10, 0xFF19], // \
  [0xFF21, 0xFF3A], //  - Latin (full-width)
  [0xFF41, 0xFF5A], // /
  [0x3041, 0x3096], // Hiragana
  [0x3099, 0x309E], // Hiragana voicing and iteration mark
  [0x3400, 0x4DBF], // Kanji (CJK Extension A)
  [0x4E00, 0x9FFF], // Kanji (Unified)
  // -- Disabled as it breaks the Regex.
  // [0x20000, 0x2A6DF], // Kanji (CJK Extension B)
  // NOTE(review): the next three entries are above 0xFFFF and are passed to
  // String.fromCharCode by addCharsToCharClass, which presumably keeps only
  // the low 16 bits, so they may not cover the blocks named in the comments.
  // Kept as-is to preserve current matching; confirm before changing.
  [0x2A700, 0x2B73F], // Kanji (CJK Extension C)
  [0x2B740, 0x2B81F], // Kanji (CJK Extension D)
  [0x2F800, 0x2FA1F], // Kanji (CJK supplement)
  [0x3003, 0x3003], // Kanji iteration mark
  [0x3005, 0x3005], // Kanji iteration mark
  [0x303B, 0x303B]  // Han iteration mark
]);

twttr.txt.regexen.nonLatinHashtagChars = regexSupplant(nonLatinHashtagChars.join(""));
|
|
178
|
+
|
|
179
|
+
// Character-class fragments for accented and extended Latin characters.
// Each [start, end] pair below is fed to addCharsToCharClass in order.
var latinAccentChars = (function(ranges) {
  var charClass = [];
  for (var idx = 0; idx < ranges.length; idx++) {
    addCharsToCharClass(charClass, ranges[idx][0], ranges[idx][1]);
  }
  return charClass;
})([
  // Latin accented characters (subtracted 0xD7 from the range, it's a
  // confusable multiplication sign. Looks like "x")
  [0x00c0, 0x00d6],
  [0x00d8, 0x00f6],
  [0x00f8, 0x00ff],
  // Latin Extended A and B
  [0x0100, 0x024f],
  // assorted IPA Extensions
  [0x0253, 0x0254],
  [0x0256, 0x0257],
  [0x0259, 0x0259],
  [0x025b, 0x025b],
  [0x0263, 0x0263],
  [0x0268, 0x0268],
  [0x026f, 0x026f],
  [0x0272, 0x0272],
  [0x0289, 0x0289],
  [0x028b, 0x028b],
  // Okina for Hawaiian (it *is* a letter character)
  [0x02bb, 0x02bb],
  // Combining diacritics
  [0x0300, 0x036f],
  // Latin Extended Additional
  [0x1e00, 0x1eff]
]);
twttr.txt.regexen.latinAccentChars = regexSupplant(latinAccentChars.join(""));
|
|
204
|
+
|
|
205
|
+
// A hashtag must contain characters, numbers and underscores, but not all numbers.
|
|
206
|
+
// Hashtag markers: the ASCII number sign and FULLWIDTH NUMBER SIGN (U+FF03).
// The full-width form is written as an escape so it survives re-encoding.
twttr.txt.regexen.hashSigns = /[#\uFF03]/;
|
|
207
|
+
twttr.txt.regexen.hashtagAlpha = regexSupplant(/[a-z_#{latinAccentChars}#{nonLatinHashtagChars}]/i);
|
|
208
|
+
twttr.txt.regexen.hashtagAlphaNumeric = regexSupplant(/[a-z0-9_#{latinAccentChars}#{nonLatinHashtagChars}]/i);
|
|
209
|
+
twttr.txt.regexen.endHashtagMatch = regexSupplant(/^(?:#{hashSigns}|:\/\/)/);
|
|
210
|
+
twttr.txt.regexen.hashtagBoundary = regexSupplant(/(?:^|$|[^&a-z0-9_#{latinAccentChars}#{nonLatinHashtagChars}])/);
|
|
211
|
+
twttr.txt.regexen.validHashtag = regexSupplant(/(#{hashtagBoundary})(#{hashSigns})(#{hashtagAlphaNumeric}*#{hashtagAlpha}#{hashtagAlphaNumeric}*)/gi);
|
|
212
|
+
|
|
213
|
+
// Mention related regex collection
|
|
214
|
+
// Mention markers are the ASCII "@" and FULLWIDTH COMMERCIAL AT (U+FF20); the
// full-width form is written as an escape so it survives re-encoding.
// A mention may start the text, follow "RT" / "RT:", or follow any character
// outside the excluded set below.
twttr.txt.regexen.validMentionPrecedingChars = /(?:^|[^a-zA-Z0-9_!#$%&*@\uFF20]|RT:?)/;
twttr.txt.regexen.atSigns = /[@\uFF20]/;
|
|
216
|
+
twttr.txt.regexen.validMentionOrList = regexSupplant(
|
|
217
|
+
'(#{validMentionPrecedingChars})' + // $1: Preceding character
|
|
218
|
+
'(#{atSigns})' + // $2: At mark
|
|
219
|
+
'([a-zA-Z0-9_]{1,20})' + // $3: Screen name
|
|
220
|
+
'(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?' // $4: List (optional)
|
|
221
|
+
, 'g');
|
|
222
|
+
twttr.txt.regexen.validReply = regexSupplant(/^(?:#{spaces})*#{atSigns}([a-zA-Z0-9_]{1,20})/);
|
|
223
|
+
twttr.txt.regexen.endMentionMatch = regexSupplant(/^(?:#{atSigns}|[#{latinAccentChars}]|:\/\/)/);
|
|
224
|
+
|
|
225
|
+
// URL related regex collection
|
|
226
|
+
// A URL may start the text or follow any character outside this set. The set
// excludes ASCII alphanumerics, "@", "$", "#", the full-width at sign (U+FF20)
// and number sign (U+FF03), and everything in invalid_chars_group.
twttr.txt.regexen.validUrlPrecedingChars = regexSupplant(/(?:[^A-Za-z0-9@\uFF20$#\uFF03#{invalid_chars_group}]|^)/);
|
|
227
|
+
twttr.txt.regexen.invalidUrlWithoutProtocolPrecedingChars = /[-_.\/]$/;
|
|
228
|
+
twttr.txt.regexen.invalidDomainChars = stringSupplant("#{punct}#{spaces_group}#{invalid_chars_group}", twttr.txt.regexen);
|
|
229
|
+
twttr.txt.regexen.validDomainChars = regexSupplant(/[^#{invalidDomainChars}]/);
|
|
230
|
+
twttr.txt.regexen.validSubdomain = regexSupplant(/(?:(?:#{validDomainChars}(?:[_-]|#{validDomainChars})*)?#{validDomainChars}\.)/);
|
|
231
|
+
twttr.txt.regexen.validDomainName = regexSupplant(/(?:(?:#{validDomainChars}(?:-|#{validDomainChars})*)?#{validDomainChars}\.)/);
|
|
232
|
+
twttr.txt.regexen.validGTLD = regexSupplant(/(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|xxx)(?=[^0-9a-zA-Z]|$))/);
|
|
233
|
+
twttr.txt.regexen.validCCTLD = regexSupplant(RegExp(
|
|
234
|
+
"(?:(?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|" +
|
|
235
|
+
"ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|" +
|
|
236
|
+
"ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|" +
|
|
237
|
+
"ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|" +
|
|
238
|
+
"na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|" +
|
|
239
|
+
"sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|" +
|
|
240
|
+
"ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)(?=[^0-9a-zA-Z]|$))"));
|
|
241
|
+
twttr.txt.regexen.validPunycode = regexSupplant(/(?:xn--[0-9a-z]+)/);
|
|
242
|
+
twttr.txt.regexen.validDomain = regexSupplant(/(?:#{validSubdomain}*#{validDomainName}(?:#{validGTLD}|#{validCCTLD}|#{validPunycode}))/);
|
|
243
|
+
twttr.txt.regexen.validAsciiDomain = regexSupplant(/(?:(?:[\-a-z0-9#{latinAccentChars}]+)\.)+(?:#{validGTLD}|#{validCCTLD}|#{validPunycode})/gi);
|
|
244
|
+
twttr.txt.regexen.invalidShortDomain = regexSupplant(/^#{validDomainName}#{validCCTLD}$/);
|
|
245
|
+
|
|
246
|
+
twttr.txt.regexen.validPortNumber = regexSupplant(/[0-9]+/);
|
|
247
|
+
|
|
248
|
+
twttr.txt.regexen.validGeneralUrlPathChars = regexSupplant(/[a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~@|&#{latinAccentChars}]/i);
|
|
249
|
+
// Allow URL paths to contain balanced parens
|
|
250
|
+
// 1. Used in Wikipedia URLs like /Primer_(film)
|
|
251
|
+
// 2. Used in IIS sessions like /S(dfd346)/
|
|
252
|
+
twttr.txt.regexen.validUrlBalancedParens = regexSupplant(/\(#{validGeneralUrlPathChars}+\)/i);
|
|
253
|
+
// Valid end-of-path characters (so /foo. does not gobble the period).
|
|
254
|
+
// 1. Allow =&# for empty URL parameters and other URL-join artifacts
|
|
255
|
+
twttr.txt.regexen.validUrlPathEndingChars = regexSupplant(/[\+\-a-z0-9=_#\/#{latinAccentChars}]|(?:#{validUrlBalancedParens})/i);
|
|
256
|
+
// Allow @ in a url, but only in the middle. Catch things like http://example.com/@user/
|
|
257
|
+
twttr.txt.regexen.validUrlPath = regexSupplant('(?:' +
|
|
258
|
+
'(?:' +
|
|
259
|
+
'#{validGeneralUrlPathChars}*' +
|
|
260
|
+
'(?:#{validUrlBalancedParens}#{validGeneralUrlPathChars}*)*' +
|
|
261
|
+
'#{validUrlPathEndingChars}'+
|
|
262
|
+
')|(?:@#{validGeneralUrlPathChars}+\/)'+
|
|
263
|
+
')', 'i');
|
|
264
|
+
|
|
265
|
+
twttr.txt.regexen.validUrlQueryChars = /[a-z0-9!?\*'@\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i;
|
|
266
|
+
twttr.txt.regexen.validUrlQueryEndingChars = /[a-z0-9_&=#\/]/i;
|
|
267
|
+
twttr.txt.regexen.extractUrl = regexSupplant(
|
|
268
|
+
'(' + // $1 total match
|
|
269
|
+
'(#{validUrlPrecedingChars})' + // $2 Preceeding chracter
|
|
270
|
+
'(' + // $3 URL
|
|
271
|
+
'(https?:\\/\\/)?' + // $4 Protocol (optional)
|
|
272
|
+
'(#{validDomain})' + // $5 Domain(s)
|
|
273
|
+
'(?::(#{validPortNumber}))?' + // $6 Port number (optional)
|
|
274
|
+
'(\\/#{validUrlPath}*)?' + // $7 URL Path
|
|
275
|
+
'(\\?#{validUrlQueryChars}*#{validUrlQueryEndingChars})?' + // $8 Query String
|
|
276
|
+
')' +
|
|
277
|
+
')'
|
|
278
|
+
, 'gi');
|
|
279
|
+
|
|
280
|
+
twttr.txt.regexen.validTcoUrl = /^https?:\/\/t\.co\/[a-z0-9]+/i;
|
|
281
|
+
twttr.txt.regexen.urlHasProtocol = /^https?:\/\//i;
|
|
282
|
+
twttr.txt.regexen.urlHasHttps = /^https:\/\//i;
|
|
283
|
+
|
|
284
|
+
// cashtag related regex
|
|
285
|
+
twttr.txt.regexen.cashtag = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i;
|
|
286
|
+
twttr.txt.regexen.validCashtag = regexSupplant('(^|#{spaces})(\\$)(#{cashtag})(?=$|\\s|[#{punct}])', 'gi');
|
|
287
|
+
|
|
288
|
+
// These URL validation pattern strings are based on the ABNF from RFC 3986
|
|
289
|
+
twttr.txt.regexen.validateUrlUnreserved = /[a-z0-9\-._~]/i;
|
|
290
|
+
twttr.txt.regexen.validateUrlPctEncoded = /(?:%[0-9a-f]{2})/i;
|
|
291
|
+
twttr.txt.regexen.validateUrlSubDelims = /[!$&'()*+,;=]/i;
|
|
292
|
+
twttr.txt.regexen.validateUrlPchar = regexSupplant('(?:' +
|
|
293
|
+
'#{validateUrlUnreserved}|' +
|
|
294
|
+
'#{validateUrlPctEncoded}|' +
|
|
295
|
+
'#{validateUrlSubDelims}|' +
|
|
296
|
+
'[:|@]' +
|
|
297
|
+
')', 'i');
|
|
298
|
+
|
|
299
|
+
twttr.txt.regexen.validateUrlScheme = /(?:[a-z][a-z0-9+\-.]*)/i;
|
|
300
|
+
twttr.txt.regexen.validateUrlUserinfo = regexSupplant('(?:' +
|
|
301
|
+
'#{validateUrlUnreserved}|' +
|
|
302
|
+
'#{validateUrlPctEncoded}|' +
|
|
303
|
+
'#{validateUrlSubDelims}|' +
|
|
304
|
+
':' +
|
|
305
|
+
')*', 'i');
|
|
306
|
+
|
|
307
|
+
twttr.txt.regexen.validateUrlDecOctet = /(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))/i;
|
|
308
|
+
twttr.txt.regexen.validateUrlIpv4 = regexSupplant(/(?:#{validateUrlDecOctet}(?:\.#{validateUrlDecOctet}){3})/i);
|
|
309
|
+
|
|
310
|
+
// Punting on real IPv6 validation for now
|
|
311
|
+
twttr.txt.regexen.validateUrlIpv6 = /(?:\[[a-f0-9:\.]+\])/i;
|
|
312
|
+
|
|
313
|
+
// Also punting on IPvFuture for now
|
|
314
|
+
twttr.txt.regexen.validateUrlIp = regexSupplant('(?:' +
|
|
315
|
+
'#{validateUrlIpv4}|' +
|
|
316
|
+
'#{validateUrlIpv6}' +
|
|
317
|
+
')', 'i');
|
|
318
|
+
|
|
319
|
+
// This is more strict than the rfc specifies
|
|
320
|
+
twttr.txt.regexen.validateUrlSubDomainSegment = /(?:[a-z0-9](?:[a-z0-9_\-]*[a-z0-9])?)/i;
|
|
321
|
+
twttr.txt.regexen.validateUrlDomainSegment = /(?:[a-z0-9](?:[a-z0-9\-]*[a-z0-9])?)/i;
|
|
322
|
+
twttr.txt.regexen.validateUrlDomainTld = /(?:[a-z](?:[a-z0-9\-]*[a-z0-9])?)/i;
|
|
323
|
+
// Zero or more sub-domain segments, one domain segment, then a TLD.
// The placeholders must be exactly "#{name}" for regexSupplant to expand them.
twttr.txt.regexen.validateUrlDomain = regexSupplant(/(?:(?:#{validateUrlSubDomainSegment}\.)*(?:#{validateUrlDomainSegment}\.)#{validateUrlDomainTld})/i);
|
|
324
|
+
|
|
325
|
+
twttr.txt.regexen.validateUrlHost = regexSupplant('(?:' +
|
|
326
|
+
'#{validateUrlIp}|' +
|
|
327
|
+
'#{validateUrlDomain}' +
|
|
328
|
+
')', 'i');
|
|
329
|
+
|
|
330
|
+
// Unencoded internationalized domains - this doesn't check for invalid UTF-8 sequences
|
|
331
|
+
twttr.txt.regexen.validateUrlUnicodeSubDomainSegment = /(?:(?:[a-z0-9]|[^\u0000-\u007f])(?:(?:[a-z0-9_\-]|[^\u0000-\u007f])*(?:[a-z0-9]|[^\u0000-\u007f]))?)/i;
|
|
332
|
+
twttr.txt.regexen.validateUrlUnicodeDomainSegment = /(?:(?:[a-z0-9]|[^\u0000-\u007f])(?:(?:[a-z0-9\-]|[^\u0000-\u007f])*(?:[a-z0-9]|[^\u0000-\u007f]))?)/i;
|
|
333
|
+
twttr.txt.regexen.validateUrlUnicodeDomainTld = /(?:(?:[a-z]|[^\u0000-\u007f])(?:(?:[a-z0-9\-]|[^\u0000-\u007f])*(?:[a-z0-9]|[^\u0000-\u007f]))?)/i;
|
|
334
|
+
twttr.txt.regexen.validateUrlUnicodeDomain = regexSupplant(/(?:(?:#{validateUrlUnicodeSubDomainSegment}\.)*(?:#{validateUrlUnicodeDomainSegment}\.)#{validateUrlUnicodeDomainTld})/i);
|
|
335
|
+
|
|
336
|
+
twttr.txt.regexen.validateUrlUnicodeHost = regexSupplant('(?:' +
|
|
337
|
+
'#{validateUrlIp}|' +
|
|
338
|
+
'#{validateUrlUnicodeDomain}' +
|
|
339
|
+
')', 'i');
|
|
340
|
+
|
|
341
|
+
twttr.txt.regexen.validateUrlPort = /[0-9]{1,5}/;
|
|
342
|
+
|
|
343
|
+
twttr.txt.regexen.validateUrlUnicodeAuthority = regexSupplant(
|
|
344
|
+
'(?:(#{validateUrlUserinfo})@)?' + // $1 userinfo
|
|
345
|
+
'(#{validateUrlUnicodeHost})' + // $2 host
|
|
346
|
+
'(?::(#{validateUrlPort}))?' //$3 port
|
|
347
|
+
, "i");
|
|
348
|
+
|
|
349
|
+
twttr.txt.regexen.validateUrlAuthority = regexSupplant(
|
|
350
|
+
'(?:(#{validateUrlUserinfo})@)?' + // $1 userinfo
|
|
351
|
+
'(#{validateUrlHost})' + // $2 host
|
|
352
|
+
'(?::(#{validateUrlPort}))?' // $3 port
|
|
353
|
+
, "i");
|
|
354
|
+
|
|
355
|
+
twttr.txt.regexen.validateUrlPath = regexSupplant(/(\/#{validateUrlPchar}*)*/i);
|
|
356
|
+
twttr.txt.regexen.validateUrlQuery = regexSupplant(/(#{validateUrlPchar}|\/|\?)*/i);
|
|
357
|
+
twttr.txt.regexen.validateUrlFragment = regexSupplant(/(#{validateUrlPchar}|\/|\?)*/i);
|
|
358
|
+
|
|
359
|
+
// Modified version of RFC 3986 Appendix B
|
|
360
|
+
twttr.txt.regexen.validateUrlUnencoded = regexSupplant(
|
|
361
|
+
'^' + // Full URL
|
|
362
|
+
'(?:' +
|
|
363
|
+
'([^:/?#]+):\\/\\/' + // $1 Scheme
|
|
364
|
+
')?' +
|
|
365
|
+
'([^/?#]*)' + // $2 Authority
|
|
366
|
+
'([^?#]*)' + // $3 Path
|
|
367
|
+
'(?:' +
|
|
368
|
+
'\\?([^#]*)' + // $4 Query
|
|
369
|
+
')?' +
|
|
370
|
+
'(?:' +
|
|
371
|
+
'#(.*)' + // $5 Fragment
|
|
372
|
+
')?$'
|
|
373
|
+
, "i");
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
// Default CSS classes for auto-linked entities (used along with the url class).
var DEFAULT_LIST_CLASS = "tweet-url list-slug";   // lists
var DEFAULT_USERNAME_CLASS = "tweet-url username"; // usernames
var DEFAULT_HASHTAG_CLASS = "tweet-url hashtag";   // hashtags
var DEFAULT_CASHTAG_CLASS = "tweet-url cashtag";   // cashtags

// Options which should not be passed as HTML attributes
var OPTIONS_NOT_ATTRIBUTES = {
  urlClass: true,
  listClass: true,
  usernameClass: true,
  hashtagClass: true,
  cashtagClass: true,
  usernameUrlBase: true,
  listUrlBase: true,
  hashtagUrlBase: true,
  cashtagUrlBase: true,
  usernameUrlBlock: true,
  listUrlBlock: true,
  hashtagUrlBlock: true,
  linkUrlBlock: true,
  usernameIncludeSymbol: true,
  suppressLists: true,
  suppressNoFollow: true,
  targetBlank: true,
  suppressDataScreenName: true,
  urlEntities: true,
  symbolTag: true,
  textWithSymbolTag: true,
  urlTarget: true,
  invisibleTagAttrs: true,
  linkAttributeBlock: true,
  linkTextBlock: true,
  htmlEscapeNonEntities: true
};

// Attributes that tagAttrs renders in name="name" form when truthy.
var BOOLEAN_ATTRIBUTES = {
  disabled: true,
  readonly: true,
  multiple: true,
  checked: true
};
|
|
394
|
+
|
|
395
|
+
// Simple object cloning function for simple objects.
//
// Returns a new plain object holding a shallow copy of the own enumerable
// properties of `o`; inherited properties are not copied.
function clone(o) {
  // Call hasOwnProperty through Object.prototype so that objects created with
  // Object.create(null), or that carry their own "hasOwnProperty" key, can be
  // cloned without throwing.
  var hasOwn = Object.prototype.hasOwnProperty;
  var r = {};
  for (var k in o) {
    if (hasOwn.call(o, k)) {
      r[k] = o[k];
    }
  }

  return r;
}
|
|
406
|
+
|
|
407
|
+
// Serializes `attributes` into an HTML attribute string.
//
// Each own property becomes ` name="value"`, with name and value passed
// through twttr.txt.htmlEscape. Attributes listed in BOOLEAN_ATTRIBUTES are
// rendered as name="name" when truthy and dropped otherwise. Null and
// undefined values are skipped. Returns "" when nothing is emitted.
twttr.txt.tagAttrs = function(attributes) {
  var hasOwn = Object.prototype.hasOwnProperty;
  var htmlAttrs = "";
  for (var k in attributes) {
    // Only own properties (the same rule clone() applies): enumerable
    // properties inherited through the prototype chain must not be written
    // into the generated markup.
    if (!hasOwn.call(attributes, k)) continue;
    var v = attributes[k];
    // Own-property check so names such as "constructor" do not hit members
    // inherited from Object.prototype.
    if (hasOwn.call(BOOLEAN_ATTRIBUTES, k)) {
      v = v ? k : null;
    }
    if (v == null) continue;
    htmlAttrs += " " + twttr.txt.htmlEscape(k) + "=\"" + twttr.txt.htmlEscape(v.toString()) + "\"";
  }
  return htmlAttrs;
};
|
|
419
|
+
|
|
420
|
+
// Builds the "<a ...>text</a>" markup for one entity.
//
// `attributes` is modified in place: rel="nofollow" is set unless
// options.suppressNoFollow is truthy, and options.linkAttributeBlock (if any)
// is then called with (entity, attributes). options.linkTextBlock (if any) is
// called with (entity, text) and its return value becomes the link text.
twttr.txt.linkToText = function(entity, text, attributes, options) {
  if (!options.suppressNoFollow) {
    attributes.rel = "nofollow";
  }
  if (options.linkAttributeBlock) {
    options.linkAttributeBlock(entity, attributes);
  }

  var linkText = options.linkTextBlock ? options.linkTextBlock(entity, text) : text;

  return stringSupplant("<a#{attr}>#{text}</a>", {
    text: linkText,
    attr: twttr.txt.tagAttrs(attributes)
  });
};
|
|
438
|
+
|
|
439
|
+
// Links an entity made of a leading symbol plus text.
//
// The symbol is wrapped in <options.symbolTag> and the HTML-escaped text in
// <options.textWithSymbolTag> when those options are set. The symbol goes
// inside the link unless it matches regexen.atSigns and
// options.usernameIncludeSymbol is falsy, in which case it precedes the link.
twttr.txt.linkToTextWithSymbol = function(entity, symbol, text, attributes, options) {
  // Wraps `content` in <tag>...</tag> when a tag name is given.
  function wrap(tag, content) {
    return tag ? "<" + tag + ">" + content + "</" + tag + ">" : content;
  }

  var taggedSymbol = wrap(options.symbolTag, symbol);
  var taggedText = wrap(options.textWithSymbolTag, twttr.txt.htmlEscape(text));

  var symbolOutsideLink = !options.usernameIncludeSymbol && symbol.match(twttr.txt.regexen.atSigns);
  if (symbolOutsideLink) {
    return taggedSymbol + twttr.txt.linkToText(entity, taggedText, attributes, options);
  }
  return twttr.txt.linkToText(entity, taggedSymbol + taggedText, attributes, options);
};
|
|
450
|
+
|
|
451
|
+
// Renders the link for a hashtag entity.
//
// entity  - object with `hashtag` (tag text) and `indices` (position in text)
// text    - the full text; the hash sign is read from it at entity.indices[0]
// options - reads htmlAttrs, hashtagUrlBase, hashtagClass and targetBlank
//
// Returns the markup produced by twttr.txt.linkToTextWithSymbol.
twttr.txt.linkToHashtag = function(entity, text, options) {
  var hash = text.substring(entity.indices[0], entity.indices[0] + 1);
  var hashtag = twttr.txt.htmlEscape(entity.hashtag);
  var attrs = clone(options.htmlAttrs || {});
  attrs.href = options.hashtagUrlBase + hashtag;
  attrs.title = "#" + hashtag;
  attrs["class"] = options.hashtagClass;
  // charAt(0) instead of hashtag[0]: it yields "" for an empty hashtag (so
  // .match returns null rather than throwing on undefined) and does not rely
  // on bracket indexing of strings.
  if (hashtag.charAt(0).match(twttr.txt.regexen.rtl_chars)) {
    attrs["class"] += " rtl";
  }
  if (options.targetBlank) {
    attrs.target = '_blank';
  }

  return twttr.txt.linkToTextWithSymbol(entity, hash, hashtag, attrs, options);
};
|
|
467
|
+
|
|
468
|
+
// Renders the link for a cashtag entity: href is options.cashtagUrlBase plus
// the escaped cashtag, title is "$" plus the cashtag, class is
// options.cashtagClass, and target is "_blank" when options.targetBlank is set.
twttr.txt.linkToCashtag = function(entity, text, options) {
  var symbolName = twttr.txt.htmlEscape(entity.cashtag);
  var linkAttrs = clone(options.htmlAttrs || {});

  linkAttrs.href = options.cashtagUrlBase + symbolName;
  linkAttrs.title = "$" + symbolName;
  linkAttrs["class"] = options.cashtagClass;
  if (options.targetBlank) {
    linkAttrs.target = '_blank';
  }

  return twttr.txt.linkToTextWithSymbol(entity, "$", symbolName, linkAttrs, options);
};
|
|
480
|
+
|
|
481
|
+
// Renders the link for a mention or list entity.
//
// The entity is treated as a list when it has a listSlug and
// options.suppressLists is falsy; lists use listClass / listUrlBase and link
// "user/slug", plain mentions use usernameClass / usernameUrlBase and get a
// data-screen-name attribute unless options.suppressDataScreenName is set.
twttr.txt.linkToMentionAndList = function(entity, text, options) {
  var atIndex = entity.indices[0];
  var at = text.substring(atIndex, atIndex + 1);
  var user = twttr.txt.htmlEscape(entity.screenName);
  var slashListname = twttr.txt.htmlEscape(entity.listSlug);
  var isList = entity.listSlug && !options.suppressLists;
  var attrs = clone(options.htmlAttrs || {});
  var linkedName;

  // Attribute assignment order is kept: class, href, data-screen-name, target.
  if (isList) {
    linkedName = user + slashListname;
    attrs["class"] = options.listClass;
    attrs.href = options.listUrlBase + user + slashListname;
  } else {
    linkedName = user;
    attrs["class"] = options.usernameClass;
    attrs.href = options.usernameUrlBase + user;
    if (!options.suppressDataScreenName) {
      attrs['data-screen-name'] = user;
    }
  }
  if (options.targetBlank) {
    attrs.target = '_blank';
  }

  return twttr.txt.linkToTextWithSymbol(entity, at, linkedName, attrs, options);
};
|
|
498
|
+
|
|
499
|
+
// Renders the link for a URL entity.
//
// The link text defaults to the HTML-escaped entity.url. If the caller passed
// a urlEntities object (provided by a Twitter API response with
// include_entities=true) that has an entry for this URL, or the entity itself
// carries a display_url, the text comes from twttr.txt.linkTextWithEntity so
// the display_url is rendered instead of its underlying t.co URL.
twttr.txt.linkToUrl = function(entity, text, options) {
  var url = entity.url;
  var linkText = twttr.txt.htmlEscape(url);

  var urlEntity = (options.urlEntities && options.urlEntities[url]) || entity;
  var hasDisplayUrl = urlEntity.display_url;
  if (hasDisplayUrl) {
    linkText = twttr.txt.linkTextWithEntity(urlEntity, options);
  }

  var attrs = clone(options.htmlAttrs || {});

  // URLs without a protocol are linked as http://.
  attrs.href = url.match(twttr.txt.regexen.urlHasProtocol) ? url : "http://" + url;

  if (options.targetBlank) {
    attrs.target = '_blank';
  }

  // The class is set only if urlClass is specified.
  if (options.urlClass) {
    attrs["class"] = options.urlClass;
  }

  // urlTarget, when specified, overrides the target chosen above.
  if (options.urlTarget) {
    attrs.target = options.urlTarget;
  }

  if (!options.title && hasDisplayUrl) {
    attrs.title = urlEntity.expanded_url;
  }

  return twttr.txt.linkToText(entity, linkText, attrs, options);
};
|
|
539
|
+
|
|
540
|
+
// Produces the inner HTML for a URL link from an entity that carries
// display_url and expanded_url.
//   entity  - object with `display_url` and `expanded_url` strings
//   options - reads `invisibleTagAttrs` (attribute string applied to the spans
//             that should be copied but not shown)
// Returns an HTML string. When display_url (minus ellipses) is not a substring
// of expanded_url, display_url is returned unchanged.
// NOTE(review): that fallback value is not HTML-escaped here, unlike the other
// pieces below - confirm whether callers rely on that.
twttr.txt.linkTextWithEntity = function (entity, options) {
  var displayUrl = entity.display_url;
  var expandedUrl = entity.expanded_url;

  // Goal: If a user copies and pastes a tweet containing t.co'ed link, the resulting paste
  // should contain the full original URL (expanded_url), not the display URL.
  //
  // Method: Whenever possible, we actually emit HTML that contains expanded_url, and use
  // font-size:0 to hide those parts that should not be displayed (because they are not part of display_url).
  // Elements with font-size:0 get copied even though they are not visible.
  // Note that display:none doesn't work here. Elements with display:none don't get copied.
  //
  // Additionally, we want to *display* ellipses, but we don't want them copied. To make this happen we
  // wrap the ellipses in a tco-ellipsis class and provide an onCopy handler that sets display:none on
  // everything with the tco-ellipsis class.
  //
  // Exception: pic.twitter.com images, for which expandedUrl = "https://twitter.com/#!/username/status/1234/photo/1
  // For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts.
  // For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine.

  var displayUrlSansEllipses = displayUrl.replace(/…/g, ""); // We have to disregard ellipses for matching

  // Note: we currently only support eliding parts of the URL at the beginning or the end.
  // Eventually we may want to elide parts of the URL in the *middle*. If so, this code will
  // become more complicated. We will probably want to create a regexp out of display URL,
  // replacing every ellipsis with a ".*".
  var displayUrlIndex = expandedUrl.indexOf(displayUrlSansEllipses); // looked up once and reused
  if (displayUrlIndex === -1) {
    return displayUrl;
  }

  var v = {
    displayUrlSansEllipses: displayUrlSansEllipses,
    // Portion of expandedUrl that precedes the displayUrl substring
    beforeDisplayUrl: expandedUrl.slice(0, displayUrlIndex),
    // Portion of expandedUrl that comes after displayUrl
    afterDisplayUrl: expandedUrl.slice(displayUrlIndex + displayUrlSansEllipses.length),
    precedingEllipsis: displayUrl.match(/^…/) ? "…" : "",
    followingEllipsis: displayUrl.match(/…$/) ? "…" : ""
  };
  for (var k in v) {
    if (v.hasOwnProperty(k)) {
      v[k] = twttr.txt.htmlEscape(v[k]);
    }
  }

  // As an example: The user tweets "hi http://longdomainname.com/foo"
  // This gets shortened to "hi http://t.co/xyzabc", with display_url = "…nname.com/foo"
  // This will get rendered as:
  // <span class='tco-ellipsis'> <!-- This stuff should get displayed but not copied -->
  //   …
  //   <!-- There's a chance the onCopy event handler might not fire. In case that happens,
  //        we include an &nbsp; here so that the … doesn't bump up against the URL and ruin it.
  //        The &nbsp; is inside the tco-ellipsis span so that when the onCopy handler *does*
  //        fire, it doesn't get copied. Otherwise the copied text would have two spaces in a row,
  //        e.g. "hi  http://longdomainname.com/foo".
  //   <span style='font-size:0'>&nbsp;</span>
  // </span>
  // <span style='font-size:0'> <!-- This stuff should get copied but not displayed -->
  //   http://longdomai
  // </span>
  // <span class='js-display-url'> <!-- This stuff should get displayed *and* copied -->
  //   nname.com/foo
  // </span>
  // <span class='tco-ellipsis'> <!-- This stuff should get displayed but not copied -->
  //   <span style='font-size:0'>&nbsp;</span>
  //   …
  // </span>

  // invisibleTagAttrs is inserted verbatim (it is an attribute string, not text),
  // so it is added after the escaping loop above.
  v['invisible'] = options.invisibleTagAttrs;
  return stringSupplant("<span class='tco-ellipsis'>#{precedingEllipsis}<span #{invisible}>&nbsp;</span></span><span #{invisible}>#{beforeDisplayUrl}</span><span class='js-display-url'>#{displayUrlSansEllipses}</span><span #{invisible}>#{afterDisplayUrl}</span><span class='tco-ellipsis'><span #{invisible}>&nbsp;</span>#{followingEllipsis}</span>", v);
};
|
|
608
|
+
|
|
609
|
+
// Rebuilds `text` as HTML in which every entity is replaced by the markup the
// matching linkTo* helper returns.
//   text     - the tweet text the entity indices refer to
//   entities - array of entities with `indices`; sorted IN PLACE by start index
//   options  - optional settings; a clone is modified, the caller's object is not
// Returns the resulting HTML string. Text between entities is passed through
// twttr.txt.htmlEscape only when options.htmlEscapeNonEntities is set.
twttr.txt.autoLinkEntities = function(text, entities, options) {
  options = clone(options || {});

  // Classes and URL bases used when the caller did not supply a value.
  var fallbacks = [
    ["hashtagClass", DEFAULT_HASHTAG_CLASS],
    ["hashtagUrlBase", "https://twitter.com/#!/search?q=%23"],
    ["cashtagClass", DEFAULT_CASHTAG_CLASS],
    ["cashtagUrlBase", "https://twitter.com/#!/search?q=%24"],
    ["listClass", DEFAULT_LIST_CLASS],
    ["usernameClass", DEFAULT_USERNAME_CLASS],
    ["usernameUrlBase", "https://twitter.com/"],
    ["listUrlBase", "https://twitter.com/"]
  ];
  for (var f = 0; f < fallbacks.length; f++) {
    options[fallbacks[f][0]] = options[fallbacks[f][0]] || fallbacks[f][1];
  }

  // htmlAttrs is derived before invisibleTagAttrs receives its default value.
  options.htmlAttrs = twttr.txt.extractHtmlAttrsFromOptions(options);
  options.invisibleTagAttrs = options.invisibleTagAttrs || "style='position:absolute;left:-9999px;'";

  // Re-key the urlEntities list by URL so linkToUrl can look entries up directly.
  if (options.urlEntities) {
    var byUrl = {},
        listed = options.urlEntities;
    for (var u = 0, count = listed.length; u < count; u++) {
      byUrl[listed[u].url] = listed[u];
    }
    options.urlEntities = byUrl;
  }

  // Entities are processed in order of their start index.
  entities.sort(function(a,b){ return a.indices[0] - b.indices[0]; });

  var passThrough = function(text) {
    return text;
  };
  var nonEntity = options.htmlEscapeNonEntities ? twttr.txt.htmlEscape : passThrough;

  var html = "",
      cursor = 0;

  for (var e = 0; e < entities.length; e++) {
    var current = entities[e];

    // Plain text between the previous entity and this one.
    html += nonEntity(text.substring(cursor, current.indices[0]));

    // The first populated property decides which link helper is used.
    if (current.url) {
      html += twttr.txt.linkToUrl(current, text, options);
    } else if (current.hashtag) {
      html += twttr.txt.linkToHashtag(current, text, options);
    } else if (current.screenName) {
      html += twttr.txt.linkToMentionAndList(current, text, options);
    } else if (current.cashtag) {
      html += twttr.txt.linkToCashtag(current, text, options);
    }

    cursor = current.indices[1];
  }

  // Trailing text after the last entity.
  html += nonEntity(text.substring(cursor, text.length));
  return html;
};
|
|
661
|
+
|
|
662
|
+
// Auto-links `text` using an entities object shaped like an API response.
//   text    - the tweet text
//   json    - object whose own property values are concatenated into one entity
//             list (each value is expected to be an array of entities)
//   options - passed through to twttr.txt.autoLinkEntities
// Returns the HTML string produced by twttr.txt.autoLinkEntities.
// Side effect: entity objects found in `json` are modified in place (a
// `screenName` or `hashtag` property is added and their indices are rewritten).
twttr.txt.autoLinkWithJSON = function(text, json, options) {
  // `entity` is declared locally; the assignment below previously created an
  // implicit global (and would throw under strict mode).
  var entities = [],
      entity,
      key,
      i;

  // concatenate all entities; only own properties are considered so that
  // additions to Object.prototype are not mistaken for entity lists
  for (key in json) {
    if (Object.prototype.hasOwnProperty.call(json, key)) {
      entities = entities.concat(json[key]);
    }
  }

  // map JSON entity to twitter-text entity
  for (i = 0; i < entities.length; i++) {
    entity = entities[i];
    if (entity.screen_name) {
      // this is @mention
      entity.screenName = entity.screen_name;
    } else if (entity.text) {
      // this is #hashtag
      entity.hashtag = entity.text;
    }
  }

  // modify indices to UTF-16
  twttr.txt.modifyIndicesFromUnicodeToUTF16(text, entities);

  return twttr.txt.autoLinkEntities(text, entities, options);
};
|
|
684
|
+
|
|
685
|
+
// Collects the entries of `options` that should become HTML attributes.
// Keys flagged in OPTIONS_NOT_ATTRIBUTES are skipped. Keys flagged in
// BOOLEAN_ATTRIBUTES are emitted as name=name when truthy and dropped otherwise.
// Any remaining null/undefined value is dropped as well.
// Returns a new object; `options` itself is not modified.
twttr.txt.extractHtmlAttrsFromOptions = function(options) {
  var attributes = {};

  for (var name in options) {
    if (OPTIONS_NOT_ATTRIBUTES[name]) {
      continue;
    }

    var value = options[name];
    if (BOOLEAN_ATTRIBUTES[name]) {
      value = value ? name : null;
    }

    if (value != null) {
      attributes[name] = value;
    }
  }

  return attributes;
};
|
|
698
|
+
|
|
699
|
+
// Extracts every entity type from `text` (URLs only when they carry a
// protocol) and returns the auto-linked HTML.
twttr.txt.autoLink = function(text, options) {
  var extractionOptions = {extractUrlsWithoutProtocol: false},
      found = twttr.txt.extractEntitiesWithIndices(text, extractionOptions);

  return twttr.txt.autoLinkEntities(text, found, options);
};
|
|
703
|
+
|
|
704
|
+
// Auto-links only the @mentions and @user/list references found in `text`.
twttr.txt.autoLinkUsernamesOrLists = function(text, options) {
  var found = twttr.txt.extractMentionsOrListsWithIndices(text);

  return twttr.txt.autoLinkEntities(text, found, options);
};
|
|
708
|
+
|
|
709
|
+
// Auto-links only the hashtags found in `text`.
twttr.txt.autoLinkHashtags = function(text, options) {
  var found = twttr.txt.extractHashtagsWithIndices(text);

  return twttr.txt.autoLinkEntities(text, found, options);
};
|
|
713
|
+
|
|
714
|
+
// Auto-links only the cashtags found in `text`.
twttr.txt.autoLinkCashtags = function(text, options) {
  var found = twttr.txt.extractCashtagsWithIndices(text);

  return twttr.txt.autoLinkEntities(text, found, options);
};
|
|
718
|
+
|
|
719
|
+
// Auto-links only the URLs found in `text`; URLs without a protocol are not
// extracted.
twttr.txt.autoLinkUrlsCustom = function(text, options) {
  var extractionOptions = {extractUrlsWithoutProtocol: false},
      found = twttr.txt.extractUrlsWithIndices(text, extractionOptions);

  return twttr.txt.autoLinkEntities(text, found, options);
};
|
|
723
|
+
|
|
724
|
+
// Sorts `entities` by start index and removes, IN PLACE, every entity that
// starts before the end of the most recently kept entity. Returns nothing.
twttr.txt.removeOverlappingEntities = function(entities) {
  entities.sort(function(a,b){ return a.indices[0] - b.indices[0]; });

  var kept = entities[0],
      position = 1;

  while (position < entities.length) {
    var candidate = entities[position];
    if (kept.indices[1] > candidate.indices[0]) {
      // Overlaps the kept entity: drop it and re-examine the same slot.
      entities.splice(position, 1);
    } else {
      kept = candidate;
      position++;
    }
  }
};
|
|
737
|
+
|
|
738
|
+
// Extracts URLs, mentions/lists, hashtags and cashtags from `text` and returns
// them as one array with overlapping entities removed.
//   options - forwarded to twttr.txt.extractUrlsWithIndices only
twttr.txt.extractEntitiesWithIndices = function(text, options) {
  var urls = twttr.txt.extractUrlsWithIndices(text, options),
      mentions = twttr.txt.extractMentionsOrListsWithIndices(text),
      // URL overlap is resolved once for all entity types below, so the
      // hashtag extractor is told not to do it itself.
      hashtags = twttr.txt.extractHashtagsWithIndices(text, {checkUrlOverlap: false}),
      cashtags = twttr.txt.extractCashtagsWithIndices(text),
      combined = urls.concat(mentions).concat(hashtags).concat(cashtags);

  if (combined.length == 0) {
    return [];
  }

  twttr.txt.removeOverlappingEntities(combined);
  return combined;
};
|
|
751
|
+
|
|
752
|
+
// Returns the screen names (without indices) of the mentions found in `text`.
twttr.txt.extractMentions = function(text) {
  var mentions = twttr.txt.extractMentionsWithIndices(text),
      names = [];

  for (var m = 0, total = mentions.length; m < total; m++) {
    names.push(mentions[m].screenName);
  }

  return names;
};
|
|
763
|
+
|
|
764
|
+
// Returns {screenName, indices} objects for the plain mentions in `text`,
// i.e. the results of extractMentionsOrListsWithIndices whose listSlug is empty.
twttr.txt.extractMentionsWithIndices = function(text) {
  var candidates = twttr.txt.extractMentionsOrListsWithIndices(text),
      plainMentions = [];

  for (var c = 0; c < candidates.length; c++) {
    var candidate = candidates[c];
    if (candidate.listSlug == '') {
      plainMentions.push({
        screenName: candidate.screenName,
        indices: candidate.indices
      });
    }
  }

  return plainMentions;
};
|
|
781
|
+
|
|
782
|
+
/**
 * Extract list or user mentions.
 * (Presence of listSlug indicates a list)
 *
 * Returns an array of {screenName, listSlug, indices} objects; listSlug is ''
 * for a plain mention. Returns [] for empty text or text without an at-sign.
 */
twttr.txt.extractMentionsOrListsWithIndices = function(text) {
  if (!text || !text.match(twttr.txt.regexen.atSigns)) {
    return [];
  }

  var found = [];

  // replace() is used purely to iterate over the matches; its result is discarded.
  text.replace(twttr.txt.regexen.validMentionOrList, function(match, before, atSign, screenName, slashListname, offset, chunk) {
    var rest = chunk.slice(offset + match.length);
    if (rest.match(twttr.txt.regexen.endMentionMatch)) {
      // What follows the candidate rules it out as a mention.
      return;
    }

    var listSlug = slashListname || '',
        start = offset + before.length,
        // +1 accounts for the at-sign character itself.
        end = start + screenName.length + listSlug.length + 1;

    found.push({
      screenName: screenName,
      listSlug: listSlug,
      indices: [start, end]
    });
  });

  return found;
};
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
// Returns the screen name `text` is replying to, or null.
// null is returned when text is empty, when it does not match
// twttr.txt.regexen.validReply, or when the text right after the match
// satisfies twttr.txt.regexen.endMentionMatch.
twttr.txt.extractReplies = function(text) {
  if (!text) {
    return null;
  }

  var replyMatch = text.match(twttr.txt.regexen.validReply);
  if (!replyMatch) {
    return null;
  }

  // Text following the match, taken from the match result itself rather than
  // from the deprecated global RegExp.rightContext.
  // NOTE(review): relies on validReply not carrying the /g flag (so the match
  // result has `index` and capture groups) - the use of capture group 1 below
  // already depends on that.
  var afterMatch = text.slice(replyMatch.index + replyMatch[0].length);
  if (afterMatch.match(twttr.txt.regexen.endMentionMatch)) {
    return null;
  }

  return replyMatch[1];
};
|
|
825
|
+
|
|
826
|
+
// Returns the URL strings (without indices) found in `text`.
//   options - forwarded to twttr.txt.extractUrlsWithIndices
twttr.txt.extractUrls = function(text, options) {
  var located = twttr.txt.extractUrlsWithIndices(text, options),
      urlStrings = [];

  for (var u = 0, total = located.length; u < total; u++) {
    urlStrings.push(located[u].url);
  }

  return urlStrings;
};
|
|
836
|
+
|
|
837
|
+
// Extracts URLs from `text`.
//   text    - text to scan
//   options - optional; `extractUrlsWithoutProtocol` (defaults to true when no
//             options object is passed) controls whether protocol-less URLs
//             such as "example.com/x" are extracted
// Returns an array of {url, indices: [start, end]} objects.
twttr.txt.extractUrlsWithIndices = function(text, options) {
  if (!options) {
    options = {extractUrlsWithoutProtocol: true};
  }

  // Cheap pre-check: without a "." (or a ":" when a protocol is required)
  // there cannot be a URL.
  if (!text || (options.extractUrlsWithoutProtocol ? !text.match(/\./) : !text.match(/:/))) {
    return [];
  }

  var urls = [],
      extractUrl = twttr.txt.regexen.extractUrl,
      match;

  // The regex is shared and advanced via lastIndex; start from the beginning
  // even if an earlier scan was left unfinished.
  extractUrl.lastIndex = 0;

  // Capture groups are read from the exec() result instead of the deprecated
  // RegExp.$n statics. `|| ""` mirrors the statics, which yield "" for a group
  // that did not participate in the match.
  while ((match = extractUrl.exec(text))) {
    var before = match[2] || "", url = match[3] || "", protocol = match[4] || "", domain = match[5] || "", path = match[7] || "";
    var endPosition = extractUrl.lastIndex,
        startPosition = endPosition - url.length;

    // if protocol is missing and domain contains non-ASCII characters,
    // extract ASCII-only domains.
    if (!protocol) {
      if (!options.extractUrlsWithoutProtocol
          || before.match(twttr.txt.regexen.invalidUrlWithoutProtocolPrecedingChars)) {
        continue;
      }
      var lastUrl = null,
          lastUrlInvalidMatch = false,
          asciiEndPosition = 0;
      // replace() is used to visit each ASCII-only domain inside `domain`.
      domain.replace(twttr.txt.regexen.validAsciiDomain, function(asciiDomain) {
        var asciiStartPosition = domain.indexOf(asciiDomain, asciiEndPosition);
        asciiEndPosition = asciiStartPosition + asciiDomain.length;
        lastUrl = {
          url: asciiDomain,
          indices: [startPosition + asciiStartPosition, startPosition + asciiEndPosition]
        };
        lastUrlInvalidMatch = asciiDomain.match(twttr.txt.regexen.invalidShortDomain);
        if (!lastUrlInvalidMatch) {
          urls.push(lastUrl);
        }
      });

      // no ASCII-only domain found. Skip the entire URL.
      if (lastUrl == null) {
        continue;
      }

      // lastUrl only contains domain. Need to add path and query if they exist.
      if (path) {
        // A domain rejected as "invalid short" is still kept when a path follows it.
        if (lastUrlInvalidMatch) {
          urls.push(lastUrl);
        }
        lastUrl.url = url.replace(domain, lastUrl.url);
        lastUrl.indices[1] = endPosition;
      }
    } else {
      // In the case of t.co URLs, don't allow additional path characters.
      var tcoMatch = url.match(twttr.txt.regexen.validTcoUrl);
      if (tcoMatch) {
        url = tcoMatch[0];
        endPosition = startPosition + url.length;
      }
      urls.push({
        url: url,
        indices: [startPosition, endPosition]
      });
    }
  }

  return urls;
};
|
|
904
|
+
|
|
905
|
+
// Returns the hashtag texts (without the hash sign or indices) found in `text`.
twttr.txt.extractHashtags = function(text) {
  var located = twttr.txt.extractHashtagsWithIndices(text),
      tagTexts = [];

  for (var h = 0, total = located.length; h < total; h++) {
    tagTexts.push(located[h].hashtag);
  }

  return tagTexts;
};
|
|
915
|
+
|
|
916
|
+
// Extracts hashtags from `text`.
//   options - optional; `checkUrlOverlap` (true when no options object is
//             passed) drops hashtags that overlap an extracted URL
// Returns an array of {hashtag, indices: [start, end]} objects.
twttr.txt.extractHashtagsWithIndices = function(text, options) {
  if (!options) {
    options = {checkUrlOverlap: true};
  }

  if (!text || !text.match(twttr.txt.regexen.hashSigns)) {
    return [];
  }

  var found = [];

  // replace() is used purely to iterate over the matches.
  text.replace(twttr.txt.regexen.validHashtag, function(match, before, hash, hashText, offset, chunk) {
    var rest = chunk.slice(offset + match.length);
    if (!rest.match(twttr.txt.regexen.endHashtagMatch)) {
      var start = offset + before.length;
      found.push({
        hashtag: hashText,
        // +1 accounts for the hash sign itself.
        indices: [start, start + hashText.length + 1]
      });
    }
  });

  if (!options.checkUrlOverlap) {
    return found;
  }

  // also extract URL entities
  var urls = twttr.txt.extractUrlsWithIndices(text);
  if (urls.length > 0) {
    var mixed = found.concat(urls);
    // remove overlap
    twttr.txt.removeOverlappingEntities(mixed);
    // only push back hashtags
    found = [];
    for (var m = 0; m < mixed.length; m++) {
      if (mixed[m].hashtag) {
        found.push(mixed[m]);
      }
    }
  }

  return found;
};
|
|
958
|
+
|
|
959
|
+
// Returns the cashtag texts (without the dollar sign or indices) found in `text`.
twttr.txt.extractCashtags = function(text) {
  var located = twttr.txt.extractCashtagsWithIndices(text),
      tagTexts = [];

  for (var c = 0, total = located.length; c < total; c++) {
    tagTexts.push(located[c].cashtag);
  }

  return tagTexts;
};
|
|
969
|
+
|
|
970
|
+
// Extracts cashtags from `text`.
// Returns an array of {cashtag, indices: [start, end]} objects, or [] when the
// text is empty or contains no "$".
twttr.txt.extractCashtagsWithIndices = function(text) {
  if (!text || text.indexOf("$") === -1) {
    return [];
  }

  var found = [];

  // replace() is used purely to iterate over the matches.
  text.replace(twttr.txt.regexen.validCashtag, function(match, before, dollar, cashtag, offset, chunk) {
    var start = offset + before.length;
    found.push({
      cashtag: cashtag,
      // +1 accounts for the dollar sign itself.
      indices: [start, start + cashtag.length + 1]
    });
  });

  return found;
};
|
|
988
|
+
|
|
989
|
+
// Rewrites, in place, the indices of `entities` from code-point offsets to
// UTF-16 offsets within `text` (convertUnicodeIndices with indicesInUTF16=false).
twttr.txt.modifyIndicesFromUnicodeToUTF16 = function(text, entities) {
  var indicesInUTF16 = false;
  twttr.txt.convertUnicodeIndices(text, entities, indicesInUTF16);
};
|
|
992
|
+
|
|
993
|
+
// Rewrites, in place, the indices of `entities` from UTF-16 offsets to
// code-point offsets within `text` (convertUnicodeIndices with indicesInUTF16=true).
twttr.txt.modifyIndicesFromUTF16ToUnicode = function(text, entities) {
  var indicesInUTF16 = true;
  twttr.txt.convertUnicodeIndices(text, entities, indicesInUTF16);
};
|
|
996
|
+
|
|
997
|
+
// Returns the length of `text` after every match of
// twttr.txt.regexen.non_bmp_code_pairs has been collapsed to a single character.
twttr.txt.getUnicodeTextLength = function(text) {
  var collapsed = text.replace(twttr.txt.regexen.non_bmp_code_pairs, ' ');
  return collapsed.length;
};
|
|
1000
|
+
|
|
1001
|
+
// Converts the `indices` of every entity between UTF-16 offsets and code-point
// offsets of `text`, in place.
//   indicesInUTF16 - true: incoming indices are UTF-16 offsets and become
//                    code-point offsets; false: the other direction
// `entities` is sorted in place by start index. An entity keeps its original
// length (end - start); only its start position is translated.
twttr.txt.convertUnicodeIndices = function(text, entities, indicesInUTF16) {
  if (entities.length == 0) {
    return;
  }

  // sort entities by start index
  entities.sort(function(a,b){ return a.indices[0] - b.indices[0]; });

  var nextEntity = 0,
      current = entities[0],
      utf16Pos = 0,
      codePointPos = 0;

  for (; utf16Pos < text.length; utf16Pos++, codePointPos++) {
    var sourcePos = indicesInUTF16 ? utf16Pos : codePointPos;

    if (current.indices[0] == sourcePos) {
      var span = current.indices[1] - current.indices[0];
      current.indices[0] = indicesInUTF16 ? codePointPos : utf16Pos;
      current.indices[1] = current.indices[0] + span;

      nextEntity++;
      if (nextEntity == entities.length) {
        // no more entity
        break;
      }
      current = entities[nextEntity];
    }

    // A high surrogate followed by a low surrogate is one code point that
    // occupies two UTF-16 units, so the UTF-16 position advances one extra step.
    var unit = text.charCodeAt(utf16Pos);
    if (0xD800 <= unit && unit <= 0xDBFF && utf16Pos < text.length - 1) {
      unit = text.charCodeAt(utf16Pos + 1);
      if (0xDC00 <= unit && unit <= 0xDFFF) {
        utf16Pos++;
      }
    }
  }
};
|
|
1041
|
+
|
|
1042
|
+
// Equivalent in effect to text.split(/<|>/), implemented with string
// separators because regex splitting drops empty strings in IE:
// "<>".split(/<|>/) => [] in IE, but ["", "", ""] everywhere else,
// whereas "<<".split("<") => ["", "", ""] in all browsers.
twttr.txt.splitTags = function(text) {
  var pieces = [],
      openSplit = text.split("<");

  for (var a = 0; a < openSplit.length; a += 1) {
    var segment = openSplit[a];

    if (!segment) {
      pieces.push("");
      continue;
    }

    var closeSplit = segment.split(">");
    for (var b = 0; b < closeSplit.length; b += 1) {
      pieces.push(closeSplit[b]);
    }
  }

  return pieces;
};
|
|
1066
|
+
|
|
1067
|
+
// Wraps the ranges listed in `hits` with a highlight tag.
//   text    - text that may already contain tags; it is split with
//             twttr.txt.splitTags, so even-numbered chunks are text and
//             odd-numbered chunks are tag contents
//   hits    - array of arrays of offsets; once flattened, even positions open
//             a highlight and odd positions close it. Offsets count text
//             characters only (tag contents are not counted).
//   options - optional; `tag` overrides the default "em" tag name
// Returns the highlighted string (or `text` unchanged when there are no hits).
twttr.txt.hitHighlight = function(text, hits, options) {
  hits = hits || [];
  options = options || {};

  if (hits.length === 0) {
    return text;
  }

  var tagName = options.tag || "em",
      markers = ["<" + tagName + ">", "</" + tagName + ">"],
      chunks = twttr.txt.splitTags(text),
      boundaries = [],
      output = "",
      chunkIndex = 0,
      chunk = chunks[0],   // current text chunk (undefined once exhausted)
      consumed = 0,        // total length of the text chunks already emitted
      cursor = 0,          // position inside `chunk` up to which it was emitted
      openedHere = false,  // whether a highlight was opened inside `chunk`
      a,
      b,
      n;

  // Flatten the list of [start, end] style arrays into one list of offsets.
  for (a = 0; a < hits.length; a += 1) {
    for (b = 0; b < hits[a].length; b += 1) {
      boundaries.push(hits[a][b]);
    }
  }

  for (n = 0; n < boundaries.length; n += 1) {
    var boundary = boundaries[n],
        marker = markers[n % 2],
        emitted = false;

    // Emit whole chunks (and the tags between them) until the chunk that
    // contains this boundary is reached.
    while (chunk != null && boundary >= consumed + chunk.length) {
      output += chunk.slice(cursor);
      if (openedHere && boundary === consumed + chunk.length) {
        // The highlight opened in this chunk ends exactly at its end: close it
        // before the following tag rather than after it.
        output += marker;
        emitted = true;
      }

      if (chunks[chunkIndex + 1]) {
        output += "<" + chunks[chunkIndex + 1] + ">";
      }

      consumed += chunk.length;
      cursor = 0;
      chunkIndex += 2;
      chunk = chunks[chunkIndex];
      openedHere = false;
    }

    if (emitted) {
      continue;
    }

    if (chunk != null) {
      var spot = boundary - consumed;
      output += chunk.slice(cursor, spot) + marker;
      cursor = spot;
      openedHere = (n % 2 === 0);
    } else {
      // Past the end of the text: just append the marker.
      output += marker;
    }
  }

  // Emit whatever remains after the last boundary.
  if (chunk != null) {
    if (cursor < chunk.length) {
      output += chunk.slice(cursor);
    }
    for (n = chunkIndex + 1; n < chunks.length; n += 1) {
      output += (n % 2 === 0 ? chunks[n] : "<" + chunks[n] + ">");
    }
  }

  return output;
};
|
|
1152
|
+
|
|
1153
|
+
// Upper bound that isInvalidTweet compares the computed tweet length against.
var MAX_LENGTH = 140;

// Characters not allowed in Tweets
var INVALID_CHARACTERS = (function(codes) {
  var characters = [];
  for (var c = 0; c < codes.length; c++) {
    characters.push(fromCode(codes[c]));
  }
  return characters;
})([
  // BOM
  0xFFFE,
  0xFEFF,

  // Special
  0xFFFF,

  // Directional Change
  0x202A,
  0x202B,
  0x202C,
  0x202D,
  0x202E
]);
|
|
1171
|
+
|
|
1172
|
+
// Returns the length of Tweet text with consideration to t.co URL replacement
// and chars outside the basic multilingual plane that use 2 UTF16 code points.
//   text    - the tweet text
//   options - optional; `short_url_length` (default 22) and
//             `short_url_length_https` (default 23). Each field falls back to
//             its default independently, so a partially filled options object
//             no longer produces NaN. The caller's object is not modified.
twttr.txt.getTweetLength = function(text, options) {
  options = options || {};

  // These defaults come from https://api.twitter.com/1/help/configuration.json
  // described by https://dev.twitter.com/docs/api/1/get/help/configuration
  var shortUrlLength = options.short_url_length != null ? options.short_url_length : 22,
      shortUrlLengthHttps = options.short_url_length_https != null ? options.short_url_length_https : 23;

  var textLength = twttr.txt.getUnicodeTextLength(text),
      urlsWithIndices = twttr.txt.extractUrlsWithIndices(text);
  twttr.txt.modifyIndicesFromUTF16ToUnicode(text, urlsWithIndices);

  for (var i = 0; i < urlsWithIndices.length; i++) {
    // Subtract the length of the original URL
    textLength += urlsWithIndices[i].indices[0] - urlsWithIndices[i].indices[1];

    // Add the https length for URLs starting with https://
    // Otherwise add the plain short URL length
    if (urlsWithIndices[i].url.toLowerCase().match(twttr.txt.regexen.urlHasHttps)) {
      textLength += shortUrlLengthHttps;
    } else {
      textLength += shortUrlLength;
    }
  }

  return textLength;
};
|
|
1202
|
+
|
|
1203
|
+
// Pre-validates tweet text before it is posted to api.twitter.com. The server
// can still reject a Tweet for other reasons; this only gives quicker feedback.
//
// Returns false when the text is valid. Otherwise returns one of:
//
//   "empty":              the text is nil or empty
//   "too_long":           the computed tweet length exceeds MAX_LENGTH
//   "invalid_characters": the text contains one of INVALID_CHARACTERS
twttr.txt.isInvalidTweet = function(text) {
  if (!text) {
    return "empty";
  }

  // Length is measured with URLs counted at their shortened length.
  var measured = twttr.txt.getTweetLength(text);
  if (measured > MAX_LENGTH) {
    return "too_long";
  }

  var c = 0;
  while (c < INVALID_CHARACTERS.length) {
    if (text.indexOf(INVALID_CHARACTERS[c]) >= 0) {
      return "invalid_characters";
    }
    c++;
  }

  return false;
};
|
|
1230
|
+
|
|
1231
|
+
// Boolean counterpart of isInvalidTweet: true when no problem was reported.
twttr.txt.isValidTweetText = function(text) {
  var problem = twttr.txt.isInvalidTweet(text);
  return !problem;
};
|
|
1234
|
+
|
|
1235
|
+
// Returns true when `username` (including its leading at-sign) is exactly one
// valid mention and nothing else.
twttr.txt.isValidUsername = function(username) {
  if (!username) {
    return false;
  }

  var mentions = twttr.txt.extractMentions(username);
  if (mentions.length !== 1) {
    return false;
  }

  // The extractor returns the name without the @ sign, hence the .slice(1)
  return mentions[0] === username.slice(1);
};
|
|
1245
|
+
|
|
1246
|
+
// validMentionOrList anchored to the whole string.
var VALID_LIST_RE = regexSupplant(/^#{validMentionOrList}$/);

// Returns true when `usernameList` is, in its entirety, a valid @user/list
// reference. Empty, null or undefined input yields false, matching the
// behaviour of isValidUsername and isValidHashtag instead of throwing.
twttr.txt.isValidList = function(usernameList) {
  if (!usernameList) {
    return false;
  }

  var match = usernameList.match(VALID_LIST_RE);

  // Must have matched and had nothing before or after
  // (group 1 is the text preceding the at-sign, group 4 the /list part)
  return !!(match && match[1] == "" && match[4]);
};
|
|
1254
|
+
|
|
1255
|
+
// Returns true when `hashtag` (including its leading hash sign) is exactly one
// valid hashtag and nothing else.
twttr.txt.isValidHashtag = function(hashtag) {
  if (!hashtag) {
    return false;
  }

  var tags = twttr.txt.extractHashtags(hashtag);
  if (tags.length !== 1) {
    return false;
  }

  // The extractor returns the tag without the # sign, hence the .slice(1)
  return tags[0] === hashtag.slice(1);
};
|
|
1265
|
+
|
|
1266
|
+
// Validates a complete URL string.
//   url             - the candidate URL
//   unicodeDomains  - when true (the default) the authority is checked against
//                     the unicode-authority pattern, otherwise against the
//                     plain authority pattern
//   requireProtocol - when true (the default) the scheme must be present and
//                     be http or https
// Returns a truthy value for a valid URL and a falsy one otherwise.
twttr.txt.isValidUrl = function(url, unicodeDomains, requireProtocol) {
  if (unicodeDomains == null) {
    unicodeDomains = true;
  }

  if (requireProtocol == null) {
    requireProtocol = true;
  }

  if (!url) {
    return false;
  }

  // The pattern must consume the entire input.
  var parts = url.match(twttr.txt.regexen.validateUrlUnencoded);
  if (!parts || parts[0] !== url) {
    return false;
  }

  var regexen = twttr.txt.regexen,
      scheme = parts[1],
      authority = parts[2],
      path = parts[3],
      query = parts[4],
      fragment = parts[5];

  if (requireProtocol &&
      !(isValidMatch(scheme, regexen.validateUrlScheme) && scheme.match(/^https?$/i))) {
    return false;
  }
  if (!isValidMatch(path, regexen.validateUrlPath)) {
    return false;
  }
  // Query and fragment are optional parts.
  if (!isValidMatch(query, regexen.validateUrlQuery, true)) {
    return false;
  }
  if (!isValidMatch(fragment, regexen.validateUrlFragment, true)) {
    return false;
  }

  if (unicodeDomains) {
    return isValidMatch(authority, regexen.validateUrlUnicodeAuthority) || false;
  }
  return isValidMatch(authority, regexen.validateUrlAuthority);
};
|
|
1303
|
+
|
|
1304
|
+
// Tests whether `regex` matches `string` in its entirety.
//   string   - the candidate value
//   regex    - pattern to test with (expected to be a non-global regex, so the
//              first element of the match result is the matched text)
//   optional - when truthy, a missing/empty `string` counts as valid
// Returns true on a whole-string match; false, or null when the regex does not
// match at all, otherwise. For a required value anything that is not a string
// yields false (blank strings are ok, but are falsy, so stringiness is checked
// instead of truthiness).
// The matched text is read from the match result itself rather than from the
// deprecated global RegExp["$&"].
function isValidMatch(string, regex, optional) {
  if (optional) {
    if (!string) {
      return true;
    }
  } else if (typeof string !== "string") {
    return false;
  }

  var found = string.match(regex);
  return found && found[0] === string;
}
|
|
1314
|
+
|
|
1315
|
+
// CommonJS: expose the twttr.txt namespace as the module's exports.
if (typeof module != 'undefined' && module.exports) {
  module.exports = twttr.txt;
}

// Browser: publish `twttr` on window. When window.twttr already exists, the
// properties of the local object are copied onto it instead of replacing it.
if (typeof window != 'undefined') {
  if (!window.twttr) {
    window.twttr = twttr;
  } else {
    for (var prop in twttr) {
      window.twttr[prop] = twttr[prop];
    }
  }
}
|
|
1328
|
+
})();
|