xregexp-rails 1.5.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/assets/javascripts/xregexp.js +1125 -524
- data/app/assets/javascripts/xregexp/build.js +146 -0
- data/app/assets/javascripts/xregexp/matchrecursive.js +181 -0
- data/app/assets/javascripts/xregexp/prototypes.js +115 -0
- data/app/assets/javascripts/xregexp/unicode-base.js +152 -0
- data/app/assets/javascripts/xregexp/unicode-blocks.js +183 -0
- data/app/assets/javascripts/xregexp/unicode-categories.js +102 -0
- data/app/assets/javascripts/xregexp/unicode-properties.js +39 -0
- data/app/assets/javascripts/xregexp/unicode-scripts.js +98 -0
- data/lib/xregexp-rails/version.rb +1 -1
- metadata +9 -6
- data/app/assets/javascripts/xregexp-matchrecursive.js +0 -151
- data/app/assets/javascripts/xregexp-unicode-base.js +0 -73
- data/app/assets/javascripts/xregexp-unicode-blocks.js +0 -187
- data/app/assets/javascripts/xregexp-unicode-categories.js +0 -102
- data/app/assets/javascripts/xregexp-unicode-scripts.js +0 -102
@@ -0,0 +1,146 @@
|
|
1
|
+
/*!
|
2
|
+
* XRegExp.build v0.1.0
|
3
|
+
* (c) 2012 Steven Levithan <http://xregexp.com/>
|
4
|
+
* MIT License
|
5
|
+
* Inspired by RegExp.create by Lea Verou <http://lea.verou.me/>
|
6
|
+
*/
|
7
|
+
|
8
|
+
(function (XRegExp) {
|
9
|
+
"use strict";
|
10
|
+
|
11
|
+
// `subparts` tokenizes regex source for capture renumbering. Group 1 is the `(` that opens a
// capturing group (a `(` not followed by `?`); group 2 is the number of a numeric backreference.
// The last two alternatives (any other escaped character, and a whole character class) capture
// nothing; they are matched so their contents are never mistaken for groups or backreferences.
// `parts` puts two extra alternatives in front: `({{name}})` with the name in group 1, and
// `{{name}}` with the name in group 2. XRegExp.build reads the `subparts` groups as 3 and 4.
var subparts = /(\()(?!\?)|\\([1-9]\d*)|\\[\s\S]|\[(?:[^\\\]]|\\[\s\S])*]/g,
|
12
|
+
parts = XRegExp.union([/\({{([\w$]+)}}\)|{{([\w$]+)}}/, subparts], "g");
|
13
|
+
|
14
|
+
/**
 * Strips a leading `^` and a trailing unescaped `$` from a pattern, but only when BOTH are
 * present. A pattern anchored on just one side is returned untouched, so embedding it never
 * silently drops a lone anchor.
 * @private
 * @param {String} pattern Pattern to process.
 * @returns {String} Pattern with both edge anchors removed, or the original pattern.
 */
function deanchor(pattern) {
    var startAnchor = /^(?:\(\?:\))?\^/, // Leading `^` or `(?:)^` (handles /x cruft)
        endAnchor = /\$(?:\(\?:\))?$/; // Trailing `$` or `$(?:)` (handles /x cruft)
    if (
        startAnchor.test(pattern) &&
        // Escaped characters are removed before testing so that a trailing `\$` (a literal
        // dollar sign) is not mistaken for an anchor
        endAnchor.test(pattern.replace(/\\[\s\S]/g, ""))
    ) {
        return pattern.replace(startAnchor, "").replace(endAnchor, "");
    }
    return pattern;
}
|
28
|
+
|
29
|
+
/**
 * Normalizes a string or regex into a regex produced by XRegExp.
 * @private
 * @param {String|RegExp} value Value to convert.
 * @returns {RegExp} The regex itself when it already carries non-native `xregexp` data;
 *   otherwise a new regex compiled by XRegExp from the string or from the regex's source.
 */
function asXRegExp(value) {
    if (!XRegExp.isRegExp(value)) {
        return XRegExp(value);
    }
    // A regex that was already built from XRegExp syntax is reused as-is
    if (value.xregexp && !value.xregexp.isNative) {
        return value;
    }
    // Anything else is recompiled from its source (flags are not carried over)
    return XRegExp(value.source);
}
|
40
|
+
|
41
|
+
/**
|
42
|
+
* Builds regexes using named subpatterns, for readability and pattern reuse. Backreferences in the
|
43
|
+
* outer pattern and provided subpatterns are automatically renumbered to work correctly. Native
|
44
|
+
* flags used by provided subpatterns are ignored in favor of the `flags` argument.
|
45
|
+
* @memberOf XRegExp
|
46
|
+
* @param {String} pattern XRegExp pattern using `{{name}}` for embedded subpatterns. Allows
|
47
|
+
* `({{name}})` as shorthand for `(?<name>{{name}})`. Patterns cannot be embedded within
|
48
|
+
* character classes.
|
49
|
+
* @param {Object} subs Lookup object for named subpatterns. Values can be strings or regexes. A
|
50
|
+
* leading `^` and trailing unescaped `$` are stripped from subpatterns, if both are present.
|
51
|
+
* @param {String} [flags] Any combination of XRegExp flags.
|
52
|
+
* @returns {RegExp} Regex with interpolated subpatterns.
|
53
|
+
* @example
|
54
|
+
*
|
55
|
+
* var time = XRegExp.build('(?x)^ {{hours}} ({{minutes}}) $', {
|
56
|
+
* hours: XRegExp.build('{{h12}} : | {{h24}}', {
|
57
|
+
* h12: /1[0-2]|0?[1-9]/,
|
58
|
+
* h24: /2[0-3]|[01][0-9]/
|
59
|
+
* }, 'x'),
|
60
|
+
* minutes: /^[0-5][0-9]$/
|
61
|
+
* });
|
62
|
+
* time.test('10:59'); // -> true
|
63
|
+
* XRegExp.exec('10:59', time).minutes; // -> '59'
|
64
|
+
*/
|
65
|
+
XRegExp.build = function (pattern, subs, flags) {
    var leadingModifier = /^\(\?([\w$]+)\)/.exec(pattern),
        subpatterns = {},
        captureCount = 0, // Capturing groups emitted so far in the combined pattern
        capturesBeforeSub, // Value of `captureCount` just before the current subpattern's own groups
        outerCaptureCount = 0, // Capturing groups seen so far that belong to the outer pattern
        outerToFinal = [0], // Outer capture number -> capture number in the combined pattern
        outerNames,
        compiled,
        key,
        flag,
        i;

    // Merge flags from a leading mode modifier such as `(?x)` into `flags`, skipping duplicates
    if (leadingModifier) {
        flags = flags || "";
        for (i = 0; i < leadingModifier[1].length; ++i) {
            flag = leadingModifier[1].charAt(i);
            if (flags.indexOf(flag) === -1) {
                flags += flag;
            }
        }
    }

    for (key in subs) {
        if (!subs.hasOwnProperty(key)) {
            continue;
        }
        // Passing to XRegExp enables extended syntax for subpatterns provided as strings and
        // ensures each subpattern is valid on its own, lest an unescaped `(`, `)`, `[`, or
        // trailing `\` breaks the `(?:)` wrapper added below
        compiled = asXRegExp(subs[key]);
        // Deanchoring allows embedding independently useful anchored regexes. To really keep
        // the anchors, double them (i.e., `^^...$$`)
        subpatterns[key] = {
            pattern: deanchor(compiled.source),
            names: compiled.xregexp.captureNames || []
        };
    }

    // Compiling the outer pattern first ensures it is independently valid; its capture names
    // are remembered here and put back while the source is rewritten
    compiled = asXRegExp(pattern);
    outerNames = compiled.xregexp.captureNames || [];
    pattern = compiled.source.replace(parts, function (whole, wrappedName, bareName, outerParen, outerBackref) {
        var subName = wrappedName || bareName,
            sub,
            captureName,
            opening;

        if (!subName) {
            if (outerParen) { // Capturing group of the outer pattern
                captureName = outerNames[outerCaptureCount];
                outerToFinal[++outerCaptureCount] = ++captureCount;
                // Restore the group's name if it had one
                return captureName ? "(?<" + captureName + ">" : whole;
            }
            // Backreference of the outer pattern: point it at the renumbered group
            return outerBackref ? "\\" + outerToFinal[+outerBackref] : whole;
        }

        // Named subpattern: `({{name}})` or `{{name}}`
        if (!subpatterns.hasOwnProperty(subName)) {
            throw new ReferenceError("undefined property " + whole);
        }
        sub = subpatterns[subName];
        if (wrappedName) {
            // The wrapping parentheses count as a capturing group of the outer pattern. Keep
            // its own name if it has one; otherwise name the capture after the subpattern
            captureName = outerNames[outerCaptureCount];
            outerToFinal[++outerCaptureCount] = ++captureCount;
            opening = "(?<" + (captureName || subName) + ">";
        } else {
            opening = "(?:";
        }
        capturesBeforeSub = captureCount;
        return opening + sub.pattern.replace(subparts, function (match, paren, backref) {
            var innerName;
            if (paren) { // Capturing group inside the subpattern
                innerName = sub.names[captureCount - capturesBeforeSub];
                ++captureCount;
                if (innerName) {
                    return "(?<" + innerName + ">";
                }
            } else if (backref) {
                // Shift the subpattern's backreference past the groups that precede it
                return "\\" + (+backref + capturesBeforeSub);
            }
            return match;
        }) + ")";
    });

    return XRegExp(pattern, flags);
};
|
144
|
+
|
145
|
+
}(XRegExp));
|
146
|
+
|
@@ -0,0 +1,181 @@
|
|
1
|
+
/*!
|
2
|
+
* XRegExp.matchRecursive v0.2.0
|
3
|
+
* (c) 2009-2012 Steven Levithan <http://xregexp.com/>
|
4
|
+
* MIT License
|
5
|
+
*/
|
6
|
+
|
7
|
+
(function (XRegExp) {
|
8
|
+
"use strict";
|
9
|
+
|
10
|
+
/**
 * Returns a match detail object composed of the provided values. The parameter order matches
 * how `XRegExp.matchRecursive` calls it: the value name first, then the matched text.
 * @private
 * @param {String} name Name of the part, taken from the `valueNames` option.
 * @param {String} value Text of the part.
 * @param {Number} start Index in the searched string where the part starts.
 * @param {Number} end Index in the searched string where the part ends.
 * @returns {Object} Object with `name`, `value`, `start`, and `end` properties.
 */
function row(name, value, start, end) {
    return {name: name, value: value, start: start, end: end};
}
|
17
|
+
|
18
|
+
/**
|
19
|
+
* Returns an array of match strings between outermost left and right delimiters, or an array of
|
20
|
+
* objects with detailed match parts and position data. An error is thrown if delimiters are
|
21
|
+
* unbalanced within the data.
|
22
|
+
* @memberOf XRegExp
|
23
|
+
* @param {String} str String to search.
|
24
|
+
* @param {String} left Left delimiter as an XRegExp pattern.
|
25
|
+
* @param {String} right Right delimiter as an XRegExp pattern.
|
26
|
+
* @param {String} [flags] Flags for the left and right delimiters. Use any of: `gimnsxy`.
|
27
|
+
* @param {Object} [options] Lets you specify `valueNames` and `escapeChar` options.
|
28
|
+
* @returns {Array} Array of matches, or an empty array.
|
29
|
+
* @example
|
30
|
+
*
|
31
|
+
* // Basic usage
|
32
|
+
* var str = '(t((e))s)t()(ing)';
|
33
|
+
* XRegExp.matchRecursive(str, '\\(', '\\)', 'g');
|
34
|
+
* // -> ['t((e))s', '', 'ing']
|
35
|
+
*
|
36
|
+
* // Extended information mode with valueNames
|
37
|
+
* str = 'Here is <div> <div>an</div></div> example';
|
38
|
+
* XRegExp.matchRecursive(str, '<div\\s*>', '</div>', 'gi', {
|
39
|
+
* valueNames: ['between', 'left', 'match', 'right']
|
40
|
+
* });
|
41
|
+
* // -> [
|
42
|
+
* // {name: 'between', value: 'Here is ', start: 0, end: 8},
|
43
|
+
* // {name: 'left', value: '<div>', start: 8, end: 13},
|
44
|
+
* // {name: 'match', value: ' <div>an</div>', start: 13, end: 27},
|
45
|
+
* // {name: 'right', value: '</div>', start: 27, end: 33},
|
46
|
+
* // {name: 'between', value: ' example', start: 33, end: 41}
|
47
|
+
* // ]
|
48
|
+
*
|
49
|
+
* // Omitting unneeded parts with null valueNames, and using escapeChar
|
50
|
+
* str = '...{1}\\{{function(x,y){return y+x;}}';
|
51
|
+
* XRegExp.matchRecursive(str, '{', '}', 'g', {
|
52
|
+
* valueNames: ['literal', null, 'value', null],
|
53
|
+
* escapeChar: '\\'
|
54
|
+
* });
|
55
|
+
* // -> [
|
56
|
+
* // {name: 'literal', value: '...', start: 0, end: 3},
|
57
|
+
* // {name: 'value', value: '1', start: 4, end: 5},
|
58
|
+
* // {name: 'literal', value: '\\{', start: 6, end: 8},
|
59
|
+
* // {name: 'value', value: 'function(x,y){return y+x;}', start: 9, end: 35}
|
60
|
+
* // ]
|
61
|
+
*
|
62
|
+
* // Sticky mode via flag y
|
63
|
+
* str = '<1><<<2>>><3>4<5>';
|
64
|
+
* XRegExp.matchRecursive(str, '<', '>', 'gy');
|
65
|
+
* // -> ['1', '<<2>>', '3']
|
66
|
+
*/
|
67
|
+
XRegExp.matchRecursive = function (str, left, right, flags, options) {
    flags = flags || "";
    options = options || {};
    var isGlobal = flags.indexOf("g") > -1,
        isSticky = flags.indexOf("y") > -1,
        delimFlags = flags.replace(/y/g, ""), // Flag y is handled by the loop, not the delimiters
        escapeChar = options.escapeChar,
        valueNames = options.valueNames,
        results = [],
        depth = 0, // Number of currently open left delimiters
        delimStart = 0,
        delimEnd = 0,
        lastOuterEnd = 0,
        outerStart,
        innerStart,
        opener,
        closer,
        delim,
        skipper,
        skipped,
        spans,
        i;
    left = XRegExp(left, delimFlags);
    right = XRegExp(right, delimFlags);

    if (escapeChar) {
        if (escapeChar.length > 1) {
            throw new SyntaxError("can't use more than one escape character");
        }
        escapeChar = XRegExp.escape(escapeChar);
        // Matches a run of escaped characters and of characters that neither start a delimiter
        // nor are the escape character. XRegExp.union is used so that backreferences in `left`
        // and `right` are rewritten safely
        skipper = new RegExp(
            "(?:" + escapeChar + "[\\S\\s]|(?:(?!" + XRegExp.union([left, right]).source + ")[^" + escapeChar + "])+)+",
            flags.replace(/[^im]+/g, "") // Only flags i and m are relevant to this native regex
        );
    }

    for (;;) {
        // With an escape character, first move past escaped characters so that the next
        // delimiter search starts at a position that can hold a real delimiter
        if (escapeChar) {
            skipped = XRegExp.exec(str, skipper, delimEnd, "sticky");
            if (skipped) {
                delimEnd += skipped[0].length;
            }
        }
        opener = XRegExp.exec(str, left, delimEnd);
        closer = XRegExp.exec(str, right, delimEnd);
        // Keep only the leftmost delimiter; on a tie the left delimiter wins
        if (opener && closer) {
            if (closer.index < opener.index) {
                opener = null;
            } else {
                closer = null;
            }
        }
        delim = opener || closer;
        if (delim) {
            delimStart = delim.index;
            delimEnd = delimStart + delim[0].length;
        } else if (!depth) {
            // No delimiter left and nothing open: done
            break;
        }
        // Sticky mode: stop once the next outermost match would not start right where the
        // previous one ended
        if (isSticky && !depth && delimStart > lastOuterEnd) {
            break;
        }
        if (opener) {
            if (!depth) {
                outerStart = delimStart;
                innerStart = delimEnd;
            }
            ++depth;
        } else if (closer && depth) {
            if (!--depth) {
                // An outermost pair was just closed: record it
                if (valueNames) {
                    spans = [
                        [lastOuterEnd, outerStart], // Text between matches
                        [outerStart, innerStart], // Left delimiter
                        [innerStart, delimStart], // Text inside the delimiters
                        [delimStart, delimEnd] // Right delimiter
                    ];
                    for (i = 0; i < 4; ++i) {
                        // The "between" part is only reported when it is not empty
                        if (valueNames[i] && (i > 0 || outerStart > lastOuterEnd)) {
                            results.push(row(valueNames[i], str.slice(spans[i][0], spans[i][1]), spans[i][0], spans[i][1]));
                        }
                    }
                } else {
                    results.push(str.slice(innerStart, delimStart));
                }
                lastOuterEnd = delimEnd;
                if (!isGlobal) {
                    break;
                }
            }
        } else {
            // Either a right delimiter with nothing open, or the string ended with open
            // left delimiters
            throw new Error("string contains unbalanced delimiters");
        }
        // A delimiter that matched an empty string must not stall the search
        if (delimStart === delimEnd) {
            ++delimEnd;
        }
    }

    // In global, non-sticky mode, report any text after the last match as a "between" part
    if (isGlobal && !isSticky && valueNames && valueNames[0] && str.length > lastOuterEnd) {
        results.push(row(valueNames[0], str.slice(lastOuterEnd), lastOuterEnd, str.length));
    }

    return results;
};
|
179
|
+
|
180
|
+
}(XRegExp));
|
181
|
+
|
@@ -0,0 +1,115 @@
|
|
1
|
+
/*!
|
2
|
+
* XRegExp Prototype Methods v1.0.0
|
3
|
+
* (c) 2012 Steven Levithan <http://xregexp.com/>
|
4
|
+
* MIT License
|
5
|
+
*/
|
6
|
+
|
7
|
+
/**
|
8
|
+
* Adds a collection of methods to `XRegExp.prototype`. RegExp objects copied by XRegExp are also
|
9
|
+
* augmented with any `XRegExp.prototype` methods. Hence, the following work equivalently:
|
10
|
+
*
|
11
|
+
* XRegExp('[a-z]', 'ig').xexec('abc');
|
12
|
+
* XRegExp(/[a-z]/ig).xexec('abc');
|
13
|
+
* XRegExp.globalize(/[a-z]/i).xexec('abc');
|
14
|
+
*/
|
15
|
+
(function (XRegExp) {
|
16
|
+
"use strict";
|
17
|
+
|
18
|
+
/**
 * Copies every own enumerable property of `b` onto `a`, overwriting existing properties of the
 * same name. Inherited properties of `b` are skipped. Nothing is returned.
 * @private
 * @param {Object} a Object that will receive new properties.
 * @param {Object} b Object whose properties will be copied.
 */
function extend(a, b) {
    var key;
    for (key in b) {
        if (!b.hasOwnProperty(key)) {
            continue;
        }
        a[key] = b[key];
    }
}
|
32
|
+
|
33
|
+
var prototypeMethods = {

    /**
     * Runs the regex's `test` method against the first element of the provided arguments array.
     * @memberOf XRegExp.prototype
     * @param {*} context Ignored. Accepted only for congruity with `Function.prototype.apply`.
     * @param {Array} args Array with the string to search as its first value.
     * @returns {Boolean} Whether the regex matched the provided value.
     * @example
     *
     * XRegExp('[a-z]').apply(null, ['abc']); // -> true
     */
    apply: function (context, args) {
        var subject = args[0];
        return this.test(subject);
    },

    /**
     * Runs the regex's `test` method against the provided string.
     * @memberOf XRegExp.prototype
     * @param {*} context Ignored. Accepted only for congruity with `Function.prototype.call`.
     * @param {String} str String to search.
     * @returns {Boolean} Whether the regex matched the provided value.
     * @example
     *
     * XRegExp('[a-z]').call(null, 'abc'); // -> true
     */
    call: function (context, str) {
        return this.test(str);
    },

    /**
     * Forwards to `XRegExp.forEach`, using this regex as the regex argument.
     * @memberOf XRegExp.prototype
     * @example
     *
     * XRegExp('\\d').forEach('1a2345', function (match, i) {
     *   if (i % 2) this.push(+match[0]);
     * }, []);
     * // -> [2, 4]
     */
    forEach: function (str, callback, context) {
        var regex = this;
        return XRegExp.forEach(str, regex, callback, context);
    },

    /**
     * Forwards to `XRegExp.globalize`, passing this regex.
     * @memberOf XRegExp.prototype
     * @example
     *
     * var globalCopy = XRegExp('regex').globalize();
     * globalCopy.global; // -> true
     */
    globalize: function () {
        var regex = this;
        return XRegExp.globalize(regex);
    },

    /**
     * Forwards to `XRegExp.exec`, using this regex as the regex argument.
     * @memberOf XRegExp.prototype
     * @example
     *
     * var match = XRegExp('U\\+(?<hex>[0-9A-F]{4})').xexec('U+2620');
     * match.hex; // -> '2620'
     */
    xexec: function (str, pos, sticky) {
        var regex = this;
        return XRegExp.exec(str, regex, pos, sticky);
    },

    /**
     * Forwards to `XRegExp.test`, using this regex as the regex argument.
     * @memberOf XRegExp.prototype
     * @example
     *
     * XRegExp('c').xtest('abc'); // -> true
     */
    xtest: function (str, pos, sticky) {
        var regex = this;
        return XRegExp.test(str, regex, pos, sticky);
    }

};

// Install the methods on `XRegExp.prototype`
extend(XRegExp.prototype, prototypeMethods);
|
113
|
+
|
114
|
+
}(XRegExp));
|
115
|
+
|
@@ -0,0 +1,152 @@
|
|
1
|
+
/*!
|
2
|
+
* XRegExp Unicode Base v1.0.0
|
3
|
+
* (c) 2008-2012 Steven Levithan <http://xregexp.com/>
|
4
|
+
* MIT License
|
5
|
+
* Uses Unicode 6.1 <http://unicode.org/>
|
6
|
+
*/
|
7
|
+
|
8
|
+
/**
|
9
|
+
* Adds support for the `\p{L}` or `\p{Letter}` Unicode category. Addon packages for other Unicode
|
10
|
+
* categories, scripts, blocks, and properties are available separately. All Unicode tokens can be
|
11
|
+
* inverted using `\P{..}` or `\p{^..}`. Token names are case insensitive, and any spaces, hyphens,
|
12
|
+
* and underscores are ignored.
|
13
|
+
* @requires XRegExp
|
14
|
+
*/
|
15
|
+
(function (XRegExp) {
|
16
|
+
"use strict";
|
17
|
+
|
18
|
+
// Token data keyed by slugged token name. XRegExp.addUnicodePackage stores each package's expanded
// code point string here (and the same string under each alias); cacheInversion stores the
// inverted form of a token under the token's key prefixed with `^`.
var unicode = {};
|
19
|
+
|
20
|
+
/*--------------------------------------
|
21
|
+
* Private helper functions
|
22
|
+
*------------------------------------*/
|
23
|
+
|
24
|
+
// Normalizes a token name for lookup: removes every hyphen, space, and underscore, then lowercases
function slug(name) {
    var stripped = name.replace(/[- _]+/g, "");
    return stripped.toLowerCase();
}
|
28
|
+
|
29
|
+
// Prefixes every run of four word characters with `\u`, turning a packed list of code points and
// ranges (e.g. `0030-0039`) into text usable inside a regex character class
function expand(str) {
    var fourChars = /\w{4}/g;
    return str.replace(fourChars, "\\u$&");
}
|
33
|
+
|
34
|
+
// Left-pads the string with zeros until it is at least four characters long
function pad4(str) {
    var padded = str;
    for (; padded.length < 4;) {
        padded = "0" + padded;
    }
    return padded;
}
|
41
|
+
|
42
|
+
// Parses a hexadecimal string into a number
function dec(hex) {
    var radix = 16;
    return parseInt(hex, radix);
}
|
46
|
+
|
47
|
+
// Formats a decimal number (or decimal string) as a lowercase hexadecimal string
function hex(dec) {
    var value = parseInt(dec, 10);
    return value.toString(16);
}
|
51
|
+
|
52
|
+
// Builds the complement, within \u0000-\uFFFF, of an expanded list of code points and ranges.
// The input is walked in order, emitting each gap between consecutive entries.
function invert(range) {
    var pieces = [],
        prevEnd = -1; // Last code point covered so far; -1 means nothing yet
    XRegExp.forEach(range, /\\u(\w{4})(?:-\\u(\w{4}))?/, function (m) {
        var first = dec(m[1]),
            gapStart = prevEnd + 1;
        if (first > gapStart) {
            pieces.push("\\u" + pad4(hex(gapStart)));
            // Write the gap as a range only when it holds more than one code point
            if (first - 1 > gapStart) {
                pieces.push("-\\u" + pad4(hex(first - 1)));
            }
        }
        // A single code point ends where it starts
        prevEnd = dec(m[2] || m[1]);
    });
    // Cover whatever remains up to \uFFFF
    if (prevEnd < 0xFFFF) {
        pieces.push("\\u" + pad4(hex(prevEnd + 1)));
        if (prevEnd < 0xFFFE) {
            pieces.push("-\\uFFFF");
        }
    }
    return pieces.join("");
}
|
75
|
+
|
76
|
+
// Returns the inverted form of a token, computing it on first use and caching it in `unicode`
// under the token's key prefixed with `^`
function cacheInversion(item) {
    var key = "^" + item;
    if (!unicode[key]) {
        unicode[key] = invert(unicode[item]);
    }
    return unicode[key];
}
|
80
|
+
|
81
|
+
/*--------------------------------------
|
82
|
+
* Core functionality
|
83
|
+
*------------------------------------*/
|
84
|
+
|
85
|
+
XRegExp.install("extensibility");
|
86
|
+
|
87
|
+
/**
|
88
|
+
* Adds to the list of Unicode properties that XRegExp regexes can match via \p{..} or \P{..}.
|
89
|
+
* @memberOf XRegExp
|
90
|
+
* @param {Object} pack Named sets of Unicode code points and ranges.
|
91
|
+
* @param {Object} [aliases] Aliases for the primary token names.
|
92
|
+
* @example
|
93
|
+
*
|
94
|
+
* XRegExp.addUnicodePackage({
|
95
|
+
* XDigit: '0030-00390041-00460061-0066' // 0-9A-Fa-f
|
96
|
+
* }, {
|
97
|
+
* XDigit: 'Hexadecimal'
|
98
|
+
* });
|
99
|
+
*/
|
100
|
+
XRegExp.addUnicodePackage = function (pack, aliases) {
    var name;
    if (!XRegExp.isInstalled("extensibility")) {
        throw new Error("extensibility must be installed before adding Unicode packages");
    }
    // Store each set under its slugged name, expanded into `\uXXXX` form
    if (pack) {
        for (name in pack) {
            if (!pack.hasOwnProperty(name)) {
                continue;
            }
            unicode[slug(name)] = expand(pack[name]);
        }
    }
    // Make each alias refer to the data stored for its primary name. Aliases are handled after
    // the pack, so a primary name added by this same call can be aliased
    if (aliases) {
        for (name in aliases) {
            if (!aliases.hasOwnProperty(name)) {
                continue;
            }
            unicode[slug(aliases[name])] = unicode[slug(name)];
        }
    }
};
|
120
|
+
|
121
|
+
/* Adds data for the Unicode `Letter` category. Addon packages include other categories, scripts,
|
122
|
+
* blocks, and properties.
|
123
|
+
*/
|
124
|
+
XRegExp.addUnicodePackage({
|
125
|
+
L: "0041-005A0061-007A00AA00B500BA00C0-00D600D8-00F600F8-02C102C6-02D102E0-02E402EC02EE0370-037403760377037A-037D03860388-038A038C038E-03A103A3-03F503F7-0481048A-05270531-055605590561-058705D0-05EA05F0-05F20620-064A066E066F0671-06D306D506E506E606EE06EF06FA-06FC06FF07100712-072F074D-07A507B107CA-07EA07F407F507FA0800-0815081A082408280840-085808A008A2-08AC0904-0939093D09500958-09610971-09770979-097F0985-098C098F09900993-09A809AA-09B009B209B6-09B909BD09CE09DC09DD09DF-09E109F009F10A05-0A0A0A0F0A100A13-0A280A2A-0A300A320A330A350A360A380A390A59-0A5C0A5E0A72-0A740A85-0A8D0A8F-0A910A93-0AA80AAA-0AB00AB20AB30AB5-0AB90ABD0AD00AE00AE10B05-0B0C0B0F0B100B13-0B280B2A-0B300B320B330B35-0B390B3D0B5C0B5D0B5F-0B610B710B830B85-0B8A0B8E-0B900B92-0B950B990B9A0B9C0B9E0B9F0BA30BA40BA8-0BAA0BAE-0BB90BD00C05-0C0C0C0E-0C100C12-0C280C2A-0C330C35-0C390C3D0C580C590C600C610C85-0C8C0C8E-0C900C92-0CA80CAA-0CB30CB5-0CB90CBD0CDE0CE00CE10CF10CF20D05-0D0C0D0E-0D100D12-0D3A0D3D0D4E0D600D610D7A-0D7F0D85-0D960D9A-0DB10DB3-0DBB0DBD0DC0-0DC60E01-0E300E320E330E40-0E460E810E820E840E870E880E8A0E8D0E94-0E970E99-0E9F0EA1-0EA30EA50EA70EAA0EAB0EAD-0EB00EB20EB30EBD0EC0-0EC40EC60EDC-0EDF0F000F40-0F470F49-0F6C0F88-0F8C1000-102A103F1050-1055105A-105D106110651066106E-10701075-1081108E10A0-10C510C710CD10D0-10FA10FC-1248124A-124D1250-12561258125A-125D1260-1288128A-128D1290-12B012B2-12B512B8-12BE12C012C2-12C512C8-12D612D8-13101312-13151318-135A1380-138F13A0-13F41401-166C166F-167F1681-169A16A0-16EA1700-170C170E-17111720-17311740-17511760-176C176E-17701780-17B317D717DC1820-18771880-18A818AA18B0-18F51900-191C1950-196D1970-19741980-19AB19C1-19C71A00-1A161A20-1A541AA71B05-1B331B45-1B4B1B83-1BA01BAE1BAF1BBA-1BE51C00-1C231C4D-1C4F1C5A-1C7D1CE9-1CEC1CEE-1CF11CF51CF61D00-1DBF1E00-1F151F18-1F1D1F20-1F451F48-1F4D1F50-1F571F591F5B1F5D1F5F-1F7D1F80-1FB41FB6-1FBC1FBE1FC2-1FC41FC6-1FCC1FD0-1FD31FD6-1FDB1FE0-1FEC1FF2-1FF41FF6-1FFC2071207F2090-209C21022107210A-211321152119-211D212421262128212A-212D212F-2139213C-213F2145-2149214E218321842C0
0-2C2E2C30-2C5E2C60-2CE42CEB-2CEE2CF22CF32D00-2D252D272D2D2D30-2D672D6F2D80-2D962DA0-2DA62DA8-2DAE2DB0-2DB62DB8-2DBE2DC0-2DC62DC8-2DCE2DD0-2DD62DD8-2DDE2E2F300530063031-3035303B303C3041-3096309D-309F30A1-30FA30FC-30FF3105-312D3131-318E31A0-31BA31F0-31FF3400-4DB54E00-9FCCA000-A48CA4D0-A4FDA500-A60CA610-A61FA62AA62BA640-A66EA67F-A697A6A0-A6E5A717-A71FA722-A788A78B-A78EA790-A793A7A0-A7AAA7F8-A801A803-A805A807-A80AA80C-A822A840-A873A882-A8B3A8F2-A8F7A8FBA90A-A925A930-A946A960-A97CA984-A9B2A9CFAA00-AA28AA40-AA42AA44-AA4BAA60-AA76AA7AAA80-AAAFAAB1AAB5AAB6AAB9-AABDAAC0AAC2AADB-AADDAAE0-AAEAAAF2-AAF4AB01-AB06AB09-AB0EAB11-AB16AB20-AB26AB28-AB2EABC0-ABE2AC00-D7A3D7B0-D7C6D7CB-D7FBF900-FA6DFA70-FAD9FB00-FB06FB13-FB17FB1DFB1F-FB28FB2A-FB36FB38-FB3CFB3EFB40FB41FB43FB44FB46-FBB1FBD3-FD3DFD50-FD8FFD92-FDC7FDF0-FDFBFE70-FE74FE76-FEFCFF21-FF3AFF41-FF5AFF66-FFBEFFC2-FFC7FFCA-FFCFFFD2-FFD7FFDA-FFDC"
|
126
|
+
}, {
|
127
|
+
L: "Letter"
|
128
|
+
});
|
129
|
+
|
130
|
+
/* Registers the Unicode property syntax with XRegExp: \p{..}, \P{..}, and \p{^..}
 */
XRegExp.addToken(
    /\\([pP]){(\^?)([^}]*)}/,
    function (match, scope) {
        var negatedByCase = match[1] === "P", // `\P{..}`
            negatedByCaret = match[2], // `\p{^..}`
            inv = (negatedByCase || negatedByCaret) ? "^" : "",
            item = slug(match[3]);
        // The double negative \P{^..} is invalid
        if (negatedByCase && negatedByCaret) {
            throw new SyntaxError("invalid double negation \\P{^");
        }
        if (!unicode.hasOwnProperty(item)) {
            throw new SyntaxError("invalid or unknown Unicode property " + match[0]);
        }
        if (scope === "class") {
            // Already inside a character class, so no brackets can be added; a negated token
            // is replaced by its precomputed inverse instead
            return inv ? cacheInversion(item) : unicode[item];
        }
        // Outside a character class the token becomes its own (possibly negated) class
        return "[" + inv + unicode[item] + "]";
    },
    {scope: "all"}
);
|
150
|
+
|
151
|
+
}(XRegExp));
|
152
|
+
|