xregexp-rails 1.5.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/javascripts/xregexp.js +1125 -524
- data/app/assets/javascripts/xregexp/build.js +146 -0
- data/app/assets/javascripts/xregexp/matchrecursive.js +181 -0
- data/app/assets/javascripts/xregexp/prototypes.js +115 -0
- data/app/assets/javascripts/xregexp/unicode-base.js +152 -0
- data/app/assets/javascripts/xregexp/unicode-blocks.js +183 -0
- data/app/assets/javascripts/xregexp/unicode-categories.js +102 -0
- data/app/assets/javascripts/xregexp/unicode-properties.js +39 -0
- data/app/assets/javascripts/xregexp/unicode-scripts.js +98 -0
- data/lib/xregexp-rails/version.rb +1 -1
- metadata +9 -6
- data/app/assets/javascripts/xregexp-matchrecursive.js +0 -151
- data/app/assets/javascripts/xregexp-unicode-base.js +0 -73
- data/app/assets/javascripts/xregexp-unicode-blocks.js +0 -187
- data/app/assets/javascripts/xregexp-unicode-categories.js +0 -102
- data/app/assets/javascripts/xregexp-unicode-scripts.js +0 -102
@@ -0,0 +1,146 @@
|
|
1
|
+
/*!
|
2
|
+
* XRegExp.build v0.1.0
|
3
|
+
* (c) 2012 Steven Levithan <http://xregexp.com/>
|
4
|
+
* MIT License
|
5
|
+
* Inspired by RegExp.create by Lea Verou <http://lea.verou.me/>
|
6
|
+
*/
|
7
|
+
|
8
|
+
(function (XRegExp) {
|
9
|
+
"use strict";
|
10
|
+
|
11
|
+
// Tokens that matter when captures are renumbered: an opening `(` that is not followed by `?`
// (capture 1), a numbered backreference (capture 2), any other escaped character, or a whole
// character class. The last two alternatives carry no captures; they only consume text so
// that parentheses and backslashes inside them are not mistaken for the first two.
var subparts = /(\()(?!\?)|\\([1-9]\d*)|\\[\s\S]|\[(?:[^\\\]]|\\[\s\S])*]/g;

// The two interpolation forms, `({{name}})` (capture 1) and `{{name}}` (capture 2), combined
// with the `subparts` tokens. The replacement callback in `XRegExp.build` reads the group and
// backreference captures of `subparts` as its third and fourth captures.
var parts = XRegExp.union([/\({{([\w$]+)}}\)|{{([\w$]+)}}/, subparts], "g");
|
13
|
+
|
14
|
+
/**
 * Strips a leading `^` and trailing unescaped `$`, but only if both are present. A pattern
 * that is anchored on one side only is returned unchanged, so the remaining anchor keeps its
 * meaning when the pattern is embedded in a larger one.
 * @private
 * @param {String} pattern Pattern to process.
 * @returns {String} Pattern with edge anchors removed, or the original pattern.
 */
function deanchor(pattern) {
    var startAnchor = /^(?:\(\?:\))?\^/, // Leading `^` or `(?:)^` (handles /x cruft)
        endAnchor = /\$(?:\(\?:\))?$/; // Trailing `$` or `$(?:)` (handles /x cruft)
    if (
        startAnchor.test(pattern) &&
            // Drop every escaped character first so that a trailing `\$` is not taken for an anchor
            endAnchor.test(pattern.replace(/\\[\s\S]/g, ""))
    ) {
        return pattern.replace(startAnchor, "").replace(endAnchor, "");
    }
    return pattern;
}
|
28
|
+
|
29
|
+
/**
 * Normalizes a pattern value into a regex produced by XRegExp.
 * @private
 * @param {String|RegExp} value Value to convert.
 * @returns {RegExp} The value itself when it is a regex that carries an `xregexp` property not
 *   flagged `isNative`; otherwise the result of passing the string (or the regex's `source`)
 *   to `XRegExp`.
 */
function asXRegExp(value) {
    // Anything that is not a regex is handed to XRegExp as a pattern
    if (!XRegExp.isRegExp(value)) {
        return XRegExp(value);
    }
    // Regexes already built by XRegExp are reused as they are
    if (value.xregexp && !value.xregexp.isNative) {
        return value;
    }
    // Other regexes are rebuilt from their source; their flags are not carried over
    return XRegExp(value.source);
}
|
40
|
+
|
41
|
+
/**
|
42
|
+
* Builds regexes using named subpatterns, for readability and pattern reuse. Backreferences in the
|
43
|
+
* outer pattern and provided subpatterns are automatically renumbered to work correctly. Native
|
44
|
+
* flags used by provided subpatterns are ignored in favor of the `flags` argument.
|
45
|
+
* @memberOf XRegExp
|
46
|
+
* @param {String} pattern XRegExp pattern using `{{name}}` for embedded subpatterns. Allows
|
47
|
+
* `({{name}})` as shorthand for `(?<name>{{name}})`. Patterns cannot be embedded within
|
48
|
+
* character classes.
|
49
|
+
* @param {Object} subs Lookup object for named subpatterns. Values can be strings or regexes. A
|
50
|
+
* leading `^` and trailing unescaped `$` are stripped from subpatterns, if both are present.
|
51
|
+
* @param {String} [flags] Any combination of XRegExp flags.
|
52
|
+
* @returns {RegExp} Regex with interpolated subpatterns.
|
53
|
+
* @example
|
54
|
+
*
|
55
|
+
* var time = XRegExp.build('(?x)^ {{hours}} ({{minutes}}) $', {
|
56
|
+
* hours: XRegExp.build('{{h12}} : | {{h24}}', {
|
57
|
+
* h12: /1[0-2]|0?[1-9]/,
|
58
|
+
* h24: /2[0-3]|[01][0-9]/
|
59
|
+
* }, 'x'),
|
60
|
+
* minutes: /^[0-5][0-9]$/
|
61
|
+
* });
|
62
|
+
* time.test('10:59'); // -> true
|
63
|
+
* XRegExp.exec('10:59', time).minutes; // -> '59'
|
64
|
+
*/
|
65
|
+
XRegExp.build = function (pattern, subs, flags) {
    var modeModifier = /^\(\?([\w$]+)\)/.exec(pattern),
        subpatterns = {}, // Prepared subpatterns, keyed by name
        totalCaps = 0, // Capturing groups emitted so far in the final pattern
        outerCaps = 0, // Capturing groups seen so far in the outer pattern
        outerToFinal = [0], // Outer capture number -> capture number in the final pattern
        outerNames,
        outer,
        prepared,
        compiled,
        name,
        flag,
        i;

    // Registers one more capturing group of the outer pattern and returns its name, if it has one
    function openOuterGroup() {
        var groupName = outerNames[outerCaps];
        outerCaps += 1;
        totalCaps += 1;
        outerToFinal[outerCaps] = totalCaps;
        return groupName;
    }

    // Flags given in a leading mode modifier such as `(?x)` are merged into `flags`
    if (modeModifier) {
        flags = flags || "";
        for (i = 0; i < modeModifier[1].length; i += 1) {
            flag = modeModifier[1].charAt(i);
            if (flags.indexOf(flag) === -1) { // Skip flags that are already present
                flags += flag;
            }
        }
    }

    for (name in subs) {
        if (!subs.hasOwnProperty(name)) {
            continue;
        }
        // Passing to XRegExp enables extended syntax for subpatterns provided as strings and
        // ensures each one is valid on its own, so an unescaped `(`, `)`, `[`, or trailing `\`
        // cannot break the group it gets wrapped in. For subpatterns provided as regexes, it
        // dies on octals and adds the `xregexp` property
        prepared = asXRegExp(subs[name]);
        // Edge anchors are removed so that independently useful anchored regexes can be
        // embedded. Double the anchors (`^^...$$`) to keep one pair
        subpatterns[name] = {
            pattern: deanchor(prepared.source),
            names: prepared.xregexp.captureNames || []
        };
    }

    // The outer pattern also goes through XRegExp first: this dies on octals and ensures the
    // pattern is valid on its own. Its capture names are put back while rewriting below
    outer = asXRegExp(pattern);
    outerNames = outer.xregexp.captureNames || [];

    compiled = outer.source.replace(parts, function (token, wrappedName, bareName, outerParen, outerBackref) {
        var subName = wrappedName || bareName,
            groupName,
            opener,
            capsBefore,
            body;

        // Tokens that belong to the outer pattern itself
        if (!subName) {
            if (outerParen) { // Capturing group: restore its name if it had one
                groupName = openOuterGroup();
                return groupName ? "(?<" + groupName + ">" : token;
            }
            if (outerBackref) { // Backreference: point it at the group's new number
                return "\\" + outerToFinal[+outerBackref];
            }
            return token;
        }

        // `{{name}}` or `({{name}})`
        if (!subpatterns.hasOwnProperty(subName)) {
            throw new ReferenceError("undefined property " + token);
        }
        if (wrappedName) {
            // The subpattern was wrapped in a capturing group. Keep that group's own name if it
            // has one; otherwise the subpattern name becomes the capture name
            groupName = openOuterGroup();
            opener = "(?<" + (groupName || subName) + ">";
        } else {
            opener = "(?:";
        }

        // Captures emitted before this subpattern's own groups; used as the renumbering offset
        capsBefore = totalCaps;
        body = subpatterns[subName].pattern.replace(subparts, function (match, paren, backref) {
            var innerName;
            if (paren) { // Capturing group inside the subpattern
                innerName = subpatterns[subName].names[totalCaps - capsBefore];
                totalCaps += 1;
                return innerName ? "(?<" + innerName + ">" : match;
            }
            if (backref) { // Backreference inside the subpattern: shift by the offset
                return "\\" + (+backref + capsBefore);
            }
            return match;
        });

        return opener + body + ")";
    });

    return XRegExp(compiled, flags);
};
|
144
|
+
|
145
|
+
}(XRegExp));
|
146
|
+
|
@@ -0,0 +1,181 @@
|
|
1
|
+
/*!
|
2
|
+
* XRegExp.matchRecursive v0.2.0
|
3
|
+
* (c) 2009-2012 Steven Levithan <http://xregexp.com/>
|
4
|
+
* MIT License
|
5
|
+
*/
|
6
|
+
|
7
|
+
(function (XRegExp) {
|
8
|
+
"use strict";
|
9
|
+
|
10
|
+
/**
 * Returns a match detail object composed of the provided values. The parameter order matches
 * how `XRegExp.matchRecursive` calls this helper: the part's name first, then its text.
 * @private
 * @param {String} name Name of the part, taken from the `valueNames` option.
 * @param {String} value Text of the part.
 * @param {Number} start Index in the searched string where the part starts.
 * @param {Number} end Index in the searched string where the part ends.
 * @returns {Object} Object with `name`, `value`, `start`, and `end` properties.
 */
function row(name, value, start, end) {
    return {name: name, value: value, start: start, end: end};
}
|
17
|
+
|
18
|
+
/**
|
19
|
+
* Returns an array of match strings between outermost left and right delimiters, or an array of
|
20
|
+
* objects with detailed match parts and position data. An error is thrown if delimiters are
|
21
|
+
* unbalanced within the data.
|
22
|
+
* @memberOf XRegExp
|
23
|
+
* @param {String} str String to search.
|
24
|
+
* @param {String} left Left delimiter as an XRegExp pattern.
|
25
|
+
* @param {String} right Right delimiter as an XRegExp pattern.
|
26
|
+
* @param {String} [flags] Flags for the left and right delimiters. Use any of: `gimnsxy`.
|
27
|
+
* @param {Object} [options] Lets you specify `valueNames` and `escapeChar` options.
|
28
|
+
* @returns {Array} Array of matches, or an empty array.
|
29
|
+
* @example
|
30
|
+
*
|
31
|
+
* // Basic usage
|
32
|
+
* var str = '(t((e))s)t()(ing)';
|
33
|
+
* XRegExp.matchRecursive(str, '\\(', '\\)', 'g');
|
34
|
+
* // -> ['t((e))s', '', 'ing']
|
35
|
+
*
|
36
|
+
* // Extended information mode with valueNames
|
37
|
+
* str = 'Here is <div> <div>an</div></div> example';
|
38
|
+
* XRegExp.matchRecursive(str, '<div\\s*>', '</div>', 'gi', {
|
39
|
+
* valueNames: ['between', 'left', 'match', 'right']
|
40
|
+
* });
|
41
|
+
* // -> [
|
42
|
+
* // {name: 'between', value: 'Here is ', start: 0, end: 8},
|
43
|
+
* // {name: 'left', value: '<div>', start: 8, end: 13},
|
44
|
+
* // {name: 'match', value: ' <div>an</div>', start: 13, end: 27},
|
45
|
+
* // {name: 'right', value: '</div>', start: 27, end: 33},
|
46
|
+
* // {name: 'between', value: ' example', start: 33, end: 41}
|
47
|
+
* // ]
|
48
|
+
*
|
49
|
+
* // Omitting unneeded parts with null valueNames, and using escapeChar
|
50
|
+
* str = '...{1}\\{{function(x,y){return y+x;}}';
|
51
|
+
* XRegExp.matchRecursive(str, '{', '}', 'g', {
|
52
|
+
* valueNames: ['literal', null, 'value', null],
|
53
|
+
* escapeChar: '\\'
|
54
|
+
* });
|
55
|
+
* // -> [
|
56
|
+
* // {name: 'literal', value: '...', start: 0, end: 3},
|
57
|
+
* // {name: 'value', value: '1', start: 4, end: 5},
|
58
|
+
* // {name: 'literal', value: '\\{', start: 6, end: 8},
|
59
|
+
* // {name: 'value', value: 'function(x,y){return y+x;}', start: 9, end: 35}
|
60
|
+
* // ]
|
61
|
+
*
|
62
|
+
* // Sticky mode via flag y
|
63
|
+
* str = '<1><<<2>>><3>4<5>';
|
64
|
+
* XRegExp.matchRecursive(str, '<', '>', 'gy');
|
65
|
+
* // -> ['1', '<<2>>', '3']
|
66
|
+
*/
|
67
|
+
XRegExp.matchRecursive = function (str, left, right, flags, options) {
    flags = flags || "";
    options = options || {};

    var isGlobal = flags.indexOf("g") > -1,
        isSticky = flags.indexOf("y") > -1,
        delimFlags = flags.replace(/y/g, ""), // Flag y is handled by the loop, not the delimiters
        escapeChar = options.escapeChar,
        names = options.valueNames,
        results = [],
        depth = 0, // Number of left delimiters currently open
        delimStart = 0,
        delimEnd = 0, // Also the position where the next search starts
        lastOuterEnd = 0,
        outerStart,
        innerStart,
        opener,
        closer,
        delim,
        skipped,
        escaped;

    // Appends a detail row for `str.slice(from, to)`, unless the part's name is falsy
    function addPart(name, from, to) {
        if (name) {
            results.push(row(name, str.slice(from, to), from, to));
        }
    }

    left = XRegExp(left, delimFlags);
    right = XRegExp(right, delimFlags);

    if (escapeChar) {
        if (escapeChar.length > 1) {
            throw new SyntaxError("can't use more than one escape character");
        }
        escapeChar = XRegExp.escape(escapeChar);
        // Matches a run of escaped characters and of characters that neither start a delimiter
        // nor are the escape character. Using XRegExp.union safely rewrites backreferences in
        // `left` and `right`
        escaped = new RegExp(
            "(?:" + escapeChar + "[\\S\\s]|(?:(?!" + XRegExp.union([left, right]).source + ")[^" + escapeChar + "])+)+",
            flags.replace(/[^im]+/g, "") // Flags gy not needed here; flags nsx handled by XRegExp
        );
    }

    for (;;) {
        // With an escape character, first move past any escaped characters so that the
        // search starts at the delimiter's next possible position
        if (escaped) {
            skipped = XRegExp.exec(str, escaped, delimEnd, "sticky");
            if (skipped) {
                delimEnd += skipped[0].length;
            }
        }

        opener = XRegExp.exec(str, left, delimEnd);
        closer = XRegExp.exec(str, right, delimEnd);
        // Only the leftmost delimiter is kept; on a tie the left delimiter wins
        if (opener && closer) {
            if (opener.index <= closer.index) {
                closer = null;
            } else {
                opener = null;
            }
        }

        /* Outcome by state (doesn't include the sticky mode special case; the loop also ends
         * after the first completed match if the `g` flag is absent):
         *   left delimiter found                      -> loop
         *   right delimiter found, delimiters open    -> loop
         *   right delimiter found, nothing open       -> throw
         *   nothing found, delimiters open            -> throw
         *   nothing found, nothing open               -> break
         */
        delim = opener || closer;
        if (delim) {
            delimStart = delim.index;
            delimEnd = delimStart + delim[0].length;
        } else if (!depth) {
            break;
        }

        // Sticky mode: stop once an outermost delimiter does not start where the last match ended
        if (isSticky && !depth && delimStart > lastOuterEnd) {
            break;
        }

        if (!opener && !(closer && depth)) {
            throw new Error("string contains unbalanced delimiters");
        }

        if (opener) {
            if (!depth) { // Outermost left delimiter: remember where the match begins
                outerStart = delimStart;
                innerStart = delimEnd;
            }
            depth += 1;
        } else {
            depth -= 1;
            if (depth === 0) { // The outermost pair just closed
                if (names) {
                    if (outerStart > lastOuterEnd) {
                        addPart(names[0], lastOuterEnd, outerStart);
                    }
                    addPart(names[1], outerStart, innerStart);
                    addPart(names[2], innerStart, delimStart);
                    addPart(names[3], delimStart, delimEnd);
                } else {
                    results.push(str.slice(innerStart, delimStart));
                }
                lastOuterEnd = delimEnd;
                if (!isGlobal) {
                    break;
                }
            }
        }

        // A delimiter that matched an empty string must not stall the loop
        if (delimStart === delimEnd) {
            delimEnd += 1;
        }
    }

    // Text after the last match is reported under the first value name
    if (isGlobal && !isSticky && names && str.length > lastOuterEnd) {
        addPart(names[0], lastOuterEnd, str.length);
    }

    return results;
};
|
179
|
+
|
180
|
+
}(XRegExp));
|
181
|
+
|
@@ -0,0 +1,115 @@
|
|
1
|
+
/*!
|
2
|
+
* XRegExp Prototype Methods v1.0.0
|
3
|
+
* (c) 2012 Steven Levithan <http://xregexp.com/>
|
4
|
+
* MIT License
|
5
|
+
*/
|
6
|
+
|
7
|
+
/**
|
8
|
+
* Adds a collection of methods to `XRegExp.prototype`. RegExp objects copied by XRegExp are also
|
9
|
+
* augmented with any `XRegExp.prototype` methods. Hence, the following work equivalently:
|
10
|
+
*
|
11
|
+
* XRegExp('[a-z]', 'ig').xexec('abc');
|
12
|
+
* XRegExp(/[a-z]/ig).xexec('abc');
|
13
|
+
* XRegExp.globalize(/[a-z]/i).xexec('abc');
|
14
|
+
*/
|
15
|
+
(function (XRegExp) {
|
16
|
+
"use strict";
|
17
|
+
|
18
|
+
/**
 * Assigns every own enumerable property of `b` onto `a`. Inherited properties of `b` are
 * skipped. Nothing is returned; `a` is modified in place.
 * @private
 * @param {Object} a Object that will receive new properties.
 * @param {Object} b Object whose properties will be copied.
 */
function extend(a, b) {
    var key;
    for (key in b) {
        if (!b.hasOwnProperty(key)) {
            continue;
        }
        a[key] = b[key];
    }
}
|
32
|
+
|
33
|
+
var prototypeMethods = {

    /**
     * Runs the regex's `test` method on the first element of the provided arguments array.
     * @memberOf XRegExp.prototype
     * @param {*} context Ignored. Accepted only for congruity with `Function.prototype.apply`.
     * @param {Array} args Array with the string to search as its first value.
     * @returns {Boolean} Whether the regex matched the provided value.
     * @example
     *
     * XRegExp('[a-z]').apply(null, ['abc']); // -> true
     */
    apply: function (context, args) {
        var subject = args[0];
        return this.test(subject);
    },

    /**
     * Runs the regex's `test` method on the provided string.
     * @memberOf XRegExp.prototype
     * @param {*} context Ignored. Accepted only for congruity with `Function.prototype.call`.
     * @param {String} str String to search.
     * @returns {Boolean} Whether the regex matched the provided value.
     * @example
     *
     * XRegExp('[a-z]').call(null, 'abc'); // -> true
     */
    call: function (context, str) {
        var matched = this.test(str);
        return matched;
    },

    /**
     * Delegates to {@link #XRegExp.forEach}, using this regex as the regex to search with.
     * @memberOf XRegExp.prototype
     * @example
     *
     * XRegExp('\\d').forEach('1a2345', function (match, i) {
     *   if (i % 2) this.push(+match[0]);
     * }, []);
     * // -> [2, 4]
     */
    forEach: function (str, callback, context) {
        var regex = this;
        return XRegExp.forEach(str, regex, callback, context);
    },

    /**
     * Delegates to {@link #XRegExp.globalize}, passing this regex.
     * @memberOf XRegExp.prototype
     * @example
     *
     * var globalCopy = XRegExp('regex').globalize();
     * globalCopy.global; // -> true
     */
    globalize: function () {
        var regex = this;
        return XRegExp.globalize(regex);
    },

    /**
     * Delegates to {@link #XRegExp.exec}, using this regex and forwarding `pos` and `sticky`.
     * @memberOf XRegExp.prototype
     * @example
     *
     * var match = XRegExp('U\\+(?<hex>[0-9A-F]{4})').xexec('U+2620');
     * match.hex; // -> '2620'
     */
    xexec: function (str, pos, sticky) {
        var regex = this;
        return XRegExp.exec(str, regex, pos, sticky);
    },

    /**
     * Delegates to {@link #XRegExp.test}, using this regex and forwarding `pos` and `sticky`.
     * @memberOf XRegExp.prototype
     * @example
     *
     * XRegExp('c').xtest('abc'); // -> true
     */
    xtest: function (str, pos, sticky) {
        var regex = this;
        return XRegExp.test(str, regex, pos, sticky);
    }

};

// Install the methods on the shared prototype
extend(XRegExp.prototype, prototypeMethods);
|
113
|
+
|
114
|
+
}(XRegExp));
|
115
|
+
|
@@ -0,0 +1,152 @@
|
|
1
|
+
/*!
|
2
|
+
* XRegExp Unicode Base v1.0.0
|
3
|
+
* (c) 2008-2012 Steven Levithan <http://xregexp.com/>
|
4
|
+
* MIT License
|
5
|
+
* Uses Unicode 6.1 <http://unicode.org/>
|
6
|
+
*/
|
7
|
+
|
8
|
+
/**
|
9
|
+
* Adds support for the `\p{L}` or `\p{Letter}` Unicode category. Addon packages for other Unicode
|
10
|
+
* categories, scripts, blocks, and properties are available separately. All Unicode tokens can be
|
11
|
+
* inverted using `\P{..}` or `\p{^..}`. Token names are case insensitive, and any spaces, hyphens,
|
12
|
+
* and underscores are ignored.
|
13
|
+
* @requires XRegExp
|
14
|
+
*/
|
15
|
+
(function (XRegExp) {
|
16
|
+
"use strict";
|
17
|
+
|
18
|
+
var unicode = {};
|
19
|
+
|
20
|
+
/*--------------------------------------
|
21
|
+
* Private helper functions
|
22
|
+
*------------------------------------*/
|
23
|
+
|
24
|
+
// Normalizes a token name for lookup: hyphens, spaces, and underscores are removed and the
// remainder is lowercased
function slug(name) {
    var withoutSeparators = name.replace(/[- _]+/g, "");
    return withoutSeparators.toLowerCase();
}
|
28
|
+
|
29
|
+
// Turns a compact list of code points and ranges into character class content by prefixing
// every run of four word characters with `\u`
function expand(str) {
    var fourWordChars = /\w{4}/g;
    return str.replace(fourWordChars, "\\u$&");
}
|
33
|
+
|
34
|
+
// Left-pads the value with zeros up to four characters; longer values are returned as they are
function pad4(str) {
    var missing = 4 - str.length;
    return missing > 0 ? new Array(missing + 1).join("0") + str : str;
}
|
41
|
+
|
42
|
+
// Parses a string of hexadecimal digits into a number
function dec(hex) {
    var HEX_RADIX = 16;
    return parseInt(hex, HEX_RADIX);
}
|
46
|
+
|
47
|
+
// Formats a decimal number (or decimal string) as lowercase hexadecimal digits, without padding
function hex(dec) {
    var value = parseInt(dec, 10);
    return value.toString(16);
}
|
51
|
+
|
52
|
+
// Builds the complement, within \u0000-\uFFFF, of a list of `\uXXXX` code points and
// `\uXXXX-\uXXXX` ranges. The gaps are computed from one entry to the next, in the order the
// entries appear in the list.
function invert(range) {
    var pieces = [],
        prevEnd = -1; // Last code point covered so far; -1 means nothing is covered yet

    // Formats a code point as a `\uXXXX` escape
    function escapeCodePoint(codePoint) {
        return "\\u" + pad4(hex(codePoint));
    }

    XRegExp.forEach(range, /\\u(\w{4})(?:-\\u(\w{4}))?/, function (m) {
        var first = dec(m[1]);
        if (first > prevEnd + 1) { // There is a gap before this entry
            pieces.push(escapeCodePoint(prevEnd + 1));
            if (first > prevEnd + 2) { // The gap is wider than one code point: emit a range
                pieces.push("-" + escapeCodePoint(first - 1));
            }
        }
        // A single code point ends where it starts
        prevEnd = dec(m[2] || m[1]);
    });

    // Whatever lies between the last entry and \uFFFF
    if (prevEnd < 0xFFFF) {
        pieces.push(escapeCodePoint(prevEnd + 1));
        if (prevEnd < 0xFFFE) {
            pieces.push("-\\uFFFF");
        }
    }

    return pieces.join("");
}
|
75
|
+
|
76
|
+
// Returns the inverted form of a token, computing it on first use and storing it in `unicode`
// under the token's name prefixed with `^`
function cacheInversion(item) {
    var key = "^" + item;
    if (!unicode[key]) {
        unicode[key] = invert(unicode[item]);
    }
    return unicode[key];
}
|
80
|
+
|
81
|
+
/*--------------------------------------
|
82
|
+
* Core functionality
|
83
|
+
*------------------------------------*/
|
84
|
+
|
85
|
+
XRegExp.install("extensibility");
|
86
|
+
|
87
|
+
/**
|
88
|
+
* Adds to the list of Unicode properties that XRegExp regexes can match via \p{..} or \P{..}.
|
89
|
+
* @memberOf XRegExp
|
90
|
+
* @param {Object} pack Named sets of Unicode code points and ranges.
|
91
|
+
* @param {Object} [aliases] Aliases for the primary token names.
|
92
|
+
* @example
|
93
|
+
*
|
94
|
+
* XRegExp.addUnicodePackage({
|
95
|
+
* XDigit: '0030-00390041-00460061-0066' // 0-9A-Fa-f
|
96
|
+
* }, {
|
97
|
+
* XDigit: 'Hexadecimal'
|
98
|
+
* });
|
99
|
+
*/
|
100
|
+
XRegExp.addUnicodePackage = function (pack, aliases) {
    // Calls `visit` with each own property name of `obj`; a falsy `obj` is skipped
    function forOwn(obj, visit) {
        var key;
        if (!obj) {
            return;
        }
        for (key in obj) {
            if (obj.hasOwnProperty(key)) {
                visit(key);
            }
        }
    }

    if (!XRegExp.isInstalled("extensibility")) {
        throw new Error("extensibility must be installed before adding Unicode packages");
    }

    // Store each set in expanded form under its normalized name
    forOwn(pack, function (name) {
        unicode[slug(name)] = expand(pack[name]);
    });

    // Each alias maps a primary name (the key) to an alternative name (the value); the
    // alternative name is given whatever is stored under the primary name at this point
    forOwn(aliases, function (name) {
        unicode[slug(aliases[name])] = unicode[slug(name)];
    });
};
|
120
|
+
|
121
|
+
/* Adds data for the Unicode `Letter` category. Addon packages include other categories, scripts,
|
122
|
+
* blocks, and properties.
|
123
|
+
*/
|
124
|
+
XRegExp.addUnicodePackage({
|
125
|
+
L: "0041-005A0061-007A00AA00B500BA00C0-00D600D8-00F600F8-02C102C6-02D102E0-02E402EC02EE0370-037403760377037A-037D03860388-038A038C038E-03A103A3-03F503F7-0481048A-05270531-055605590561-058705D0-05EA05F0-05F20620-064A066E066F0671-06D306D506E506E606EE06EF06FA-06FC06FF07100712-072F074D-07A507B107CA-07EA07F407F507FA0800-0815081A082408280840-085808A008A2-08AC0904-0939093D09500958-09610971-09770979-097F0985-098C098F09900993-09A809AA-09B009B209B6-09B909BD09CE09DC09DD09DF-09E109F009F10A05-0A0A0A0F0A100A13-0A280A2A-0A300A320A330A350A360A380A390A59-0A5C0A5E0A72-0A740A85-0A8D0A8F-0A910A93-0AA80AAA-0AB00AB20AB30AB5-0AB90ABD0AD00AE00AE10B05-0B0C0B0F0B100B13-0B280B2A-0B300B320B330B35-0B390B3D0B5C0B5D0B5F-0B610B710B830B85-0B8A0B8E-0B900B92-0B950B990B9A0B9C0B9E0B9F0BA30BA40BA8-0BAA0BAE-0BB90BD00C05-0C0C0C0E-0C100C12-0C280C2A-0C330C35-0C390C3D0C580C590C600C610C85-0C8C0C8E-0C900C92-0CA80CAA-0CB30CB5-0CB90CBD0CDE0CE00CE10CF10CF20D05-0D0C0D0E-0D100D12-0D3A0D3D0D4E0D600D610D7A-0D7F0D85-0D960D9A-0DB10DB3-0DBB0DBD0DC0-0DC60E01-0E300E320E330E40-0E460E810E820E840E870E880E8A0E8D0E94-0E970E99-0E9F0EA1-0EA30EA50EA70EAA0EAB0EAD-0EB00EB20EB30EBD0EC0-0EC40EC60EDC-0EDF0F000F40-0F470F49-0F6C0F88-0F8C1000-102A103F1050-1055105A-105D106110651066106E-10701075-1081108E10A0-10C510C710CD10D0-10FA10FC-1248124A-124D1250-12561258125A-125D1260-1288128A-128D1290-12B012B2-12B512B8-12BE12C012C2-12C512C8-12D612D8-13101312-13151318-135A1380-138F13A0-13F41401-166C166F-167F1681-169A16A0-16EA1700-170C170E-17111720-17311740-17511760-176C176E-17701780-17B317D717DC1820-18771880-18A818AA18B0-18F51900-191C1950-196D1970-19741980-19AB19C1-19C71A00-1A161A20-1A541AA71B05-1B331B45-1B4B1B83-1BA01BAE1BAF1BBA-1BE51C00-1C231C4D-1C4F1C5A-1C7D1CE9-1CEC1CEE-1CF11CF51CF61D00-1DBF1E00-1F151F18-1F1D1F20-1F451F48-1F4D1F50-1F571F591F5B1F5D1F5F-1F7D1F80-1FB41FB6-1FBC1FBE1FC2-1FC41FC6-1FCC1FD0-1FD31FD6-1FDB1FE0-1FEC1FF2-1FF41FF6-1FFC2071207F2090-209C21022107210A-211321152119-211D212421262128212A-212D212F-2139213C-213F2145-2149214E218321842C0
0-2C2E2C30-2C5E2C60-2CE42CEB-2CEE2CF22CF32D00-2D252D272D2D2D30-2D672D6F2D80-2D962DA0-2DA62DA8-2DAE2DB0-2DB62DB8-2DBE2DC0-2DC62DC8-2DCE2DD0-2DD62DD8-2DDE2E2F300530063031-3035303B303C3041-3096309D-309F30A1-30FA30FC-30FF3105-312D3131-318E31A0-31BA31F0-31FF3400-4DB54E00-9FCCA000-A48CA4D0-A4FDA500-A60CA610-A61FA62AA62BA640-A66EA67F-A697A6A0-A6E5A717-A71FA722-A788A78B-A78EA790-A793A7A0-A7AAA7F8-A801A803-A805A807-A80AA80C-A822A840-A873A882-A8B3A8F2-A8F7A8FBA90A-A925A930-A946A960-A97CA984-A9B2A9CFAA00-AA28AA40-AA42AA44-AA4BAA60-AA76AA7AAA80-AAAFAAB1AAB5AAB6AAB9-AABDAAC0AAC2AADB-AADDAAE0-AAEAAAF2-AAF4AB01-AB06AB09-AB0EAB11-AB16AB20-AB26AB28-AB2EABC0-ABE2AC00-D7A3D7B0-D7C6D7CB-D7FBF900-FA6DFA70-FAD9FB00-FB06FB13-FB17FB1DFB1F-FB28FB2A-FB36FB38-FB3CFB3EFB40FB41FB43FB44FB46-FBB1FBD3-FD3DFD50-FD8FFD92-FDC7FDF0-FDFBFE70-FE74FE76-FEFCFF21-FF3AFF41-FF5AFF66-FFBEFFC2-FFC7FFCA-FFCFFFD2-FFD7FFDA-FFDC"
|
126
|
+
}, {
|
127
|
+
L: "Letter"
|
128
|
+
});
|
129
|
+
|
130
|
+
/* Adds Unicode property syntax to XRegExp: \p{..}, \P{..}, \p{^..}
 */

// Token handler. `match[1]` is `p` or `P`, `match[2]` is the optional `^`, and `match[3]` is
// the token name. Returns character class content when `scope` is "class", and a complete
// bracketed class otherwise.
function unicodeToken(match, scope) {
    var negatedByCase = match[1] === "P",
        inv = (negatedByCase || match[2]) ? "^" : "",
        item = slug(match[3]);

    // The double negative \P{^..} is invalid
    if (negatedByCase && match[2]) {
        throw new SyntaxError("invalid double negation \\P{^");
    }
    if (!unicode.hasOwnProperty(item)) {
        throw new SyntaxError("invalid or unknown Unicode property " + match[0]);
    }

    if (scope === "class") {
        // Here the ranges have to be inverted themselves, so the (cached) inverted list is
        // used instead of a leading `^`
        return inv ? cacheInversion(item) : unicode[item];
    }
    return "[" + inv + unicode[item] + "]";
}

XRegExp.addToken(
    /\\([pP]){(\^?)([^}]*)}/,
    unicodeToken,
    {scope: "all"}
);
|
150
|
+
|
151
|
+
}(XRegExp));
|
152
|
+
|