rapydscript-ns 0.8.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. package/.agignore +1 -1
  2. package/.github/workflows/ci.yml +38 -38
  3. package/=template.pyj +5 -5
  4. package/CHANGELOG.md +18 -0
  5. package/HACKING.md +103 -103
  6. package/LICENSE +24 -24
  7. package/README.md +715 -169
  8. package/TODO.md +9 -2
  9. package/add-toc-to-readme +2 -2
  10. package/bin/export +75 -75
  11. package/bin/rapydscript +70 -70
  12. package/bin/web-repl-export +102 -102
  13. package/build +2 -2
  14. package/language-service/index.js +36 -27
  15. package/package.json +1 -1
  16. package/publish.py +37 -37
  17. package/release/baselib-plain-pretty.js +2358 -168
  18. package/release/baselib-plain-ugly.js +73 -3
  19. package/release/compiler.js +6282 -3092
  20. package/release/signatures.json +31 -30
  21. package/session.vim +4 -4
  22. package/setup.cfg +2 -2
  23. package/src/ast.pyj +1 -0
  24. package/src/baselib-builtins.pyj +340 -2
  25. package/src/baselib-bytes.pyj +664 -0
  26. package/src/baselib-errors.pyj +1 -1
  27. package/src/baselib-internal.pyj +267 -60
  28. package/src/baselib-itertools.pyj +110 -97
  29. package/src/baselib-str.pyj +22 -4
  30. package/src/compiler.pyj +36 -36
  31. package/src/errors.pyj +30 -30
  32. package/src/lib/abc.pyj +317 -0
  33. package/src/lib/aes.pyj +646 -646
  34. package/src/lib/copy.pyj +120 -120
  35. package/src/lib/dataclasses.pyj +532 -0
  36. package/src/lib/elementmaker.pyj +83 -83
  37. package/src/lib/encodings.pyj +126 -126
  38. package/src/lib/enum.pyj +125 -0
  39. package/src/lib/gettext.pyj +569 -569
  40. package/src/lib/itertools.pyj +580 -580
  41. package/src/lib/math.pyj +193 -193
  42. package/src/lib/operator.pyj +11 -11
  43. package/src/lib/pythonize.pyj +20 -20
  44. package/src/lib/random.pyj +118 -118
  45. package/src/lib/re.pyj +504 -470
  46. package/src/lib/react.pyj +74 -74
  47. package/src/lib/traceback.pyj +63 -63
  48. package/src/lib/typing.pyj +577 -0
  49. package/src/lib/uuid.pyj +77 -77
  50. package/src/monaco-language-service/builtins.js +14 -4
  51. package/src/monaco-language-service/diagnostics.js +19 -20
  52. package/src/monaco-language-service/dts.js +550 -550
  53. package/src/output/classes.pyj +62 -26
  54. package/src/output/comments.pyj +45 -45
  55. package/src/output/exceptions.pyj +201 -201
  56. package/src/output/functions.pyj +78 -5
  57. package/src/output/jsx.pyj +164 -164
  58. package/src/output/loops.pyj +5 -2
  59. package/src/output/operators.pyj +100 -34
  60. package/src/output/treeshake.pyj +182 -182
  61. package/src/output/utils.pyj +72 -72
  62. package/src/parse.pyj +80 -16
  63. package/src/string_interpolation.pyj +72 -72
  64. package/src/tokenizer.pyj +9 -4
  65. package/src/unicode_aliases.pyj +576 -576
  66. package/src/utils.pyj +192 -192
  67. package/test/_import_one.pyj +37 -37
  68. package/test/_import_two/__init__.pyj +11 -11
  69. package/test/_import_two/level2/deep.pyj +4 -4
  70. package/test/_import_two/other.pyj +6 -6
  71. package/test/_import_two/sub.pyj +13 -13
  72. package/test/abc.pyj +291 -0
  73. package/test/aes_vectors.pyj +421 -421
  74. package/test/annotations.pyj +80 -80
  75. package/test/arithmetic_nostrict.pyj +88 -0
  76. package/test/arithmetic_types.pyj +169 -0
  77. package/test/baselib.pyj +91 -0
  78. package/test/bytes.pyj +467 -0
  79. package/test/classes.pyj +1 -0
  80. package/test/comparison_ops.pyj +173 -0
  81. package/test/dataclasses.pyj +253 -0
  82. package/test/decorators.pyj +77 -77
  83. package/test/docstrings.pyj +39 -39
  84. package/test/elementmaker_test.pyj +45 -45
  85. package/test/enum.pyj +134 -0
  86. package/test/eval_exec.pyj +56 -0
  87. package/test/format.pyj +148 -0
  88. package/test/functions.pyj +151 -151
  89. package/test/generators.pyj +41 -41
  90. package/test/generic.pyj +370 -370
  91. package/test/imports.pyj +72 -72
  92. package/test/internationalization.pyj +73 -73
  93. package/test/lint.pyj +164 -164
  94. package/test/loops.pyj +85 -85
  95. package/test/numpy.pyj +734 -734
  96. package/test/object.pyj +64 -0
  97. package/test/omit_function_metadata.pyj +20 -20
  98. package/test/python_compat.pyj +17 -15
  99. package/test/python_features.pyj +70 -15
  100. package/test/regexp.pyj +83 -55
  101. package/test/repl.pyj +121 -121
  102. package/test/scoped_flags.pyj +76 -76
  103. package/test/tuples.pyj +96 -0
  104. package/test/typing.pyj +469 -0
  105. package/test/unit/index.js +116 -7
  106. package/test/unit/language-service-dts.js +543 -543
  107. package/test/unit/language-service-hover.js +455 -455
  108. package/test/unit/language-service.js +84 -0
  109. package/test/unit/web-repl.js +804 -1
  110. package/test/vars_locals_globals.pyj +94 -0
  111. package/tools/cli.js +558 -547
  112. package/tools/compile.js +224 -219
  113. package/tools/completer.js +131 -131
  114. package/tools/embedded_compiler.js +262 -251
  115. package/tools/gettext.js +185 -185
  116. package/tools/ini.js +65 -65
  117. package/tools/lint.js +16 -19
  118. package/tools/msgfmt.js +187 -187
  119. package/tools/repl.js +223 -223
  120. package/tools/test.js +118 -118
  121. package/tools/utils.js +128 -128
  122. package/tools/web_repl.js +95 -95
  123. package/try +41 -41
  124. package/web-repl/env.js +196 -196
  125. package/web-repl/index.html +163 -163
  126. package/web-repl/main.js +252 -252
  127. package/web-repl/prism.css +139 -139
  128. package/web-repl/prism.js +113 -113
  129. package/web-repl/rapydscript.js +224 -224
  130. package/web-repl/sha1.js +25 -25
  131. package/PYTHON_DIFFERENCES_REPORT.md +0 -291
  132. package/PYTHON_FEATURE_COVERAGE.md +0 -200
package/src/lib/re.pyj CHANGED
@@ -1,470 +1,504 @@
1
- # vim:fileencoding=utf-8
2
- # License: BSD
3
- # Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
4
- # Copyright: 2013, Alexander Tsepkov
5
-
6
- # globals: ρσ_iterator_symbol, ρσ_list_decorate
7
-
8
- # basic implementation of Python's 're' library
9
-
10
- from __python__ import bound_methods
11
-
12
- # Alias DB from http://www.unicode.org/Public/8.0.0/ucd/NameAliases.txt {{{
13
- _ALIAS_MAP = {"null":0,"nul":0,"start of heading":1,"soh":1,"start of text":2,"stx":2,"end of text":3,"etx":3,"end of transmission":4,"eot":4,"enquiry":5,"enq":5,"acknowledge":6,"ack":6,"alert":7,"bel":7,"backspace":8,"bs":8,"character tabulation":9,"horizontal tabulation":9,"ht":9,"tab":9,"line feed":10,"new line":10,"end of line":10,"lf":10,"nl":10,"eol":10,"line tabulation":11,"vertical tabulation":11,"vt":11,"form feed":12,"ff":12,"carriage return":13,"cr":13,"shift out":14,"locking-shift one":14,"so":14,"shift in":15,"locking-shift zero":15,"si":15,"data link escape":16,"dle":16,"device control one":17,"dc1":17,"device control two":18,"dc2":18,"device control three":19,"dc3":19,"device control four":20,"dc4":20,"negative acknowledge":21,"nak":21,"synchronous idle":22,"syn":22,"end of transmission block":23,"etb":23,"cancel":24,"can":24,"end of medium":25,"eom":25,"substitute":26,"sub":26,"escape":27,"esc":27,"information separator four":28,"file separator":28,"fs":28,"information separator three":29,"group separator":29,"gs":29,"information separator two":30,"record separator":30,"rs":30,"information separator one":31,"unit separator":31,"us":31,"sp":32,"delete":127,"del":127,"padding character":128,"pad":128,"high octet preset":129,"hop":129,"break permitted here":130,"bph":130,"no break here":131,"nbh":131,"index":132,"ind":132,"next line":133,"nel":133,"start of selected area":134,"ssa":134,"end of selected area":135,"esa":135,"character tabulation set":136,"horizontal tabulation set":136,"hts":136,"character tabulation with justification":137,"horizontal tabulation with justification":137,"htj":137,"line tabulation set":138,"vertical tabulation set":138,"vts":138,"partial line forward":139,"partial line down":139,"pld":139,"partial line backward":140,"partial line up":140,"plu":140,"reverse line feed":141,"reverse index":141,"ri":141,"single shift two":142,"single-shift-2":142,"ss2":142,"single shift three":143,"single-shift-3":143,"ss3":143,"device 
control string":144,"dcs":144,"private use one":145,"private use-1":145,"pu1":145,"private use two":146,"private use-2":146,"pu2":146,"set transmit state":147,"sts":147,"cancel character":148,"cch":148,"message waiting":149,"mw":149,"start of guarded area":150,"start of protected area":150,"spa":150,"end of guarded area":151,"end of protected area":151,"epa":151,"start of string":152,"sos":152,"single graphic character introducer":153,"sgc":153,"single character introducer":154,"sci":154,"control sequence introducer":155,"csi":155,"string terminator":156,"st":156,"operating system command":157,"osc":157,"privacy message":158,"pm":158,"application program command":159,"apc":159,"nbsp":160,"shy":173,"latin capital letter gha":418,"latin small letter gha":419,"cgj":847,"alm":1564,"syriac sublinear colon skewed left":1801,"kannada letter llla":3294,"lao letter fo fon":3741,"lao letter fo fay":3743,"lao letter ro":3747,"lao letter lo":3749,"tibetan mark bka- shog gi mgo rgyan":4048,"fvs1":6155,"fvs2":6156,"fvs3":6157,"mvs":6158,"zwsp":8203,"zwnj":8204,"zwj":8205,"lrm":8206,"rlm":8207,"lre":8234,"rle":8235,"pdf":8236,"lro":8237,"rlo":8238,"nnbsp":8239,"mmsp":8287,"wj":8288,"lri":8294,"rli":8295,"fsi":8296,"pdi":8297,"weierstrass elliptic function":8472,"micr on us symbol":9288,"micr dash symbol":9289,"leftwards triangle-headed arrow with double vertical stroke":11130,"rightwards triangle-headed arrow with double vertical stroke":11132,"yi syllable iteration mark":40981,"presentation form for vertical right white lenticular bracket":65048,"vs1":65024,"vs2":65025,"vs3":65026,"vs4":65027,"vs5":65028,"vs6":65029,"vs7":65030,"vs8":65031,"vs9":65032,"vs10":65033,"vs11":65034,"vs12":65035,"vs13":65036,"vs14":65037,"vs15":65038,"vs16":65039,"byte order mark":65279,"bom":65279,"zwnbsp":65279,"cuneiform sign nu11 tenu":74452,"cuneiform sign nu11 over nu11 bur over bur":74453,"byzantine musical symbol fthora skliron chroma 
vasis":118981,"vs17":917760,"vs18":917761,"vs19":917762,"vs20":917763,"vs21":917764,"vs22":917765,"vs23":917766,"vs24":917767,"vs25":917768,"vs26":917769,"vs27":917770,"vs28":917771,"vs29":917772,"vs30":917773,"vs31":917774,"vs32":917775,"vs33":917776,"vs34":917777,"vs35":917778,"vs36":917779,"vs37":917780,"vs38":917781,"vs39":917782,"vs40":917783,"vs41":917784,"vs42":917785,"vs43":917786,"vs44":917787,"vs45":917788,"vs46":917789,"vs47":917790,"vs48":917791,"vs49":917792,"vs50":917793,"vs51":917794,"vs52":917795,"vs53":917796,"vs54":917797,"vs55":917798,"vs56":917799,"vs57":917800,"vs58":917801,"vs59":917802,"vs60":917803,"vs61":917804,"vs62":917805,"vs63":917806,"vs64":917807,"vs65":917808,"vs66":917809,"vs67":917810,"vs68":917811,"vs69":917812,"vs70":917813,"vs71":917814,"vs72":917815,"vs73":917816,"vs74":917817,"vs75":917818,"vs76":917819,"vs77":917820,"vs78":917821,"vs79":917822,"vs80":917823,"vs81":917824,"vs82":917825,"vs83":917826,"vs84":917827,"vs85":917828,"vs86":917829,"vs87":917830,"vs88":917831,"vs89":917832,"vs90":917833,"vs91":917834,"vs92":917835,"vs93":917836,"vs94":917837,"vs95":917838,"vs96":917839,"vs97":917840,"vs98":917841,"vs99":917842,"vs100":917843,"vs101":917844,"vs102":917845,"vs103":917846,"vs104":917847,"vs105":917848,"vs106":917849,"vs107":917850,"vs108":917851,"vs109":917852,"vs110":917853,"vs111":917854,"vs112":917855,"vs113":917856,"vs114":917857,"vs115":917858,"vs116":917859,"vs117":917860,"vs118":917861,"vs119":917862,"vs120":917863,"vs121":917864,"vs122":917865,"vs123":917866,"vs124":917867,"vs125":917868,"vs126":917869,"vs127":917870,"vs128":917871,"vs129":917872,"vs130":917873,"vs131":917874,"vs132":917875,"vs133":917876,"vs134":917877,"vs135":917878,"vs136":917879,"vs137":917880,"vs138":917881,"vs139":917882,"vs140":917883,"vs141":917884,"vs142":917885,"vs143":917886,"vs144":917887,"vs145":917888,"vs146":917889,"vs147":917890,"vs148":917891,"vs149":917892,"vs150":917893,"vs151":917894,"vs152":917895,"vs153":917896,"vs154":917897
,"vs155":917898,"vs156":917899,"vs157":917900,"vs158":917901,"vs159":917902,"vs160":917903,"vs161":917904,"vs162":917905,"vs163":917906,"vs164":917907,"vs165":917908,"vs166":917909,"vs167":917910,"vs168":917911,"vs169":917912,"vs170":917913,"vs171":917914,"vs172":917915,"vs173":917916,"vs174":917917,"vs175":917918,"vs176":917919,"vs177":917920,"vs178":917921,"vs179":917922,"vs180":917923,"vs181":917924,"vs182":917925,"vs183":917926,"vs184":917927,"vs185":917928,"vs186":917929,"vs187":917930,"vs188":917931,"vs189":917932,"vs190":917933,"vs191":917934,"vs192":917935,"vs193":917936,"vs194":917937,"vs195":917938,"vs196":917939,"vs197":917940,"vs198":917941,"vs199":917942,"vs200":917943,"vs201":917944,"vs202":917945,"vs203":917946,"vs204":917947,"vs205":917948,"vs206":917949,"vs207":917950,"vs208":917951,"vs209":917952,"vs210":917953,"vs211":917954,"vs212":917955,"vs213":917956,"vs214":917957,"vs215":917958,"vs216":917959,"vs217":917960,"vs218":917961,"vs219":917962,"vs220":917963,"vs221":917964,"vs222":917965,"vs223":917966,"vs224":917967,"vs225":917968,"vs226":917969,"vs227":917970,"vs228":917971,"vs229":917972,"vs230":917973,"vs231":917974,"vs232":917975,"vs233":917976,"vs234":917977,"vs235":917978,"vs236":917979,"vs237":917980,"vs238":917981,"vs239":917982,"vs240":917983,"vs241":917984,"vs242":917985,"vs243":917986,"vs244":917987,"vs245":917988,"vs246":917989,"vs247":917990,"vs248":917991,"vs249":917992,"vs250":917993,"vs251":917994,"vs252":917995,"vs253":917996,"vs254":917997,"vs255":917998,"vs256":917999}
14
- # }}}
15
-
16
- _ASCII_CONTROL_CHARS = {'a':7, 'b':8, 'f': 12, 'n': 10, 'r': 13, 't': 9, 'v': 11}
17
- _HEX_PAT = /^[a-fA-F0-9]/
18
- _NUM_PAT = /^[0-9]/
19
- _GROUP_PAT = /<([^>]+)>/
20
- _NAME_PAT = /^[a-zA-Z ]/
21
-
22
- I = IGNORECASE = 2
23
- L = LOCALE = 4
24
- M = MULTILINE = 8
25
- D = DOTALL = 16
26
- U = UNICODE = 32
27
- X = VERBOSE = 64
28
- DEBUG = 128
29
- A = ASCII = 256
30
-
31
- supports_unicode = RegExp.prototype.unicode is not undefined
32
-
33
- _RE_ESCAPE = /[-\/\\^$*+?.()|[\]{}]/g
34
-
35
- _re_cache_map = {}
36
- _re_cache_items = v'[]'
37
-
38
- error = SyntaxError # This is the error JS throws for invalid regexps
39
- has_prop = Object.prototype.hasOwnProperty.call.bind(Object.prototype.hasOwnProperty)
40
-
41
- def _expand(groups, repl, group_name_map):
42
- i = 0
43
-
44
- def next():
45
- nonlocal i
46
- return v'repl[i++]'
47
-
48
- def peek():
49
- return repl[i]
50
-
51
- def read_digits(count, pat, base, maxval, prefix):
52
- ans = prefix or ''
53
- greedy = count is Number.MAX_VALUE
54
- while count > 0:
55
- count -= 1
56
- if not pat.test(peek()):
57
- if greedy:
58
- break
59
- return ans
60
- ans += next()
61
- nval = parseInt(ans, base)
62
- if nval > maxval:
63
- return ans
64
- return nval
65
-
66
- def read_escape_sequence():
67
- nonlocal i
68
- q = next()
69
- if not q or q is '\\':
70
- return '\\'
71
- if '"\''.indexOf(q) is not -1:
72
- return q
73
- if _ASCII_CONTROL_CHARS[q]:
74
- return String.fromCharCode(_ASCII_CONTROL_CHARS[q])
75
- if '0' <= q <= '9':
76
- ans = read_digits(Number.MAX_VALUE, _NUM_PAT, 10, Number.MAX_VALUE, q)
77
- if jstype(ans) is 'number':
78
- return groups[ans] or ''
79
- return '\\' + ans
80
- if q is 'g':
81
- m = _GROUP_PAT.exec(repl[i:])
82
- if m is not None:
83
- i += m[0].length
84
- gn = m[1]
85
- if isNaN(parseInt(gn, 10)):
86
- if not has_prop(group_name_map, gn):
87
- return ''
88
- gn = group_name_map[gn][-1]
89
- return groups[gn] or ''
90
- if q is 'x':
91
- code = read_digits(2, _HEX_PAT, 16, 0x10FFFF)
92
- if jstype(code) is 'number':
93
- return String.fromCharCode(code)
94
- return '\\x' + code
95
- if q is 'u':
96
- code = read_digits(4, _HEX_PAT, 16, 0x10FFFF)
97
- if jstype(code) is 'number':
98
- return String.fromCharCode(code)
99
- return '\\u' + code
100
- if q is 'U':
101
- code = read_digits(8, _HEX_PAT, 16, 0x10FFFF)
102
- if jstype(code) is 'number':
103
- if code <= 0xFFFF:
104
- return String.fromCharCode(code)
105
- code -= 0x10000
106
- return String.fromCharCode(0xD800+(code>>10), 0xDC00+(code&0x3FF))
107
- return '\\U' + code
108
- if q is 'N' and peek() is '{':
109
- next()
110
- name = ''
111
- while _NAME_PAT.test(peek()):
112
- name += next()
113
- if peek() is not '}':
114
- return '\\N{' + name
115
- next()
116
- key = (name or '').toLowerCase()
117
- if not name or not has_prop(_ALIAS_MAP, key):
118
- return '\\N{' + name + '}'
119
- code = _ALIAS_MAP[key]
120
- if code <= 0xFFFF:
121
- return String.fromCharCode(code)
122
- code -= 0x10000
123
- return String.fromCharCode(0xD800+(code>>10), 0xDC00+(code&0x3FF))
124
-
125
- return '\\' + q
126
-
127
- ans = ch = ''
128
- while True:
129
- ch = next()
130
- if ch is '\\':
131
- ans += read_escape_sequence()
132
- elif not ch:
133
- break
134
- else:
135
- ans += ch
136
- return ans
137
-
138
- def transform_regex(source, flags):
139
- pos = 0
140
- previous_backslash = in_class = False
141
- ans = ''
142
- group_map = {}
143
- flags = flags or 0
144
- group_count = 0
145
-
146
- while pos < source.length:
147
- ch = v'source[pos++]'
148
- if previous_backslash:
149
- ans += '\\' + ch
150
- previous_backslash = False
151
- continue
152
-
153
- if in_class:
154
- if ch is ']':
155
- in_class = False
156
- ans += ch
157
- continue
158
-
159
- if ch is '\\':
160
- previous_backslash = True
161
- continue
162
-
163
- if ch is '[':
164
- in_class = True
165
- if source[pos] is ']': # in python the empty set is not allowed, instead []] is the same as [\]]
166
- pos += 1
167
- ch = r'[\]'
168
- elif ch is '(':
169
- if source[pos] is '?':
170
- extension = source[pos + 1]
171
- if extension is '#':
172
- close = source.indexOf(')', pos + 1)
173
- if close is -1:
174
- raise ValueError('Expecting a closing )')
175
- pos = close + 1
176
- continue
177
- if 'aiLmsux'.indexOf(extension) is not -1:
178
- flag_map = {'a':ASCII, 'i':IGNORECASE, 'L':LOCALE, 'm':MULTILINE, 's':DOTALL, 'u':UNICODE, 'x':VERBOSE}
179
- close = source.indexOf(')', pos + 1)
180
- if close is -1:
181
- raise SyntaxError('Expecting a closing )')
182
- flgs = source[pos+1:close]
183
- for v'var i = 0; i < flgs.length; i++':
184
- q = flgs[i] # noqa:undef
185
- if not has_prop(flag_map, q):
186
- raise SyntaxError('Invalid flag: ' + q)
187
- flags |= flag_map[q]
188
- pos = close + 1
189
- continue
190
- if extension is '(':
191
- raise SyntaxError('Group existence assertions are not supported in JavaScript')
192
- if extension is 'P':
193
- pos += 2
194
- q = source[pos]
195
- if q is '<':
196
- close = source.indexOf('>', pos)
197
- if close is -1:
198
- raise SyntaxError('Named group not closed, expecting >')
199
- name = source[pos+1:close]
200
- if not has_prop(group_map, name):
201
- group_map[name] = v'[]'
202
- group_map[name].push(v'++group_count')
203
- pos = close + 1
204
- elif q is '=':
205
- close = source.indexOf(')', pos)
206
- if close is -1:
207
- raise SyntaxError('Named group back-reference not closed, expecting a )')
208
- name = source[pos+1:close]
209
- if not isNaN(parseInt(name, 10)):
210
- ans += '\\' + name
211
- else:
212
- if not has_prop(group_map, name):
213
- raise SyntaxError('Invalid back-reference. The named group: ' + name + ' has not yet been defined.')
214
- ans += '\\' + group_map[name][-1]
215
- pos = close + 1
216
- continue
217
- else:
218
- raise SyntaxError('Expecting < or = after (?P')
219
- else:
220
- group_count += 1
221
- elif ch is '.' and (flags & DOTALL):
222
- ans += r'[\s\S]' # JavaScript has no DOTALL
223
- continue
224
-
225
- ans += ch
226
-
227
- return ans, flags, group_map
228
-
229
- class MatchObject:
230
-
231
- def __init__(self, regex, match, pos, endpos):
232
- self.re = regex
233
- self.string = match.input
234
- self._start_pos = match.index
235
- self._groups = match
236
- self.pos, self.endpos = pos, endpos
237
-
238
- def _compute_extents(self):
239
- # compute start/end for each group
240
- match = self._groups
241
- self._start = v'Array(match.length)'
242
- self._end = v'Array(match.length)'
243
- self._start[0] = self._start_pos
244
- self._end[0] = self._start_pos + match[0].length
245
- offset = self._start_pos
246
- extent = match[0]
247
- loc = 0
248
- for v'var i = 1; i < match.length; i++':
249
- g = match[i]
250
- loc = extent.indexOf(g, loc)
251
- if loc is -1:
252
- self._start[i] = self._start[i-1]
253
- self._end[i] = self._end[i-1]
254
- else:
255
- self._start[i] = offset + loc
256
- loc += g.length
257
- self._end[i] = offset + loc # noqa:undef
258
-
259
- def groups(self, defval=None):
260
- ans = v'[]'
261
- for v'var i = 1; i < self._groups.length; i++':
262
- val = self._groups[i] # noqa:undef
263
- if val is undefined:
264
- val = defval
265
- ans.push(val)
266
- return ans
267
-
268
- def _group_number(self, g):
269
- if jstype(g) is 'number':
270
- return g
271
- if has_prop(self.re.group_name_map, g):
272
- return self.re.group_name_map[g][-1]
273
- return g
274
-
275
- def _group_val(self, q, defval):
276
- val = undefined
277
- if jstype(q) is 'number' and -1 < q < self._groups.length:
278
- val = self._groups[q]
279
- else:
280
- if has_prop(self.re.group_name_map, q):
281
- val = self._groups[self.re.group_name_map[q][-1]]
282
- if val is undefined:
283
- val = defval
284
- return val
285
-
286
- def group(self):
287
- if arguments.length is 0:
288
- return self._groups[0]
289
- ans = v'[]'
290
- for v'var i = 0; i < arguments.length; i++':
291
- q = arguments[i] # noqa:undef
292
- ans.push(self._group_val(q, None))
293
- return ans[0] if ans.length is 1 else ans
294
-
295
- def start(self, g):
296
- if self._start is undefined:
297
- self._compute_extents()
298
- val = self._start[self._group_number(g or 0)]
299
- if val is undefined:
300
- val = -1
301
- return val
302
-
303
- def end(self, g):
304
- if self._end is undefined:
305
- self._compute_extents()
306
- val = self._end[self._group_number(g or 0)]
307
- if val is undefined:
308
- val = -1
309
- return val
310
-
311
- def span(self, g):
312
- return [self.start(g), self.end(g)]
313
-
314
- def expand(self, repl):
315
- return _expand(repl, this._groups, this.re.group_name_map)
316
-
317
- def groupdict(self, defval=None):
318
- gnm = self.re.group_name_map
319
- names = Object.keys(gnm)
320
- ans = {}
321
- for v"var i = 0; i < names.length; i++":
322
- name = names[i] # noqa:undef
323
- if has_prop(gnm, name):
324
- val = self._groups[gnm[name][-1]]
325
- if val is undefined:
326
- val = defval
327
- ans[name] = val
328
- return ans
329
-
330
- def captures(self, group_name):
331
- ans = []
332
- if not has_prop(self.re.group_name_map, group_name):
333
- return ans
334
- groups = self.re.group_name_map[group_name]
335
- for v'var i = 0; i < groups.length; i++':
336
- val = self._groups[groups[i]] # noqa:undef
337
- if val is not undefined:
338
- ans.push(val)
339
- return ans
340
-
341
- def capturesdict(self):
342
- gnm = self.re.group_name_map
343
- names = Object.keys(gnm)
344
- ans = {}
345
- for v'var i = 0; i < names.length; i++':
346
- name = names[i] # noqa:undef
347
- ans[name] = self.captures(name)
348
- return ans
349
-
350
- class RegexObject:
351
-
352
- def __init__(self, pattern, flags):
353
- self.pattern = pattern.source if isinstance(pattern, RegExp) else pattern
354
- self.js_pattern, self.flags, self.group_name_map = transform_regex(self.pattern, flags)
355
-
356
- modifiers = ''
357
- if self.flags & IGNORECASE: modifiers += 'i'
358
- if self.flags & MULTILINE: modifiers += 'm'
359
- if not (self.flags & ASCII) and supports_unicode:
360
- modifiers += 'u'
361
- self._modifiers = modifiers + 'g'
362
- self._pattern = RegExp(self.js_pattern, self._modifiers)
363
-
364
- def _do_search(self, pat, string, pos, endpos):
365
- pat.lastIndex = 0
366
- if endpos is not None:
367
- string = string[:endpos]
368
- while True:
369
- n = pat.exec(string)
370
- if n is None:
371
- return None
372
- if n.index >= pos:
373
- return MatchObject(self, n, pos, endpos)
374
-
375
- def search(self, string, pos=0, endpos=None):
376
- return self._do_search(self._pattern, string, pos, endpos)
377
-
378
- def match(self, string, pos=0, endpos=None):
379
- return self._do_search(RegExp('^' + self.js_pattern, self._modifiers), string, pos, endpos)
380
-
381
- def split(self, string, maxsplit=0):
382
- self._pattern.lastIndex = 0
383
- return string.split(self._pattern, maxsplit or undefined)
384
-
385
- def findall(self, string):
386
- self._pattern.lastIndex = 0
387
- return ρσ_list_decorate(string.match(self._pattern) or v'[]')
388
-
389
- def finditer(self, string):
390
- # We have to copy pat since lastIndex is mutable
391
- pat = RegExp(this._pattern.source, this._modifiers) # noqa: unused-local
392
- ans = v"{'_string':string, '_r':pat, '_self':self}"
393
- ans[ρσ_iterator_symbol] = def():
394
- return this
395
- ans['next'] = def():
396
- m = this._r.exec(this._string)
397
- if m is None:
398
- return v"{'done':true}"
399
- return v"{'done':false, 'value':new MatchObject(this._self, m, 0, null)}"
400
- return ans
401
-
402
- def subn(self, repl, string, count=0):
403
- expand = _expand
404
- if jstype(repl) is 'function':
405
- expand = def(m, repl, gnm): return '' + repl(MatchObject(self, m, 0, None))
406
- this._pattern.lastIndex = 0
407
- num = 0
408
- matches = v'[]'
409
-
410
- while count < 1 or num < count:
411
- m = this._pattern.exec(string)
412
- if m is None:
413
- break
414
- matches.push(m)
415
- num += 1
416
-
417
- for v'var i = matches.length - 1; i > -1; i--':
418
- m = matches[i] # noqa:undef
419
- start = m.index
420
- end = start + m[0].length
421
- string = string[:start] + expand(m, repl, self.group_name_map) + string[end:]
422
- return string, matches.length
423
-
424
- def sub(self, repl, string, count=0):
425
- return self.subn(repl, string, count)[0]
426
-
427
- def _get_from_cache(pattern, flags):
428
- if isinstance(pattern, RegExp):
429
- pattern = pattern.source
430
- key = JSON.stringify(v'[pattern, flags]')
431
- if has_prop(_re_cache_map, key):
432
- return _re_cache_map[key]
433
- if _re_cache_items.length >= 100:
434
- v'delete _re_cache_map[_re_cache_items.shift()]'
435
- ans = RegexObject(pattern, flags)
436
- _re_cache_map[key] = ans
437
- _re_cache_items.push(key)
438
- return ans
439
-
440
- def compile(pattern, flags=0):
441
- return _get_from_cache(pattern, flags)
442
-
443
- def search(pattern, string, flags=0):
444
- return _get_from_cache(pattern, flags).search(string)
445
-
446
- def match(pattern, string, flags=0):
447
- return _get_from_cache(pattern, flags).match(string)
448
-
449
- def split(pattern, string, maxsplit=0, flags=0):
450
- return _get_from_cache(pattern, flags).split(string)
451
-
452
- def findall(pattern, string, flags=0):
453
- return _get_from_cache(pattern, flags).findall(string)
454
-
455
- def finditer(pattern, string, flags=0):
456
- return _get_from_cache(pattern, flags).finditer(string)
457
-
458
- def sub(pattern, repl, string, count=0, flags=0):
459
- return _get_from_cache(pattern, flags).sub(repl, string, count)
460
-
461
- def subn(pattern, repl, string, count=0, flags=0):
462
- return _get_from_cache(pattern, flags).subn(repl, string, count)
463
-
464
- def escape(string):
465
- return string.replace(_RE_ESCAPE, '\\$&')
466
-
467
- def purge():
468
- nonlocal _re_cache_map, _re_cache_items
469
- _re_cache_map = {}
470
- _re_cache_items = v'[]'
1
+ # vim:fileencoding=utf-8
2
+ # License: BSD
3
+ # Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
4
+ # Copyright: 2013, Alexander Tsepkov
5
+
6
+ # globals: ρσ_iterator_symbol, ρσ_list_decorate
7
+
8
+ # basic implementation of Python's 're' library
9
+
10
+ from __python__ import bound_methods
11
+
12
+ # Alias DB from http://www.unicode.org/Public/8.0.0/ucd/NameAliases.txt {{{
13
+ _ALIAS_MAP = {"null":0,"nul":0,"start of heading":1,"soh":1,"start of text":2,"stx":2,"end of text":3,"etx":3,"end of transmission":4,"eot":4,"enquiry":5,"enq":5,"acknowledge":6,"ack":6,"alert":7,"bel":7,"backspace":8,"bs":8,"character tabulation":9,"horizontal tabulation":9,"ht":9,"tab":9,"line feed":10,"new line":10,"end of line":10,"lf":10,"nl":10,"eol":10,"line tabulation":11,"vertical tabulation":11,"vt":11,"form feed":12,"ff":12,"carriage return":13,"cr":13,"shift out":14,"locking-shift one":14,"so":14,"shift in":15,"locking-shift zero":15,"si":15,"data link escape":16,"dle":16,"device control one":17,"dc1":17,"device control two":18,"dc2":18,"device control three":19,"dc3":19,"device control four":20,"dc4":20,"negative acknowledge":21,"nak":21,"synchronous idle":22,"syn":22,"end of transmission block":23,"etb":23,"cancel":24,"can":24,"end of medium":25,"eom":25,"substitute":26,"sub":26,"escape":27,"esc":27,"information separator four":28,"file separator":28,"fs":28,"information separator three":29,"group separator":29,"gs":29,"information separator two":30,"record separator":30,"rs":30,"information separator one":31,"unit separator":31,"us":31,"sp":32,"delete":127,"del":127,"padding character":128,"pad":128,"high octet preset":129,"hop":129,"break permitted here":130,"bph":130,"no break here":131,"nbh":131,"index":132,"ind":132,"next line":133,"nel":133,"start of selected area":134,"ssa":134,"end of selected area":135,"esa":135,"character tabulation set":136,"horizontal tabulation set":136,"hts":136,"character tabulation with justification":137,"horizontal tabulation with justification":137,"htj":137,"line tabulation set":138,"vertical tabulation set":138,"vts":138,"partial line forward":139,"partial line down":139,"pld":139,"partial line backward":140,"partial line up":140,"plu":140,"reverse line feed":141,"reverse index":141,"ri":141,"single shift two":142,"single-shift-2":142,"ss2":142,"single shift three":143,"single-shift-3":143,"ss3":143,"device 
control string":144,"dcs":144,"private use one":145,"private use-1":145,"pu1":145,"private use two":146,"private use-2":146,"pu2":146,"set transmit state":147,"sts":147,"cancel character":148,"cch":148,"message waiting":149,"mw":149,"start of guarded area":150,"start of protected area":150,"spa":150,"end of guarded area":151,"end of protected area":151,"epa":151,"start of string":152,"sos":152,"single graphic character introducer":153,"sgc":153,"single character introducer":154,"sci":154,"control sequence introducer":155,"csi":155,"string terminator":156,"st":156,"operating system command":157,"osc":157,"privacy message":158,"pm":158,"application program command":159,"apc":159,"nbsp":160,"shy":173,"latin capital letter gha":418,"latin small letter gha":419,"cgj":847,"alm":1564,"syriac sublinear colon skewed left":1801,"kannada letter llla":3294,"lao letter fo fon":3741,"lao letter fo fay":3743,"lao letter ro":3747,"lao letter lo":3749,"tibetan mark bka- shog gi mgo rgyan":4048,"fvs1":6155,"fvs2":6156,"fvs3":6157,"mvs":6158,"zwsp":8203,"zwnj":8204,"zwj":8205,"lrm":8206,"rlm":8207,"lre":8234,"rle":8235,"pdf":8236,"lro":8237,"rlo":8238,"nnbsp":8239,"mmsp":8287,"wj":8288,"lri":8294,"rli":8295,"fsi":8296,"pdi":8297,"weierstrass elliptic function":8472,"micr on us symbol":9288,"micr dash symbol":9289,"leftwards triangle-headed arrow with double vertical stroke":11130,"rightwards triangle-headed arrow with double vertical stroke":11132,"yi syllable iteration mark":40981,"presentation form for vertical right white lenticular bracket":65048,"vs1":65024,"vs2":65025,"vs3":65026,"vs4":65027,"vs5":65028,"vs6":65029,"vs7":65030,"vs8":65031,"vs9":65032,"vs10":65033,"vs11":65034,"vs12":65035,"vs13":65036,"vs14":65037,"vs15":65038,"vs16":65039,"byte order mark":65279,"bom":65279,"zwnbsp":65279,"cuneiform sign nu11 tenu":74452,"cuneiform sign nu11 over nu11 bur over bur":74453,"byzantine musical symbol fthora skliron chroma 
vasis":118981,"vs17":917760,"vs18":917761,"vs19":917762,"vs20":917763,"vs21":917764,"vs22":917765,"vs23":917766,"vs24":917767,"vs25":917768,"vs26":917769,"vs27":917770,"vs28":917771,"vs29":917772,"vs30":917773,"vs31":917774,"vs32":917775,"vs33":917776,"vs34":917777,"vs35":917778,"vs36":917779,"vs37":917780,"vs38":917781,"vs39":917782,"vs40":917783,"vs41":917784,"vs42":917785,"vs43":917786,"vs44":917787,"vs45":917788,"vs46":917789,"vs47":917790,"vs48":917791,"vs49":917792,"vs50":917793,"vs51":917794,"vs52":917795,"vs53":917796,"vs54":917797,"vs55":917798,"vs56":917799,"vs57":917800,"vs58":917801,"vs59":917802,"vs60":917803,"vs61":917804,"vs62":917805,"vs63":917806,"vs64":917807,"vs65":917808,"vs66":917809,"vs67":917810,"vs68":917811,"vs69":917812,"vs70":917813,"vs71":917814,"vs72":917815,"vs73":917816,"vs74":917817,"vs75":917818,"vs76":917819,"vs77":917820,"vs78":917821,"vs79":917822,"vs80":917823,"vs81":917824,"vs82":917825,"vs83":917826,"vs84":917827,"vs85":917828,"vs86":917829,"vs87":917830,"vs88":917831,"vs89":917832,"vs90":917833,"vs91":917834,"vs92":917835,"vs93":917836,"vs94":917837,"vs95":917838,"vs96":917839,"vs97":917840,"vs98":917841,"vs99":917842,"vs100":917843,"vs101":917844,"vs102":917845,"vs103":917846,"vs104":917847,"vs105":917848,"vs106":917849,"vs107":917850,"vs108":917851,"vs109":917852,"vs110":917853,"vs111":917854,"vs112":917855,"vs113":917856,"vs114":917857,"vs115":917858,"vs116":917859,"vs117":917860,"vs118":917861,"vs119":917862,"vs120":917863,"vs121":917864,"vs122":917865,"vs123":917866,"vs124":917867,"vs125":917868,"vs126":917869,"vs127":917870,"vs128":917871,"vs129":917872,"vs130":917873,"vs131":917874,"vs132":917875,"vs133":917876,"vs134":917877,"vs135":917878,"vs136":917879,"vs137":917880,"vs138":917881,"vs139":917882,"vs140":917883,"vs141":917884,"vs142":917885,"vs143":917886,"vs144":917887,"vs145":917888,"vs146":917889,"vs147":917890,"vs148":917891,"vs149":917892,"vs150":917893,"vs151":917894,"vs152":917895,"vs153":917896,"vs154":917897
,"vs155":917898,"vs156":917899,"vs157":917900,"vs158":917901,"vs159":917902,"vs160":917903,"vs161":917904,"vs162":917905,"vs163":917906,"vs164":917907,"vs165":917908,"vs166":917909,"vs167":917910,"vs168":917911,"vs169":917912,"vs170":917913,"vs171":917914,"vs172":917915,"vs173":917916,"vs174":917917,"vs175":917918,"vs176":917919,"vs177":917920,"vs178":917921,"vs179":917922,"vs180":917923,"vs181":917924,"vs182":917925,"vs183":917926,"vs184":917927,"vs185":917928,"vs186":917929,"vs187":917930,"vs188":917931,"vs189":917932,"vs190":917933,"vs191":917934,"vs192":917935,"vs193":917936,"vs194":917937,"vs195":917938,"vs196":917939,"vs197":917940,"vs198":917941,"vs199":917942,"vs200":917943,"vs201":917944,"vs202":917945,"vs203":917946,"vs204":917947,"vs205":917948,"vs206":917949,"vs207":917950,"vs208":917951,"vs209":917952,"vs210":917953,"vs211":917954,"vs212":917955,"vs213":917956,"vs214":917957,"vs215":917958,"vs216":917959,"vs217":917960,"vs218":917961,"vs219":917962,"vs220":917963,"vs221":917964,"vs222":917965,"vs223":917966,"vs224":917967,"vs225":917968,"vs226":917969,"vs227":917970,"vs228":917971,"vs229":917972,"vs230":917973,"vs231":917974,"vs232":917975,"vs233":917976,"vs234":917977,"vs235":917978,"vs236":917979,"vs237":917980,"vs238":917981,"vs239":917982,"vs240":917983,"vs241":917984,"vs242":917985,"vs243":917986,"vs244":917987,"vs245":917988,"vs246":917989,"vs247":917990,"vs248":917991,"vs249":917992,"vs250":917993,"vs251":917994,"vs252":917995,"vs253":917996,"vs254":917997,"vs255":917998,"vs256":917999}
14
+ # }}}
15
+
16
# Single-letter escapes (\a, \b, ...) mapped to the code of the control character they produce
_ASCII_CONTROL_CHARS = {'a':7, 'b':8, 'f': 12, 'n': 10, 'r': 13, 't': 9, 'v': 11}
# Single-character classifiers used while scanning replacement templates
_HEX_PAT = /^[a-fA-F0-9]/
_NUM_PAT = /^[0-9]/
# Anchored: a \g reference is only valid when the <...> immediately follows the \g
_GROUP_PAT = /^<([^>]+)>/
_NAME_PAT = /^[a-zA-Z ]/

# Flag bit values; each flag has a short and a long alias
I = IGNORECASE = 2
L = LOCALE = 4
M = MULTILINE = 8
S = D = DOTALL = 16  # re.S is the canonical Python alias; re.D kept for compat
U = UNICODE = 32
X = VERBOSE = 64
DEBUG = 128
A = ASCII = 256
NOFLAG = 0

# NOTE(review): per the ECMAScript spec the `unicode` getter yields undefined when
# read off RegExp.prototype itself, which would make this False on current
# engines -- confirm this is the intended feature test.
supports_unicode = RegExp.prototype.unicode is not undefined

# ES2022 'd' (hasIndices) flag gives exact group start/end offsets
_supports_indices = v"(function(){try{new RegExp('a','d');return true;}catch(e){return false;}})()"

# Characters that escape() prefixes with a backslash
_RE_ESCAPE = /[-\/\\^$*+?.()|[\]{}]/g

# Compiled pattern cache: key -> RegexObject, plus the keys in insertion order
_re_cache_map = {}
_re_cache_items = v'[]'

error = SyntaxError  # This is the error JS throws for invalid regexps
# has_prop(obj, name) is Object.prototype.hasOwnProperty.call(obj, name)
has_prop = Object.prototype.hasOwnProperty.call.bind(Object.prototype.hasOwnProperty)
44
+
45
def _expand(groups, repl, group_name_map):
    # Expand the backslash escapes of the replacement template `repl`.
    #
    # groups:         array-like of captured groups (index 0 is the whole match)
    # repl:           the template string
    # group_name_map: group name -> array of group numbers carrying that name
    #
    # Returns the expanded string. Escapes that are not recognised or are
    # malformed are copied to the output unchanged.
    i = 0

    def next():
        nonlocal i
        return v'repl[i++]'

    def peek():
        return repl[i]

    def at_end():
        return i >= repl.length

    def is_name_char(c):
        # Names in _ALIAS_MAP contain letters, spaces, digits and hyphens
        return _NAME_PAT.test(c) or _NUM_PAT.test(c) or c is '-'

    def read_digits(count, pat, base, maxval, prefix):
        # Read up to `count` characters matching `pat`. Returns the parsed
        # number, or the raw text read so far when the digits run out early
        # (non-greedy mode) or the value exceeds `maxval`.
        ans = prefix or ''
        greedy = count is Number.MAX_VALUE
        while count > 0:
            count -= 1
            # Guard the end of input explicitly: peek() is undefined there and
            # RegExp.test() would coerce it to the string "undefined"
            if at_end() or not pat.test(peek()):
                if greedy:
                    break
                return ans
            ans += next()
        nval = parseInt(ans, base)
        if nval > maxval:
            return ans
        return nval

    def read_escape_sequence():
        nonlocal i
        q = next()
        if not q or q is '\\':
            return '\\'
        if '"\''.indexOf(q) is not -1:
            return q
        if _ASCII_CONTROL_CHARS[q]:
            return String.fromCharCode(_ASCII_CONTROL_CHARS[q])
        if '0' <= q <= '9':
            # \<number>: numbered group reference
            ans = read_digits(Number.MAX_VALUE, _NUM_PAT, 10, Number.MAX_VALUE, q)
            if jstype(ans) is 'number':
                return groups[ans] or ''
            return '\\' + ans
        if q is 'g':
            # \g<name> or \g<number>; the <...> must start right after the \g,
            # otherwise advancing by m[0].length would consume the wrong text
            m = _GROUP_PAT.exec(repl[i:])
            if m is not None and m.index is 0:
                i += m[0].length
                gn = m[1]
                if isNaN(parseInt(gn, 10)):
                    if not has_prop(group_name_map, gn):
                        return ''
                    gn = group_name_map[gn][-1]
                return groups[gn] or ''
        if q is 'x':
            code = read_digits(2, _HEX_PAT, 16, 0x10FFFF)
            if jstype(code) is 'number':
                return String.fromCharCode(code)
            return '\\x' + code
        if q is 'u':
            code = read_digits(4, _HEX_PAT, 16, 0x10FFFF)
            if jstype(code) is 'number':
                return String.fromCharCode(code)
            return '\\u' + code
        if q is 'U':
            code = read_digits(8, _HEX_PAT, 16, 0x10FFFF)
            if jstype(code) is 'number':
                if code <= 0xFFFF:
                    return String.fromCharCode(code)
                # Encode astral code points as a surrogate pair
                code -= 0x10000
                return String.fromCharCode(0xD800+(code>>10), 0xDC00+(code&0x3FF))
            return '\\U' + code
        if q is 'N' and peek() is '{':
            next()
            name = ''
            # Stop at the end of input: an unterminated \N{... must not loop forever
            while not at_end() and is_name_char(peek()):
                name += next()
            if peek() is not '}':
                return '\\N{' + name
            next()
            key = (name or '').toLowerCase()
            if not name or not has_prop(_ALIAS_MAP, key):
                return '\\N{' + name + '}'
            code = _ALIAS_MAP[key]
            if code <= 0xFFFF:
                return String.fromCharCode(code)
            code -= 0x10000
            return String.fromCharCode(0xD800+(code>>10), 0xDC00+(code&0x3FF))

        return '\\' + q

    ans = ch = ''
    while True:
        ch = next()
        if ch is '\\':
            ans += read_escape_sequence()
        elif not ch:
            break
        else:
            ans += ch
    return ans
141
+
142
def transform_regex(source, flags):
    # Translate a Python-flavoured regex `source` into JavaScript regex source.
    #
    # Handles: (?#comment) removal, inline flag groups such as (?i), named
    # groups (?P<name>...), named back-references (?P=name), the []] / [^]]
    # character class forms and DOTALL emulation.
    #
    # Returns [js_source, flags, group_map] where flags includes any inline
    # flags found and group_map maps each group name to an array of group numbers.
    # Raises SyntaxError for malformed or unsupported constructs (ValueError for
    # an unterminated (?# comment).
    pos = 0
    previous_backslash = in_class = False
    ans = ''
    group_map = {}
    flags = flags or 0
    group_count = 0

    while pos < source.length:
        ch = v'source[pos++]'
        if previous_backslash:
            ans += '\\' + ch
            previous_backslash = False
            continue

        if in_class:
            if ch is '\\' and pos < source.length:
                # An escaped character (e.g. \]) must not terminate the class,
                # otherwise a later ( inside the class is counted as a group
                escaped = v'source[pos++]'
                ans += ch + escaped
                continue
            if ch is ']':
                in_class = False
            ans += ch
            continue

        if ch is '\\':
            previous_backslash = True
            continue

        if ch is '[':
            in_class = True
            if source[pos] is ']':  # in python the empty set is not allowed, instead []] is the same as [\]]
                pos += 1
                ch = r'[\]'
            elif source[pos] is '^' and source[pos + 1] is ']':  # likewise [^]] is the same as [^\]]
                pos += 2
                ch = r'[^\]'
        elif ch is '(':
            if source[pos] is '?':
                extension = source[pos + 1]
                if extension is '#':
                    # (?#...) comment: drop it entirely
                    close = source.indexOf(')', pos + 1)
                    if close is -1:
                        raise ValueError('Expecting a closing )')
                    pos = close + 1
                    continue
                if 'aiLmsux'.indexOf(extension) is not -1:
                    # Inline flags group: fold the flags in and emit nothing
                    flag_map = {'a':ASCII, 'i':IGNORECASE, 'L':LOCALE, 'm':MULTILINE, 's':DOTALL, 'u':UNICODE, 'x':VERBOSE}
                    close = source.indexOf(')', pos + 1)
                    if close is -1:
                        raise SyntaxError('Expecting a closing )')
                    flgs = source[pos+1:close]
                    for v'var i = 0; i < flgs.length; i++':
                        q = flgs[i]  # noqa:undef
                        if not has_prop(flag_map, q):
                            raise SyntaxError('Invalid flag: ' + q)
                        flags |= flag_map[q]
                    pos = close + 1
                    continue
                if extension is '(':
                    raise SyntaxError('Group existence assertions are not supported in JavaScript')
                if extension is 'P':
                    pos += 2
                    q = source[pos]
                    if q is '<':
                        # Named group: record its number, emit a plain capturing group
                        close = source.indexOf('>', pos)
                        if close is -1:
                            raise SyntaxError('Named group not closed, expecting >')
                        name = source[pos+1:close]
                        if not has_prop(group_map, name):
                            group_map[name] = v'[]'
                        group_map[name].push(v'++group_count')
                        pos = close + 1
                    elif q is '=':
                        # Named back-reference: emit the numeric equivalent
                        close = source.indexOf(')', pos)
                        if close is -1:
                            raise SyntaxError('Named group back-reference not closed, expecting a )')
                        name = source[pos+1:close]
                        if not isNaN(parseInt(name, 10)):
                            ans += '\\' + name
                        else:
                            if not has_prop(group_map, name):
                                raise SyntaxError('Invalid back-reference. The named group: ' + name + ' has not yet been defined.')
                            ans += '\\' + group_map[name][-1]
                        pos = close + 1
                        continue
                    else:
                        raise SyntaxError('Expecting < or = after (?P')
            else:
                group_count += 1
        elif ch is '.' and (flags & DOTALL):
            ans += r'[\s\S]'  # JavaScript has no DOTALL
            continue

        ans += ch

    if previous_backslash:
        # Keep a trailing backslash so the RegExp constructor reports the
        # invalid pattern instead of it being silently dropped
        ans += '\\'

    return ans, flags, group_map
232
+
233
class MatchObject:
    # Result of a successful match: wraps the array returned by RegExp.exec()
    # and exposes a Python `re`-style match API on top of it.

    def __init__(self, regex, match, pos, endpos):
        # regex: the RegexObject that produced the match
        # match: the array returned by RegExp.prototype.exec()
        # pos, endpos: the search bounds that were requested
        self.re = regex
        self.string = match.input
        self._start_pos = match.index
        self._groups = match
        self.pos, self.endpos = pos, endpos

    def _compute_extents(self):
        # compute start/end for each group
        match = self._groups
        self._start = v'Array(match.length)'
        self._end = v'Array(match.length)'
        # ES2022: use accurate per-group indices when the 'd' flag is present
        if match.indices is not undefined:
            for v'var i = 0; i < match.indices.length; i++':
                pair = match.indices[i]  # noqa:undef
                if pair is undefined:
                    self._start[i] = -1
                    self._end[i] = -1
                else:
                    self._start[i] = pair[0]
                    self._end[i] = pair[1]
            return
        # Fallback heuristic for environments without 'd' flag: locate each
        # group's text inside the overall match, scanning left to right
        self._start[0] = self._start_pos
        self._end[0] = self._start_pos + match[0].length
        offset = self._start_pos
        extent = match[0]
        loc = 0
        for v'var i = 1; i < match.length; i++':
            g = match[i]
            if g is undefined:
                # Group did not participate: report -1 like the indices branch,
                # rather than searching for the text "undefined"
                self._start[i] = -1
                self._end[i] = -1
                continue
            found = extent.indexOf(g, loc)
            if found is -1:
                # Not found after the previous group; reuse the previous extent
                # and keep `loc` so later groups are still searched correctly
                self._start[i] = self._start[i-1]
                self._end[i] = self._end[i-1]
            else:
                self._start[i] = offset + found
                loc = found + g.length
                self._end[i] = offset + loc  # noqa:undef

    def groups(self, defval=None):
        # All groups from 1 upwards; groups that did not participate become defval
        ans = v'[]'
        for v'var i = 1; i < self._groups.length; i++':
            val = self._groups[i]  # noqa:undef
            if val is undefined:
                val = defval
            ans.push(val)
        return ans

    def _group_number(self, g):
        # Map a group name to its (last) group number; numbers pass through
        if jstype(g) is 'number':
            return g
        if has_prop(self.re.group_name_map, g):
            return self.re.group_name_map[g][-1]
        return g

    def _group_val(self, q, defval):
        # Value of group q (number or name), or defval when absent/unmatched
        val = undefined
        if jstype(q) is 'number' and -1 < q < self._groups.length:
            val = self._groups[q]
        else:
            if has_prop(self.re.group_name_map, q):
                val = self._groups[self.re.group_name_map[q][-1]]
        if val is undefined:
            val = defval
        return val

    def group(self):
        # No arguments: the whole match. One argument: that group's value.
        # Several arguments: an array with one value per argument.
        if arguments.length is 0:
            return self._groups[0]
        ans = v'[]'
        for v'var i = 0; i < arguments.length; i++':
            q = arguments[i]  # noqa:undef
            ans.push(self._group_val(q, None))
        return ans[0] if ans.length is 1 else ans

    def start(self, g):
        # Start offset of group g (default: whole match), -1 if unknown
        if self._start is undefined:
            self._compute_extents()
        val = self._start[self._group_number(g or 0)]
        if val is undefined:
            val = -1
        return val

    def end(self, g):
        # End offset of group g (default: whole match), -1 if unknown
        if self._end is undefined:
            self._compute_extents()
        val = self._end[self._group_number(g or 0)]
        if val is undefined:
            val = -1
        return val

    def span(self, g):
        return [self.start(g), self.end(g)]

    def expand(self, repl):
        # _expand() takes (groups, repl, group_name_map): groups come first
        return _expand(self._groups, repl, self.re.group_name_map)

    def groupdict(self, defval=None):
        # Map of group name -> value of the last group carrying that name
        gnm = self.re.group_name_map
        names = Object.keys(gnm)
        ans = {}
        for v"var i = 0; i < names.length; i++":
            name = names[i]  # noqa:undef
            if has_prop(gnm, name):
                val = self._groups[gnm[name][-1]]
                if val is undefined:
                    val = defval
                ans[name] = val
        return ans

    def captures(self, group_name):
        # Values of every group that shares group_name and took part in the match
        ans = []
        if not has_prop(self.re.group_name_map, group_name):
            return ans
        groups = self.re.group_name_map[group_name]
        for v'var i = 0; i < groups.length; i++':
            val = self._groups[groups[i]]  # noqa:undef
            if val is not undefined:
                ans.push(val)
        return ans

    def capturesdict(self):
        # Map of group name -> captures(name)
        gnm = self.re.group_name_map
        names = Object.keys(gnm)
        ans = {}
        for v'var i = 0; i < names.length; i++':
            name = names[i]  # noqa:undef
            ans[name] = self.captures(name)
        return ans
365
+
366
class RegexObject:
    # A compiled pattern: the Python-flavoured source translated to a
    # JavaScript RegExp, exposing a Python `re`-style API.

    def __init__(self, pattern, flags):
        # pattern: pattern source string or a JavaScript RegExp
        # flags:   bitwise OR of the module level flag constants
        self.pattern = pattern.source if isinstance(pattern, RegExp) else pattern
        self.js_pattern, self.flags, self.group_name_map = transform_regex(self.pattern, flags)

        modifiers = ''
        if self.flags & IGNORECASE: modifiers += 'i'
        if self.flags & MULTILINE: modifiers += 'm'
        if not (self.flags & ASCII) and supports_unicode:
            modifiers += 'u'
        if _supports_indices:
            modifiers += 'd'
        self._modifiers = modifiers + 'g'
        self._pattern = RegExp(self.js_pattern, self._modifiers)

    def _advance(self, string, index):
        # Index just past the character at `index`. Used to step over a
        # zero-length match, after which exec() would otherwise return the
        # same match forever. In unicode mode a surrogate pair is one character.
        if self._modifiers.indexOf('u') is not -1 and index + 1 < string.length:
            lead = string.charCodeAt(index)
            if 0xD800 <= lead <= 0xDBFF:
                trail = string.charCodeAt(index + 1)
                if 0xDC00 <= trail <= 0xDFFF:
                    return index + 2
        return index + 1

    def _do_search(self, pat, string, pos, endpos):
        # First match of the global RegExp `pat` starting at or after pos,
        # looking only at string[:endpos]. Returns a MatchObject or None.
        if endpos is not None:
            string = string[:endpos]
        # Start scanning at pos. Scanning from 0 and discarding earlier
        # matches never terminates on a zero-length match, and misses matches
        # that overlap a discarded one.
        pat.lastIndex = pos
        n = pat.exec(string)
        if n is None:
            return None
        return MatchObject(self, n, pos, endpos)

    def search(self, string, pos=0, endpos=None):
        return self._do_search(self._pattern, string, pos, endpos)

    def match(self, string, pos=0, endpos=None):
        # Match only at position pos. The sticky 'y' flag pins the match to
        # lastIndex; prefixing '^' would bind to the first alternative only
        # ('^a|b') and could never match at pos > 0.
        if endpos is not None:
            string = string[:endpos]
        pat = RegExp(self.js_pattern, self._modifiers.replace('g', 'y'))
        pat.lastIndex = pos
        m = pat.exec(string)
        if m is None:
            return None
        return MatchObject(self, m, pos, endpos)

    def split(self, string, maxsplit=0):
        # Split string around matches of the pattern; captured groups are
        # included in the result. With maxsplit > 0 at most that many splits
        # are made and the unsplit remainder is the last element.
        self._pattern.lastIndex = 0
        if not maxsplit or maxsplit < 0:
            return string.split(self._pattern)
        # String.prototype.split's limit truncates the result instead of
        # keeping the remainder, so bounded splits are done by hand
        pat = self._pattern
        parts = v'[]'
        last = 0
        num = 0
        while num < maxsplit:
            m = pat.exec(string)
            if m is None:
                break
            if m[0].length is 0:
                pat.lastIndex = self._advance(string, m.index)
                # Same rule as String.prototype.split: an empty match at the
                # previous split point or at the end of input is not a split
                if m.index is last or m.index >= string.length:
                    continue
            parts.push(string[last:m.index])
            for v'var j = 1; j < m.length; j++':
                parts.push(m[j])  # noqa:undef
            last = m.index + m[0].length
            num += 1
        pat.lastIndex = 0
        parts.push(string[last:])
        return parts

    def findall(self, string):
        # All non-overlapping matches, as returned by String.prototype.match
        self._pattern.lastIndex = 0
        return ρσ_list_decorate(string.match(self._pattern) or v'[]')

    def finditer(self, string):
        # Iterator yielding a MatchObject for every match in string
        # We have to copy pat since lastIndex is mutable
        pat = RegExp(this._pattern.source, this._modifiers)  # noqa: unused-local
        ans = v"{'_string':string, '_r':pat, '_self':self}"
        ans[ρσ_iterator_symbol] = def():
            return this
        ans['next'] = def():
            m = this._r.exec(this._string)
            if m is None:
                return v"{'done':true}"
            if m[0].length is 0:
                # exec() does not move past a zero-length match by itself
                this._r.lastIndex = this._self._advance(this._string, m.index)
            return v"{'done':false, 'value':new MatchObject(this._self, m, 0, null)}"
        return ans

    def subn(self, repl, string, count=0):
        # Replace matches of the pattern in string with repl (a template
        # string or a function taking a MatchObject). count > 0 limits the
        # number of replacements. Returns [new_string, number_of_replacements].
        expand = _expand
        if jstype(repl) is 'function':
            expand = def(m, repl, gnm): return '' + repl(MatchObject(self, m, 0, None))
        this._pattern.lastIndex = 0
        num = 0
        matches = v'[]'

        while count < 1 or num < count:
            m = this._pattern.exec(string)
            if m is None:
                break
            if m[0].length is 0:
                # Step over zero-length matches, otherwise this loop never ends
                this._pattern.lastIndex = self._advance(string, m.index)
            matches.push(m)
            num += 1

        # Substitute from the right so earlier match offsets stay valid
        for v'var i = matches.length - 1; i > -1; i--':
            m = matches[i]  # noqa:undef
            start = m.index
            end = start + m[0].length
            string = string[:start] + expand(m, repl, self.group_name_map) + string[end:]
        return string, matches.length

    def sub(self, repl, string, count=0):
        return self.subn(repl, string, count)[0]

    def fullmatch(self, string, pos=0, endpos=None):
        # Match only if the pattern covers exactly string[pos:endpos]
        if endpos is not None:
            string = string[:endpos]
        end = string.length
        # Sticky 'y' + no 'm': anchors match to exactly pos..end
        mods = self._modifiers.replace('g', 'y').replace('m', '')
        # Group the pattern so that '$' applies to every alternative
        pat = RegExp('(?:' + self.js_pattern + ')$', mods)
        pat.lastIndex = pos
        m = pat.exec(string)
        if m is None or m.index is not pos or m.index + m[0].length is not end:
            return None
        return MatchObject(self, m, pos, endpos)
457
+
458
def _get_from_cache(pattern, flags):
    # Return the RegexObject for (pattern, flags), compiling it on a cache
    # miss. The cache holds at most 100 entries; the oldest one is evicted first.
    if isinstance(pattern, RegExp):
        pattern = pattern.source
    key = JSON.stringify(v'[pattern, flags]')
    if not has_prop(_re_cache_map, key):
        if _re_cache_items.length >= 100:
            v'delete _re_cache_map[_re_cache_items.shift()]'
        compiled = RegexObject(pattern, flags)
        _re_cache_map[key] = compiled
        _re_cache_items.push(key)
    return _re_cache_map[key]
470
+
471
def compile(pattern, flags=0):
    # Compile pattern into a (cached) RegexObject
    regex = _get_from_cache(pattern, flags)
    return regex
473
+
474
def search(pattern, string, flags=0):
    # First match of pattern anywhere in string, or None
    regex = _get_from_cache(pattern, flags)
    return regex.search(string)
476
+
477
def match(pattern, string, flags=0):
    # Match of pattern at the start of string, or None
    regex = _get_from_cache(pattern, flags)
    return regex.match(string)
479
+
480
def fullmatch(pattern, string, flags=0):
    # Match of pattern against the whole of string, or None
    regex = _get_from_cache(pattern, flags)
    return regex.fullmatch(string)
482
+
483
def split(pattern, string, maxsplit=0, flags=0):
    # Split string around matches of pattern.
    # maxsplit: maximum number of splits to make (0 means no limit); it is
    #           forwarded to RegexObject.split() rather than being dropped.
    # flags:    bitwise OR of the module level flag constants
    return _get_from_cache(pattern, flags).split(string, maxsplit)
485
+
486
def findall(pattern, string, flags=0):
    # List of all matches of pattern in string
    regex = _get_from_cache(pattern, flags)
    return regex.findall(string)
488
+
489
def finditer(pattern, string, flags=0):
    # Iterator over MatchObjects for every match of pattern in string
    regex = _get_from_cache(pattern, flags)
    return regex.finditer(string)
491
+
492
def sub(pattern, repl, string, count=0, flags=0):
    # string with matches of pattern replaced by repl (at most count when count > 0)
    regex = _get_from_cache(pattern, flags)
    return regex.sub(repl, string, count)
494
+
495
def subn(pattern, repl, string, count=0, flags=0):
    # Like sub(), but returns [new_string, number_of_replacements]
    regex = _get_from_cache(pattern, flags)
    return regex.subn(repl, string, count)
497
+
498
def escape(string):
    # Prefix every character matched by _RE_ESCAPE with a backslash
    escaped = string.replace(_RE_ESCAPE, '\\$&')
    return escaped
500
+
501
def purge():
    # Empty the compiled pattern cache
    nonlocal _re_cache_map, _re_cache_items
    _re_cache_items = v'[]'
    _re_cache_map = {}