musicdl 2.1.11__py3-none-any.whl → 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- musicdl/__init__.py +5 -5
- musicdl/modules/__init__.py +10 -3
- musicdl/modules/common/__init__.py +2 -0
- musicdl/modules/common/gdstudio.py +204 -0
- musicdl/modules/js/__init__.py +1 -0
- musicdl/modules/js/youtube/__init__.py +2 -0
- musicdl/modules/js/youtube/botguard.js +1 -0
- musicdl/modules/js/youtube/jsinterp.py +902 -0
- musicdl/modules/js/youtube/runner.js +2 -0
- musicdl/modules/sources/__init__.py +41 -10
- musicdl/modules/sources/apple.py +207 -0
- musicdl/modules/sources/base.py +256 -28
- musicdl/modules/sources/bilibili.py +118 -0
- musicdl/modules/sources/buguyy.py +148 -0
- musicdl/modules/sources/fangpi.py +153 -0
- musicdl/modules/sources/fivesing.py +108 -0
- musicdl/modules/sources/gequbao.py +148 -0
- musicdl/modules/sources/jamendo.py +108 -0
- musicdl/modules/sources/joox.py +104 -68
- musicdl/modules/sources/kugou.py +129 -76
- musicdl/modules/sources/kuwo.py +188 -68
- musicdl/modules/sources/lizhi.py +107 -0
- musicdl/modules/sources/migu.py +172 -66
- musicdl/modules/sources/mitu.py +140 -0
- musicdl/modules/sources/mp3juice.py +264 -0
- musicdl/modules/sources/netease.py +163 -115
- musicdl/modules/sources/qianqian.py +125 -77
- musicdl/modules/sources/qq.py +232 -94
- musicdl/modules/sources/tidal.py +342 -0
- musicdl/modules/sources/ximalaya.py +256 -0
- musicdl/modules/sources/yinyuedao.py +144 -0
- musicdl/modules/sources/youtube.py +238 -0
- musicdl/modules/utils/__init__.py +12 -4
- musicdl/modules/utils/appleutils.py +563 -0
- musicdl/modules/utils/data.py +107 -0
- musicdl/modules/utils/logger.py +211 -58
- musicdl/modules/utils/lyric.py +73 -0
- musicdl/modules/utils/misc.py +335 -23
- musicdl/modules/utils/modulebuilder.py +75 -0
- musicdl/modules/utils/neteaseutils.py +81 -0
- musicdl/modules/utils/qqutils.py +184 -0
- musicdl/modules/utils/quarkparser.py +105 -0
- musicdl/modules/utils/songinfoutils.py +54 -0
- musicdl/modules/utils/tidalutils.py +738 -0
- musicdl/modules/utils/youtubeutils.py +3606 -0
- musicdl/musicdl.py +184 -86
- musicdl-2.7.3.dist-info/LICENSE +203 -0
- musicdl-2.7.3.dist-info/METADATA +704 -0
- musicdl-2.7.3.dist-info/RECORD +53 -0
- {musicdl-2.1.11.dist-info → musicdl-2.7.3.dist-info}/WHEEL +5 -5
- musicdl-2.7.3.dist-info/entry_points.txt +2 -0
- musicdl/modules/sources/baiduFlac.py +0 -69
- musicdl/modules/sources/xiami.py +0 -104
- musicdl/modules/utils/downloader.py +0 -80
- musicdl-2.1.11.dist-info/LICENSE +0 -22
- musicdl-2.1.11.dist-info/METADATA +0 -82
- musicdl-2.1.11.dist-info/RECORD +0 -24
- {musicdl-2.1.11.dist-info → musicdl-2.7.3.dist-info}/top_level.txt +0 -0
- {musicdl-2.1.11.dist-info → musicdl-2.7.3.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,902 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Function:
|
|
3
|
+
Implementation of JSInterpreter, refer to https://pytubefix.readthedocs.io/en/latest/index.html
|
|
4
|
+
Author:
|
|
5
|
+
Zhenchao Jin
|
|
6
|
+
WeChat Official Account (微信公众号):
|
|
7
|
+
Charles的皮卡丘
|
|
8
|
+
'''
|
|
9
|
+
import re
|
|
10
|
+
import json
|
|
11
|
+
import math
|
|
12
|
+
import datetime
|
|
13
|
+
import calendar
|
|
14
|
+
import operator
|
|
15
|
+
import itertools
|
|
16
|
+
import contextlib
|
|
17
|
+
import email.utils
|
|
18
|
+
import collections
|
|
19
|
+
from functools import update_wrapper
|
|
20
|
+
from contextlib import suppress as compat_contextlib_suppress
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
'''constants'''
|
|
24
|
+
# strptime patterns shared by both lookup lists below; unifiedtimestamp() tries
# the selected list in order and returns on the first pattern that parses.
DATE_FORMATS = (
    '%d %B %Y', '%d %b %Y', '%B %d %Y', '%B %dst %Y', '%B %dnd %Y', '%B %drd %Y', '%B %dth %Y', '%b %d %Y', '%b %dst %Y', '%b %dnd %Y', '%b %drd %Y', '%b %dth %Y',
    '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', '%b %drd %Y %I:%M', '%b %dth %Y %I:%M', '%Y %m %d', '%Y-%m-%d', '%Y.%m.%d.', '%Y/%m/%d', '%Y/%m/%d %H:%M', '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M', '%Y%m%d%H%M%S', '%Y%m%d', '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S:%f', '%d.%m.%Y %H:%M', '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%S.%f0Z', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M', '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S', '%B %d %Y at %H:%M', '%B %d %Y at %H:%M:%S', '%H:%M %d-%b-%Y',
)
# shared patterns followed by numeric month-before-day patterns; returned by dateformats(day_first=False)
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend(['%m-%d-%Y', '%m.%d.%Y', '%m/%d/%Y', '%m/%d/%y', '%m/%d/%Y %H:%M:%S'])
# timezone abbreviation -> UTC offset in whole hours; looked up by extracttimezone()
TIMEZONE_NAMES = {'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0, 'AST': -4, 'ADT': -3, 'EST': -5, 'EDT': -4, 'CST': -6, 'CDT': -5, 'MST': -7, 'MDT': -6, 'PST': -8, 'PDT': -7}
# shared patterns followed by numeric day-before-month patterns; returned by dateformats(day_first=True)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend(['%d-%m-%Y', '%d.%m.%Y', '%d.%m.%y', '%d/%m/%Y', '%d/%m/%y', '%d/%m/%Y %H:%M:%S', '%d-%m-%Y %H:%M', '%H:%M %d/%m/%Y'])
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
'''js2json'''
|
|
39
|
+
def js2json(code, vars={}, *, strict=False):
    '''
    Rewrite JavaScript literal source text token by token into text intended for json.loads.

    Args:
        code: JavaScript source text to rewrite.
        vars: mapping of identifier -> replacement text. It is only read here, never
            mutated, so the shared default dict is not modified by this function.
        strict: when True, an identifier that is neither a known literal nor a key of
            `vars` raises ValueError instead of being emitted as a quoted string, and
            the `new X(...)` / `parseInt(...)` / immediately-invoked-function rewrites
            are skipped.

    Returns:
        The rewritten text (str).

    Raises:
        ValueError: in strict mode, for an unknown identifier.
    '''
    # constants
    STRING_QUOTES = '\'"`'
    # one alternative per quote character: a quoted run allowing backslash escapes
    STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
    # (pattern, base) pairs for hexadecimal and leading-zero octal integers, optionally used as an object key (trailing ':')
    INTEGER_TABLE = ((fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16), (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8))
    # process escape
    def _processescape(match):
        # group(1) is a bare double quote, group(2) is the character following a backslash
        JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
        escape = match.group(1) or match.group(2)
        # keep escapes listed above, turn \x into \u00, drop an escaped newline, otherwise emit the character unescaped
        return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES else R'\u00' if escape == 'x' else '' if escape == '\n' else escape)
    # template substitute
    def _templatesubstitute(match):
        # evaluate the body of a `${...}` placeholder; a JSON string result is unquoted before being spliced in
        evaluated = js2json(match.group(1), vars, strict=strict)
        if evaluated[0] == '"': return json.loads(evaluated)
        return evaluated
    # fix kv
    def _fixkv(m):
        # replacement callback for every token matched by the final re.sub below
        v: str = m.group(0)
        if v in ('true', 'false', 'null'): return v
        elif v in ('undefined', 'void 0'): return 'null'
        # comments, runs of '!' and trailing commas are removed
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',': return ''
        if v[0] in STRING_QUOTES:
            # strip the quotes; for backtick strings expand `${...}` placeholders first
            v = re.sub(r'(?s)\${([^}]+)}', _templatesubstitute, v[1:-1]) if v[0] == '`' else v[1:-1]
            escaped = re.sub(r'(?s)(")|\\(.)', _processescape, v)
            r = f'"{escaped}"'
            return r
        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                # a trailing ':' means the number is an object key, which must be quoted
                return f'"{i}":' if v.endswith(':') else str(i)
        if v in vars:
            try:
                # non-strict: check whether the replacement already parses as JSON
                if not strict: json.loads(vars[v])
            except json.JSONDecodeError:
                return json.dumps(vars[v])
            else:
                # strict mode performs no check and always reaches this branch
                return vars[v]
        if not strict: return f'"{v}"'
        raise ValueError(f'Unknown value: {v}')
    # create map
    def _createmap(mobj):
        # `new Map([[k, v], ...])` -> JSON object; NOTE(review): `strict` is not forwarded to the nested call
        r = json.dumps(dict(json.loads(js2json(mobj.group(1) or '[]', vars=vars))))
        return r
    # process
    code = re.sub(r'new Map\((\[.*?\])?\)', _createmap, code)
    if not strict:
        # `new Date("...")` -> its quoted argument
        code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
        # any other `new X(...)` -> the whole expression as a JSON string
        code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
        # `parseInt(<non-digits>123<non-digits>)` -> 123
        code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
        # `(function(...){...})("literal")` -> the quoted argument
        code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)
    # return
    return re.sub(rf'''(?sx)
{STRING_RE}|
{COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
[0-9]+(?={SKIP_RE}:)|
!+
''', _fixkv, code)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
'''extracttimezone'''
|
|
104
|
+
def extracttimezone(date_str):
    '''
    Split a trailing timezone designator off a date string.

    Args:
        date_str: date text that may end in `Z`, a numeric `+hh[:]mm` / `-hh[:]mm`
            offset, or an alphabetic abbreviation following a time of day.

    Returns:
        (offset, remainder): a datetime.timedelta and the input with the recognised
        designator removed. An abbreviation missing from TIMEZONE_NAMES yields a
        zero offset and leaves the string untouched.
    '''
    offset_pattern = r'''(?x)
^.{8,}? # >=8 char non-TZ prefix, if present
(?P<tz>Z| # just the UTC Z, or
(?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
(?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
[ ]? # optional space
(?P<sign>\+|-) # +/-
(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
$)
'''
    found = re.search(offset_pattern, date_str)
    if found:
        # `Z` or a numeric offset: always cut it from the string
        date_str = date_str[:-len(found.group('tz'))]
        if not found.group('sign'):
            return datetime.timedelta(), date_str
        direction = 1 if found.group('sign') == '+' else -1
        delta = datetime.timedelta(hours=direction * int(found.group('hours')), minutes=direction * int(found.group('minutes')))
        return delta, date_str
    # otherwise look for an alphabetic abbreviation right after a time of day
    found = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    hours = TIMEZONE_NAMES.get(found and found.group('tz').strip())
    if hours is not None:
        # only a known abbreviation is removed from the string
        date_str = date_str[:-len(found.group('tz'))]
    return datetime.timedelta(hours=hours or 0), date_str
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
'''dateformats'''
|
|
131
|
+
def dateformats(day_first=True):
    '''Return the day-first strptime pattern list when `day_first` is truthy, else the month-first list.'''
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
'''unifiedtimestamp'''
|
|
136
|
+
def unifiedtimestamp(date_str, day_first=True):
    '''
    Parse a free-form date string into a UNIX timestamp (seconds, UTC).

    Args:
        date_str: the text to parse; anything that is not a str yields None.
        day_first: selects which numeric date patterns are tried (see dateformats()).

    Returns:
        int from the strptime path, int/float from the email.utils fallback,
        or None when nothing could parse the input.
    '''
    if not isinstance(date_str, str): return None
    # drop commas, pipes and weekday names, then collapse whitespace.
    # 'sun' is included so "Sunday ..." is normalised like every other weekday.
    date_str = re.sub(r'\s+', ' ', re.sub(r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?|sun)(day)?', '', date_str))
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extracttimezone(date_str)
    # remove AM/PM together with an optional alphabetic suffix
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    # remove a leftover alphabetic suffix after a time of day (abbreviation extracttimezone did not recognise)
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m: date_str = date_str[:-len(m.group('tz'))]
    # keep at most 6 fractional-second digits, the maximum %f accepts
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m: date_str = m.group(1)
    for expression in dateformats(day_first):
        # a ValueError from strptime just means "try the next pattern"
        with contextlib.suppress(ValueError):
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
    # last resort: RFC 2822 style parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple: return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
'''removequotes'''
|
|
155
|
+
def removequotes(s):
    '''Strip one pair of matching single or double quotes wrapping `s`; return `s` unchanged otherwise (including None and inputs shorter than 2).'''
    if s is not None and len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
'''truncatestring'''
|
|
165
|
+
def truncatestring(s, left, right=0):
    '''
    Shorten `s` to its first `left - 3` characters, an ellipsis, and its last `right` characters.

    `s` is returned unchanged when it is None or no longer than `left + right`.
    Requires left > 3 and right >= 0 (checked with assert).
    '''
    assert left > 3 and right >= 0
    if s is None:
        return s
    if len(s) <= left + right:
        return s
    head = s[:left - 3]
    tail = s[-right:] if right else ""
    return f'{head}...{tail}'
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
'''jsbitop'''
|
|
173
|
+
def jsbitop(op):
    '''Wrap binary callable `op` so None, JSUndefined and NaN operands count as 0 and the result is masked to 32 bits.'''
    def zeroise(x):
        if x in (None, JSUndefined):
            return 0
        try:
            not_a_number = math.isnan(x)
        except TypeError:
            # operand is not numeric: pass it through untouched
            return x
        return 0 if not_a_number else x
    def wrapped(a, b):
        lhs = zeroise(a)
        rhs = zeroise(b)
        return op(lhs, rhs) & 0xffffffff
    return wrapped
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
'''jsarithop'''
|
|
186
|
+
def jsarithop(op):
    '''Wrap binary callable `op`: NaN if either operand is JSUndefined, otherwise falsy operands are replaced by 0 before calling `op`.'''
    def wrapped(a, b):
        if JSUndefined in (a, b):
            return float('nan')
        lhs = a or 0
        rhs = b or 0
        return op(lhs, rhs)
    return wrapped
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
'''jsdiv'''
|
|
194
|
+
def jsdiv(a, b):
    '''Divide `a` by `b`: NaN if either is JSUndefined or both are falsy, positive infinity if only `b` is falsy.'''
    if JSUndefined in (a, b):
        return float('nan')
    if not a and not b:
        return float('nan')
    if not b:
        return float('inf')
    return (a or 0) / b
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
'''jsmod'''
|
|
201
|
+
def jsmod(a, b):
    '''Return `a % b` with a falsy `a` counted as 0; NaN if either operand is JSUndefined or `b` is falsy.'''
    if JSUndefined in (a, b):
        return float('nan')
    if not b:
        return float('nan')
    return (a or 0) % b
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
'''jsexp'''
|
|
208
|
+
def jsexp(a, b):
    '''Return `a ** b` with a falsy `a` counted as 0; a falsy exponent gives 1, otherwise a JSUndefined operand gives NaN.'''
    if not b:
        # checked first: a falsy exponent wins regardless of the base
        return 1
    if JSUndefined in (a, b):
        return float('nan')
    base = a or 0
    return base ** b
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
'''jseqop'''
|
|
215
|
+
def jseqop(op):
    '''
    Wrap an equality callable (e.g. operator.eq / operator.ne) for loose `==` / `!=`.

    When both operands are drawn from {None, JSUndefined} they are treated as equal
    to each other: `op(a, a)` is returned. Any other pair is passed to `op` as-is.
    '''
    def wrapped(a, b):
        try:
            both_nullish = {a, b} <= {None, JSUndefined}
        except TypeError:
            # unhashable operands (list, dict, ...) cannot be None/JSUndefined;
            # previously building the set raised TypeError and aborted the comparison
            both_nullish = False
        if both_nullish: return op(a, a)
        return op(a, b)
    return wrapped
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
'''jscompop'''
|
|
223
|
+
def jscompop(op):
    '''Wrap an ordering callable: False if either operand is JSUndefined; falsy operands become 0; if either original operand is a str both sides are compared as str.'''
    def wrapped(a, b):
        if JSUndefined in (a, b):
            return False
        lhs = a or 0
        rhs = b or 0
        if isinstance(a, str) or isinstance(b, str):
            lhs, rhs = str(lhs), str(rhs)
        return op(lhs, rhs)
    return wrapped
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
'''jsternary'''
|
|
232
|
+
def jsternary(cndn, if_true=True, if_false=False):
    '''Return `if_false` when `cndn` is False, None, 0, '', JSUndefined or NaN; otherwise return `if_true`.'''
    if cndn in (False, None, 0, '', JSUndefined):
        return if_false
    try:
        not_a_number = math.isnan(cndn)
    except TypeError:
        # non-numeric values cannot be NaN
        not_a_number = False
    return if_false if not_a_number else if_true
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
'''jstypeof'''
|
|
241
|
+
def jstypeof(expr):
    '''
    Return the JavaScript `typeof` name for a Python value.

    None -> 'object', bool -> 'boolean', str/bytes -> 'string',
    int/float/complex -> 'number', JSUndefined -> 'undefined',
    any other callable -> 'function', everything else -> 'object'.
    '''
    # Explicit type checks replace the former dict lookup: 1, 0, 1.0 and 0.0 hash and
    # compare equal to True/False, so the lookup reported numbers as 'boolean'.
    if expr is None: return 'object'
    # bool must be tested before int because bool is a subclass of int
    if isinstance(expr, bool): return 'boolean'
    if isinstance(expr, (str, bytes)): return 'string'
    if isinstance(expr, (int, float, complex)): return 'number'
    # JSUndefined is a class and therefore callable: test it before callable()
    if expr is JSUndefined: return 'undefined'
    if callable(expr): return 'function'
    return 'object'
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
'''wrapsop'''
|
|
251
|
+
def wrapsop(op):
    '''Decorator factory: copy `op`'s metadata onto the decorated function via update_wrapper, then prefix its __name__ with 'JS_'.'''
    def _updateandrenamewrapper(w):
        decorated = update_wrapper(w, op)
        decorated.__name__ = str('JS_') + decorated.__name__
        return decorated
    return _updateandrenamewrapper
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
'''jsunaryop'''
|
|
260
|
+
def jsunaryop(op):
    '''Adapt one-argument `op` to the two-argument operator call shape; the first (left) argument is ignored.'''
    def wrapped(_, a):
        return op(a)
    # same effect as decorating `wrapped` with @wrapsop(op)
    return wrapsop(op)(wrapped)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
'''extractplayerjsglobalvar'''
|
|
268
|
+
def extractplayerjsglobalvar(jscode):
    '''
    Find the first `var NAME = ...` declaration that directly follows a "use strict" directive.

    The value must be either a quoted string followed by `.split(<quoted separator>)`
    or an array literal of quoted strings.

    Returns:
        (code, name, value): the declaration text without its terminating `;`/`,`,
        the variable name and the value expression; (None, None, None) if not found.
    '''
    pattern = r'''(?x)
(?P<q1>["\'])use\s+strict(?P=q1);\s*
(?P<code>
var\s+(?P<name>[a-zA-Z0-9_$]+)\s*=\s*
(?P<value>
(?P<q2>["\'])(?:(?!(?P=q2)).|\\.)+(?P=q2)
\.split\((?P<q3>["\'])(?:(?!(?P=q3)).)+(?P=q3)\)
|\[\s*(?:(?P<q4>["\'])(?:(?!(?P=q4)).|\\.)*(?P=q4)\s*,?\s*)+\]
)
)[;,]
'''
    found = re.search(pattern, jscode)
    if not found:
        return None, None, None
    return found.group('code'), found.group("name"), found.group("value")
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
'''fixupnfunctioncode'''
|
|
286
|
+
def fixupnfunctioncode(argnames, code, full_code):
    '''
    Prepare extracted function source for interpretation.

    Args:
        argnames: parameter names of the function; the first one is used below.
        code: source text of the function body.
        full_code: complete script the function came from; searched for the
            global variable declaration (see extractplayerjsglobalvar()).

    Returns:
        (argnames, code): `code` with the global variable declaration prepended when
        one was found, and with any `;if(typeof X === "undefined") return <first arg>;`
        guard replaced by `;`.
    '''
    global_var, _, _ = extractplayerjsglobalvar(full_code)
    if global_var: code = global_var + '; ' + code
    # JS identifiers may contain `$`, which is a regex metacharacter: escape the name
    # so the guard is still matched for such parameters.
    first_arg = re.escape(argnames[0])
    return argnames, re.sub(rf';\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(["\'])undefined\1\s*\)\s*return\s+{first_arg};', ';', code)
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
'''NODEFAULT'''
|
|
293
|
+
class NODEFAULT:
    # Empty marker class.
    # NOTE(review): not referenced in the reviewed portion of this file; presumably a
    # "no default supplied" sentinel — confirm against its users.
    pass
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
'''FunctionWithRepr'''
|
|
298
|
+
class FunctionWithRepr:
    '''Callable wrapper around `func` whose repr() is `repr_` when given, else `<module>.<qualname>` of `func`.'''
    def __init__(self, func, repr_=None):
        self.func = func
        self.__repr = repr_
    def __call__(self, *args, **kwargs):
        # delegate straight to the wrapped callable
        return self.func(*args, **kwargs)
    def __repr__(self):
        return self.__repr or f'{self.func.__module__}.{self.func.__qualname__}'
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
'''JSUndefined'''
|
|
310
|
+
class JSUndefined:
    # Sentinel for the JavaScript `undefined` value. The class object itself is what
    # the operator helpers in this module compare against and return; jstypeof()
    # reports it as 'undefined'.
    pass
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
'''JSBreak'''
|
|
315
|
+
class JSBreak(Exception):
    '''Raised to signal a `break`; carries the fixed message 'Invalid break'.'''
    def __init__(self):
        super().__init__('Invalid break')
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
'''JSContinue'''
|
|
321
|
+
class JSContinue(Exception):
    '''Raised to signal a `continue`; carries the fixed message 'Invalid continue'.'''
    def __init__(self):
        super().__init__('Invalid continue')
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
'''JSThrow'''
|
|
327
|
+
class JSThrow(Exception):
    '''Carries a value thrown by interpreted code; the value is kept on `.error`.'''
    def __init__(self, e):
        self.error = e
        message = f'Uncaught exception {e}'
        super().__init__(message)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
'''LocalNameSpace'''
|
|
334
|
+
class LocalNameSpace(collections.ChainMap):
    '''ChainMap whose assignment updates the first scope already holding the key, falling back to the innermost scope; deletion is unsupported.'''
    def __setitem__(self, key, value):
        # pick the first scope that already defines the key, else the innermost map
        owner = next((scope for scope in self.maps if key in scope), self.maps[0])
        owner[key] = value
    def __delitem__(self, key):
        raise NotImplementedError('Deleting is not supported')
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
'''constants'''
|
|
346
|
+
# Binary operator token -> Python callable taking (left, right).
# A None entry means the operator has no callable: JSInterpreter._operator handles
# '?', '??', '||' and '&&' itself. JSInterpreter.handleoperators iterates these
# tables in insertion order, so the ordering of the entries is significant.
# NOTE(review): '===' / '!==' use Python identity (operator.is_ / is_not), which is
# not value equality for arbitrary strings or numbers — confirm this is intended.
_OPERATORS = {
    '?': None, '??': None, '||': None, '&&': None, '|': jsbitop(operator.or_), '^': jsbitop(operator.xor), '&': jsbitop(operator.and_), '===': operator.is_,
    '!==': operator.is_not, '==': jseqop(operator.eq), '!=': jseqop(operator.ne), '<=': jscompop(operator.le), '>=': jscompop(operator.ge), '<': jscompop(operator.lt),
    '>': jscompop(operator.gt), '>>': jsbitop(operator.rshift), '<<': jsbitop(operator.lshift), '+': jsarithop(operator.add), '-': jsarithop(operator.sub),
    '*': jsarithop(operator.mul), '%': jsmod, '/': jsdiv, '**': jsexp,
}
# keyword unary operators, adapted by jsunaryop to the (ignored_left, operand) call shape
_UNARY_OPERATORS_X = {'void': jsunaryop(lambda _: JSUndefined), 'typeof': jsunaryop(jstypeof)}
# comparison operators; excluded when building the compound-assignment regex
_COMP_OPERATORS = {'===', '!==', '==', '!=', '<=', '>=', '<', '>'}
_ALL_OPERATORS = {**_OPERATORS, **_UNARY_OPERATORS_X}
# identifier pattern: letter, '_' or '$' followed by word characters or '$'
_NAME_RE = r'[a-zA-Z_$][\w$]*'
# opening bracket -> closing bracket: {'(': ')', '{': '}', '[': ']'}
_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
# characters that open/close a quoted region; '/' delimits regex literals
_QUOTES = '\'"/'
# bracket contents with up to two further levels of nested [...]
_NESTED_BRACKETS = r'[^[\]]+(?:\[[^[\]]+(?:\[[^\]]+\])?\])?'
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
'''JSInterpreter'''
|
|
362
|
+
class JSInterpreter:
|
|
363
|
+
__named_object_counter = 0
|
|
364
|
+
_RE_FLAGS = {'d': 1024, 'g': 2048, 'i': re.I, 'm': re.M, 's': re.S, 'u': re.U, 'y': 4096}
|
|
365
|
+
def __init__(self, code, objects=None):
|
|
366
|
+
self.code, self._functions = code, {}
|
|
367
|
+
self._objects = {} if objects is None else objects
|
|
368
|
+
'''Exception'''
|
|
369
|
+
class Exception(Exception):
    '''
    Interpreter error; derives from the builtin Exception.

    Args:
        msg: error message.
        expr: optional expression text; when given, a truncated copy is appended
            to the message as ' in: <expr>'.
        cause: optional underlying exception. Callers in this class pass `cause=e`;
            forwarding that keyword to the builtin Exception raised
            "takes no keyword arguments" and hid the real error, so it is consumed
            here and recorded as `.cause` / `__cause__`.
    '''
    def __init__(self, msg, expr=None, *args, cause=None, **kwargs):
        if expr is not None:
            msg = f'{msg.rstrip()} in: {truncatestring(expr, 50, 50)}'
        super().__init__(msg, *args, **kwargs)
        self.cause = cause
        if cause is not None:
            # chain explicitly so tracebacks show the original failure
            self.__cause__ = cause
|
|
374
|
+
'''_namedobject'''
|
|
375
|
+
def _namedobject(self, namespace, obj):
|
|
376
|
+
self.__named_object_counter += 1
|
|
377
|
+
name = f'__pytubefix_jsinterp_obj{self.__named_object_counter}'
|
|
378
|
+
if callable(obj) and not isinstance(obj, FunctionWithRepr):
|
|
379
|
+
obj = FunctionWithRepr(obj, f'F<{self.__named_object_counter}>')
|
|
380
|
+
namespace[name] = obj
|
|
381
|
+
return name
|
|
382
|
+
'''_regexflags'''
|
|
383
|
+
@classmethod
|
|
384
|
+
def _regexflags(cls, expr):
|
|
385
|
+
flags = 0
|
|
386
|
+
if not expr: return flags, expr
|
|
387
|
+
for idx, ch in enumerate(expr):
|
|
388
|
+
if ch not in cls._RE_FLAGS: break
|
|
389
|
+
flags |= cls._RE_FLAGS[ch]
|
|
390
|
+
return flags, expr[idx + 1:]
|
|
391
|
+
'''_separate'''
|
|
392
|
+
@staticmethod
def _separate(expr, delim=',', max_split=None):
    '''
    Generator: split `expr` on `delim`, ignoring delimiters inside brackets or quotes.

    Args:
        expr: text to split; an empty/falsy value yields nothing at all.
        delim: delimiter string (may be longer than one character).
        max_split: stop splitting after this many splits; falsy means no limit.

    Yields:
        Each piece in order; the final piece is always the unsplit remainder.
    '''
    # characters after which a '/' starts a regex literal and a '+'/'-' is unary
    OP_CHARS = '+-*/%&|^=<>!,;{}:['
    if not expr: return
    # open-bracket depth, keyed by the closing bracket character
    counters = {k: 0 for k in _MATCHING_PARENS.values()}
    # start: index where the current piece begins; pos: progress through a multi-char delimiter
    start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
    in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
    for idx, char in enumerate(expr):
        if not in_quote and char in _MATCHING_PARENS:
            counters[_MATCHING_PARENS[char]] += 1
        elif not in_quote and char in counters:
            # an unbalanced closing bracket is ignored rather than driving the count negative
            if counters[char]: counters[char] -= 1
        elif not escaping:
            if char in _QUOTES and in_quote in (char, None):
                # a '/' only opens a quoted region when it follows an operator (after_op)
                if in_quote or after_op or char != '/': in_quote = None if in_quote and not in_regex_char_group else char
            elif in_quote == '/' and char in '[]':
                # track [...] inside a regex literal, where '/' does not close it
                in_regex_char_group = char == '['
        escaping = not escaping and in_quote and char == '\\'
        # after_op holds the operator character itself when the previous non-space char was one
        in_unary_op = (not in_quote and not in_regex_char_group and after_op not in (True, False) and char in '-+')
        after_op = char if (not in_quote and char in OP_CHARS) else (char.isspace() and after_op)
        if char != delim[pos] or any(counters.values()) or in_quote or in_unary_op:
            pos = 0
            continue
        elif pos != delim_len:
            # matched part of a multi-character delimiter; keep going
            pos += 1
            continue
        yield expr[start: idx - delim_len]
        start, pos = idx + 1, 0
        splits += 1
        if max_split and splits >= max_split: break
    yield expr[start:]
|
|
423
|
+
'''_separateatparen'''
|
|
424
|
+
@classmethod
def _separateatparen(cls, expr, delim=None):
    '''
    Split `expr` at the first top-level occurrence of `delim`.

    When `delim` is None it defaults to the closing bracket matching expr[0].

    Returns:
        (inner, rest): the text before the delimiter with its first character
        dropped, and the text after it, both stripped.

    Raises:
        cls.Exception: when the delimiter is not found.
    '''
    if delim is None:
        delim = expr and _MATCHING_PARENS[expr[0]]
    pieces = list(cls._separate(expr, delim, 1))
    if len(pieces) < 2:
        raise cls.Exception(f'No terminating paren {delim}', expr)
    inner = pieces[0][1:].strip()
    rest = pieces[1].strip()
    return inner, rest
|
|
432
|
+
'''_operator'''
|
|
433
|
+
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
    '''
    Apply binary operator `op` to an evaluated left value and an unevaluated right expression.

    Args:
        op: operator token; a token with no callable in _OPERATORS (including None)
            makes the evaluated right-hand side the result.
        left_val: already evaluated left operand.
        right_expr: source text of the right operand; for '?' it holds
            '<if-true>:<if-false>' and the selected branch is evaluated.
        expr: full expression text, used only in the error message.
        local_vars, allow_recursion: passed through to interpretexpression.

    Raises:
        self.Exception: when the operator callable fails; the original error is chained.
    '''
    if op in ('||', '&&'):
        # short-circuit: '||' returns a truthy left value, '&&' a falsy one
        if (op == '&&') ^ jsternary(left_val): return left_val
    elif op == '??':
        if left_val not in (None, JSUndefined): return left_val
    elif op == '?':
        right_expr = jsternary(left_val, *self._separate(right_expr, ':', 1))
    right_val = self.interpretexpression(right_expr, local_vars, allow_recursion)
    if not _OPERATORS.get(op): return right_val
    try: return _OPERATORS[op](left_val, right_val)
    # chain with `from e`: passing cause=e reached builtin Exception.__init__, which
    # rejects keyword arguments and replaced this error with a TypeError
    except Exception as e: raise self.Exception(f'Failed to evaluate {left_val!r} {op} {right_val!r}', expr) from e
|
|
444
|
+
'''_index'''
|
|
445
|
+
def _index(self, obj, idx, allow_undefined=False):
|
|
446
|
+
if idx == 'length': return len(obj)
|
|
447
|
+
try:
|
|
448
|
+
return obj[int(idx)] if isinstance(obj, list) else obj[idx]
|
|
449
|
+
except Exception as e:
|
|
450
|
+
if allow_undefined: return JSUndefined
|
|
451
|
+
raise self.Exception(f'Cannot get index {idx}', repr(obj), cause=e)
|
|
452
|
+
'''_dump'''
|
|
453
|
+
def _dump(self, obj, namespace):
|
|
454
|
+
try: return json.dumps(obj)
|
|
455
|
+
except TypeError: return self._namedobject(namespace, obj)
|
|
456
|
+
'''handleoperators'''
|
|
457
|
+
def handleoperators(self, expr, local_vars, allow_recursion):
    '''
    Evaluate `expr` at the first operator, in _ALL_OPERATORS order, that splits it.

    Returns:
        (value, True) for the first operator that leaves a non-empty left side;
        None (implicitly) when no operator splits the expression.
    '''
    for op in _ALL_OPERATORS:
        separated = list(self._separate(expr, op))
        # the last piece is the right operand; the rest, re-joined, is the left operand
        right_expr = separated.pop()
        while True:
            # `op in '?<>*-'` is a substring test. An empty piece before the right operand
            # means the split landed on something that is not a binary use of `op`
            # (presumably a doubled token such as '<<' / '**' / '??', or a unary minus — TODO confirm)
            if op in '?<>*-' and len(separated) > 1 and not separated[-1].strip(): separated.pop()
            # '?' immediately followed by '.' is kept attached to the right-hand side
            elif not (separated and op == '?' and right_expr.startswith('.')): break
            # glue the operator (and, except for '-', the preceding piece) back onto the right operand
            right_expr = f'{op}{right_expr}'
            if op != '-': right_expr = f'{separated.pop()}{op}{right_expr}'
        # nothing left of the operator: this operator does not apply, try the next one
        if not separated: continue
        left_val = self.interpretexpression(op.join(separated), local_vars, allow_recursion)
        return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), True
|
|
469
|
+
'''interpretstatement'''
|
|
470
|
+
def interpretstatement(self, stmt, local_vars, allow_recursion=100):
|
|
471
|
+
if allow_recursion < 0: raise self.Exception('Recursion limit reached')
|
|
472
|
+
allow_recursion -= 1
|
|
473
|
+
should_return = False
|
|
474
|
+
sub_statements = list(self._separate(stmt, ';')) or ['']
|
|
475
|
+
expr = stmt = sub_statements.pop().strip()
|
|
476
|
+
for sub_stmt in sub_statements:
|
|
477
|
+
ret, should_return = self.interpretstatement(sub_stmt, local_vars, allow_recursion)
|
|
478
|
+
if should_return: return ret, should_return
|
|
479
|
+
m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["\'])|$)|(?P<throw>throw\s+)', stmt)
|
|
480
|
+
if m:
|
|
481
|
+
expr = stmt[len(m.group(0)):].strip()
|
|
482
|
+
if m.group('throw'): raise JSThrow(self.interpretexpression(expr, local_vars, allow_recursion))
|
|
483
|
+
should_return = not m.group('var')
|
|
484
|
+
if not expr: return None, should_return
|
|
485
|
+
if expr[0] in _QUOTES:
|
|
486
|
+
inner, outer = self._separate(expr, expr[0], 1)
|
|
487
|
+
if expr[0] == '/':
|
|
488
|
+
flags, outer = self._regexflags(outer)
|
|
489
|
+
inner = f'{inner}/{flags}'
|
|
490
|
+
else:
|
|
491
|
+
inner = json.loads(js2json(f'{inner}{expr[0]}', strict=True))
|
|
492
|
+
if not outer: return inner, should_return
|
|
493
|
+
expr = self._namedobject(local_vars, inner) + outer
|
|
494
|
+
if expr.startswith('new '):
|
|
495
|
+
obj = expr[4:]
|
|
496
|
+
if obj.startswith('Date('):
|
|
497
|
+
left, right = self._separateatparen(obj[4:])
|
|
498
|
+
date = unifiedtimestamp(self.interpretexpression(left, local_vars, allow_recursion), False)
|
|
499
|
+
if date is None: raise self.Exception(f'Failed to parse date {left!r}', expr)
|
|
500
|
+
expr = self._dump(int(date * 1000), local_vars) + right
|
|
501
|
+
else:
|
|
502
|
+
raise self.Exception(f'Unsupported object {obj}', expr)
|
|
503
|
+
if expr.startswith('void '):
|
|
504
|
+
left = self.interpretexpression(expr[5:], local_vars, allow_recursion)
|
|
505
|
+
return None, should_return
|
|
506
|
+
for op in _UNARY_OPERATORS_X:
|
|
507
|
+
if not expr.startswith(op): continue
|
|
508
|
+
operand = expr[len(op):]
|
|
509
|
+
if not operand or operand[0] != ' ': continue
|
|
510
|
+
op_result = self.handleoperators(expr, local_vars, allow_recursion)
|
|
511
|
+
if op_result: return op_result[0], should_return
|
|
512
|
+
if expr.startswith('{'):
|
|
513
|
+
inner, outer = self._separateatparen(expr)
|
|
514
|
+
sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
|
|
515
|
+
if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
|
|
516
|
+
def _dictitem(key, val):
|
|
517
|
+
val = self.interpretexpression(val, local_vars, allow_recursion)
|
|
518
|
+
if re.match(_NAME_RE, key): return key, val
|
|
519
|
+
return self.interpretexpression(key, local_vars, allow_recursion), val
|
|
520
|
+
return dict(_dictitem(k, v) for k, v in sub_expressions), should_return
|
|
521
|
+
inner, should_abort = self.interpretstatement(inner, local_vars, allow_recursion)
|
|
522
|
+
if not outer or should_abort:
|
|
523
|
+
return inner, should_abort or should_return
|
|
524
|
+
else:
|
|
525
|
+
expr = self._dump(inner, local_vars) + outer
|
|
526
|
+
if expr.startswith('('):
|
|
527
|
+
inner, outer = self._separateatparen(expr)
|
|
528
|
+
inner, should_abort = self.interpretstatement(inner, local_vars, allow_recursion)
|
|
529
|
+
if not outer or should_abort: return inner, should_abort or should_return
|
|
530
|
+
else: expr = self._dump(inner, local_vars) + outer
|
|
531
|
+
if expr.startswith('['):
|
|
532
|
+
inner, outer = self._separateatparen(expr)
|
|
533
|
+
name = self._namedobject(local_vars, [self.interpretexpression(item, local_vars, allow_recursion) for item in self._separate(inner)])
|
|
534
|
+
expr = name + outer
|
|
535
|
+
m = re.match(r'''(?x)
|
|
536
|
+
(?P<try>try)\s*\{|
|
|
537
|
+
(?P<if>if)\s*\(|
|
|
538
|
+
(?P<switch>switch)\s*\(|
|
|
539
|
+
(?P<for>for)\s*\(
|
|
540
|
+
''', expr)
|
|
541
|
+
md = m.groupdict() if m else {}
|
|
542
|
+
if md.get('if'):
|
|
543
|
+
cndn, expr = self._separateatparen(expr[m.end() - 1:])
|
|
544
|
+
if expr.startswith('{'): if_expr, expr = self._separateatparen(expr)
|
|
545
|
+
else: if_expr, expr = self._separateatparen(' %s;' % (expr,), delim=';')
|
|
546
|
+
else_expr = None
|
|
547
|
+
m = re.match(r'else\s*{', expr)
|
|
548
|
+
if m: else_expr, expr = self._separateatparen(expr[m.end() - 1:])
|
|
549
|
+
cndn = jsternary(self.interpretexpression(cndn, local_vars, allow_recursion))
|
|
550
|
+
ret, should_abort = self.interpretstatement(if_expr if cndn else else_expr, local_vars, allow_recursion)
|
|
551
|
+
if should_abort: return ret, True
|
|
552
|
+
if md.get('try'):
|
|
553
|
+
try_expr, expr = self._separateatparen(expr[m.end() - 1:])
|
|
554
|
+
err = None
|
|
555
|
+
try:
|
|
556
|
+
ret, should_abort = self.interpretstatement(try_expr, local_vars, allow_recursion)
|
|
557
|
+
if should_abort: return ret, True
|
|
558
|
+
except Exception as e:
|
|
559
|
+
err = e
|
|
560
|
+
pending = (None, False)
|
|
561
|
+
m = re.match(fr'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{', expr)
|
|
562
|
+
if m:
|
|
563
|
+
sub_expr, expr = self._separateatparen(expr[m.end() - 1:])
|
|
564
|
+
if err:
|
|
565
|
+
catch_vars = {}
|
|
566
|
+
if m.group('err'): catch_vars[m.group('err')] = err.error if isinstance(err, JSThrow) else err
|
|
567
|
+
catch_vars = local_vars.new_child(catch_vars)
|
|
568
|
+
err, pending = None, self.interpretstatement(sub_expr, catch_vars, allow_recursion)
|
|
569
|
+
m = re.match(r'finally\s*\{', expr)
|
|
570
|
+
if m:
|
|
571
|
+
sub_expr, expr = self._separateatparen(expr[m.end() - 1:])
|
|
572
|
+
ret, should_abort = self.interpretstatement(sub_expr, local_vars, allow_recursion)
|
|
573
|
+
if should_abort: return ret, True
|
|
574
|
+
ret, should_abort = pending
|
|
575
|
+
if should_abort: return ret, True
|
|
576
|
+
if err: raise err
|
|
577
|
+
elif md.get('for'):
|
|
578
|
+
constructor, remaining = self._separateatparen(expr[m.end() - 1:])
|
|
579
|
+
if remaining.startswith('{'):
|
|
580
|
+
body, expr = self._separateatparen(remaining)
|
|
581
|
+
else:
|
|
582
|
+
switch_m = re.match(r'switch\s*\(', remaining) # FIXME
|
|
583
|
+
if switch_m:
|
|
584
|
+
switch_val, remaining = self._separateatparen(remaining[switch_m.end() - 1:])
|
|
585
|
+
body, expr = self._separateatparen(remaining, '}')
|
|
586
|
+
body = 'switch(%s){%s}' % (switch_val, body)
|
|
587
|
+
else:
|
|
588
|
+
body, expr = remaining, ''
|
|
589
|
+
start, cndn, increment = self._separate(constructor, ';')
|
|
590
|
+
self.interpretexpression(start, local_vars, allow_recursion)
|
|
591
|
+
while True:
|
|
592
|
+
if not jsternary(self.interpretexpression(cndn, local_vars, allow_recursion)): break
|
|
593
|
+
try:
|
|
594
|
+
ret, should_abort = self.interpretstatement(body, local_vars, allow_recursion)
|
|
595
|
+
if should_abort: return ret, True
|
|
596
|
+
except JSBreak: break
|
|
597
|
+
except JSContinue: pass
|
|
598
|
+
self.interpretexpression(increment, local_vars, allow_recursion)
|
|
599
|
+
elif md.get('switch'):
|
|
600
|
+
switch_val, remaining = self._separateatparen(expr[m.end() - 1:])
|
|
601
|
+
switch_val = self.interpretexpression(switch_val, local_vars, allow_recursion)
|
|
602
|
+
body, expr = self._separateatparen(remaining, '}')
|
|
603
|
+
items = body.replace('default:', 'case default:').split('case ')[1:]
|
|
604
|
+
for default in (False, True):
|
|
605
|
+
matched = False
|
|
606
|
+
for item in items:
|
|
607
|
+
case, stmt = (i.strip() for i in self._separate(item, ':', 1))
|
|
608
|
+
if default: matched = matched or case == 'default'
|
|
609
|
+
elif not matched: matched = (case != 'default' and switch_val == self.interpretexpression(case, local_vars, allow_recursion))
|
|
610
|
+
if not matched: continue
|
|
611
|
+
try:
|
|
612
|
+
ret, should_abort = self.interpretstatement(stmt, local_vars, allow_recursion)
|
|
613
|
+
if should_abort: return ret
|
|
614
|
+
except JSBreak:
|
|
615
|
+
break
|
|
616
|
+
if matched: break
|
|
617
|
+
if md:
|
|
618
|
+
ret, should_abort = self.interpretstatement(expr, local_vars, allow_recursion)
|
|
619
|
+
return ret, should_abort or should_return
|
|
620
|
+
sub_expressions = list(self._separate(expr))
|
|
621
|
+
if len(sub_expressions) > 1:
|
|
622
|
+
for sub_expr in sub_expressions:
|
|
623
|
+
ret, should_abort = self.interpretstatement(sub_expr, local_vars, allow_recursion)
|
|
624
|
+
if should_abort: return ret, True
|
|
625
|
+
return ret, False
|
|
626
|
+
p =fr'''(?x)
|
|
627
|
+
(?P<out>{_NAME_RE})(?:\[(?P<index>{_NESTED_BRACKETS})\])?\s*
|
|
628
|
+
(?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
|
|
629
|
+
=(?!=)(?P<expr>.*)$
|
|
630
|
+
'''
|
|
631
|
+
m = re.match(p, expr)
|
|
632
|
+
if m:
|
|
633
|
+
left_val = local_vars.get(m.group('out'))
|
|
634
|
+
if not m.group('index'):
|
|
635
|
+
local_vars[m.group('out')] = self._operator(m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
|
|
636
|
+
return local_vars[m.group('out')], should_return
|
|
637
|
+
elif left_val in (None, JSUndefined):
|
|
638
|
+
raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)
|
|
639
|
+
idx = self.interpretexpression(m.group('index'), local_vars, allow_recursion)
|
|
640
|
+
if not isinstance(idx, (int, float)): raise self.Exception(f'List index {idx} must be integer', expr)
|
|
641
|
+
idx = int(idx)
|
|
642
|
+
left_val[idx] = self._operator(m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
|
|
643
|
+
return left_val[idx], should_return
|
|
644
|
+
for m in re.finditer(rf'''(?x)
|
|
645
|
+
(?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
|
|
646
|
+
(?P<var2>{_NAME_RE})(?P<post_sign>\+\+|--)''', expr):
|
|
647
|
+
var = m.group('var1') or m.group('var2')
|
|
648
|
+
start, end = m.span()
|
|
649
|
+
sign = m.group('pre_sign') or m.group('post_sign')
|
|
650
|
+
ret = local_vars[var]
|
|
651
|
+
local_vars[var] += 1 if sign[0] == '+' else -1
|
|
652
|
+
if m.group('pre_sign'): ret = local_vars[var]
|
|
653
|
+
expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
|
|
654
|
+
if not expr: return None, should_return
|
|
655
|
+
m = re.match(fr'''(?x)
|
|
656
|
+
(?P<assign>
|
|
657
|
+
(?P<out>{_NAME_RE})(?:\[(?P<index>{_NESTED_BRACKETS})\])?\s*
|
|
658
|
+
(?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
|
|
659
|
+
=(?!=)(?P<expr>.*)$
|
|
660
|
+
)|(?P<return>
|
|
661
|
+
(?!if|return|true|false|null|undefined|NaN)(?P<name>{_NAME_RE})$
|
|
662
|
+
)|(?P<attribute>
|
|
663
|
+
(?P<var>{_NAME_RE})(?:
|
|
664
|
+
(?P<nullish>\?)?\.(?P<member>[^(]+)|
|
|
665
|
+
\[(?P<member2>{_NESTED_BRACKETS})\]
|
|
666
|
+
)\s*
|
|
667
|
+
)|(?P<indexing>
|
|
668
|
+
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
|
|
669
|
+
)|(?P<function>
|
|
670
|
+
(?P<fname>{_NAME_RE})\((?P<args>.*)\)$
|
|
671
|
+
)''', expr)
|
|
672
|
+
if m and m.group('assign'):
|
|
673
|
+
left_val = local_vars.get(m.group('out'))
|
|
674
|
+
if not m.group('index'):
|
|
675
|
+
local_vars[m.group('out')] = self._operator(m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
|
|
676
|
+
return local_vars[m.group('out')], should_return
|
|
677
|
+
elif left_val in (None, JSUndefined):
|
|
678
|
+
raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)
|
|
679
|
+
idx = self.interpretexpression(m.group('index'), local_vars, allow_recursion)
|
|
680
|
+
if not isinstance(idx, (int, float)):
|
|
681
|
+
raise self.Exception(f'List index {idx} must be integer', expr)
|
|
682
|
+
idx = int(idx)
|
|
683
|
+
left_val[idx] = self._operator(m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
|
|
684
|
+
return left_val[idx], should_return
|
|
685
|
+
elif expr.isdigit():
|
|
686
|
+
return int(expr), should_return
|
|
687
|
+
elif expr == 'break':
|
|
688
|
+
raise JSBreak()
|
|
689
|
+
elif expr == 'continue':
|
|
690
|
+
raise JSContinue()
|
|
691
|
+
elif expr == 'undefined':
|
|
692
|
+
return JSUndefined, should_return
|
|
693
|
+
elif expr == 'NaN':
|
|
694
|
+
return float('NaN'), should_return
|
|
695
|
+
elif m and m.group('return'):
|
|
696
|
+
try:
|
|
697
|
+
return local_vars[m.group('name')], should_return
|
|
698
|
+
except KeyError as e:
|
|
699
|
+
return self.extractglobalvar(e.args[0]), should_return
|
|
700
|
+
with contextlib.suppress(ValueError):
|
|
701
|
+
return json.loads(js2json(expr, strict=True)), should_return
|
|
702
|
+
if m and m.group('indexing'):
|
|
703
|
+
val = local_vars[m.group('in')]
|
|
704
|
+
idx = self.interpretexpression(m.group('idx'), local_vars, allow_recursion)
|
|
705
|
+
return self._index(val, idx), should_return
|
|
706
|
+
op_result = self.handleoperators(expr, local_vars, allow_recursion)
|
|
707
|
+
if op_result: return op_result[0], should_return
|
|
708
|
+
if m and m.group('attribute'):
|
|
709
|
+
variable, member, nullish = m.group('var', 'member', 'nullish')
|
|
710
|
+
if not member: member = self.interpretexpression(m.group('member2'), local_vars, allow_recursion)
|
|
711
|
+
arg_str = expr[m.end():]
|
|
712
|
+
if arg_str.startswith('('): arg_str, remaining = self._separateatparen(arg_str)
|
|
713
|
+
else: arg_str, remaining = None, arg_str
|
|
714
|
+
def assertion(cndn, msg):
|
|
715
|
+
if not cndn: raise self.Exception(f'{member} {msg}', expr)
|
|
716
|
+
def _evalmethod():
|
|
717
|
+
nonlocal member
|
|
718
|
+
types = {'String': str, 'Math': float, 'Array': list}
|
|
719
|
+
obj = local_vars.get(variable, types.get(variable, NODEFAULT))
|
|
720
|
+
if obj is NODEFAULT:
|
|
721
|
+
if variable not in self._objects:
|
|
722
|
+
try:
|
|
723
|
+
self._objects[variable] = self.extractobject(variable, local_vars)
|
|
724
|
+
except self.Exception:
|
|
725
|
+
if not nullish:
|
|
726
|
+
raise
|
|
727
|
+
obj = self._objects.get(variable, JSUndefined)
|
|
728
|
+
if nullish and obj is JSUndefined: return JSUndefined
|
|
729
|
+
if arg_str is None: return self._index(obj, member, nullish)
|
|
730
|
+
argvals = [self.interpretexpression(v, local_vars, allow_recursion) for v in self._separate(arg_str)]
|
|
731
|
+
if isinstance(obj, type) and member.startswith('prototype.'):
|
|
732
|
+
new_member, _, func_prototype = member.partition('.')[2].partition('.')
|
|
733
|
+
assertion(argvals, 'takes one or more arguments')
|
|
734
|
+
assertion(isinstance(argvals[0], obj), f'needs binding to type {obj}')
|
|
735
|
+
if func_prototype == 'call':
|
|
736
|
+
obj, *argvals = argvals
|
|
737
|
+
elif func_prototype == 'apply':
|
|
738
|
+
assertion(len(argvals) == 2, 'takes two arguments')
|
|
739
|
+
obj, argvals = argvals
|
|
740
|
+
assertion(isinstance(argvals, list), 'second argument needs to be a list')
|
|
741
|
+
else:
|
|
742
|
+
raise self.Exception(f'Unsupported Function method {func_prototype}', expr)
|
|
743
|
+
member = new_member
|
|
744
|
+
if obj == str:
|
|
745
|
+
if member == 'fromCharCode':
|
|
746
|
+
assertion(argvals, 'takes one or more arguments')
|
|
747
|
+
return ''.join(map(chr, argvals))
|
|
748
|
+
raise self.Exception(f'Unsupported String method {member}', expr)
|
|
749
|
+
elif obj == float:
|
|
750
|
+
if member == 'pow':
|
|
751
|
+
assertion(len(argvals) == 2, 'takes two arguments')
|
|
752
|
+
return argvals[0] ** argvals[1]
|
|
753
|
+
raise self.Exception(f'Unsupported Math method {member}', expr)
|
|
754
|
+
if member == 'split':
|
|
755
|
+
assertion(argvals, 'takes one or more arguments')
|
|
756
|
+
assertion(len(argvals) == 1, 'with limit argument is not implemented')
|
|
757
|
+
return obj.split(argvals[0]) if argvals[0] else list(obj)
|
|
758
|
+
elif member == 'join':
|
|
759
|
+
assertion(isinstance(obj, list), 'must be applied on a list')
|
|
760
|
+
assertion(len(argvals) == 1, 'takes exactly one argument')
|
|
761
|
+
return argvals[0].join(obj)
|
|
762
|
+
elif member == 'reverse':
|
|
763
|
+
assertion(not argvals, 'does not take any arguments')
|
|
764
|
+
obj.reverse()
|
|
765
|
+
return obj
|
|
766
|
+
elif member == 'slice':
|
|
767
|
+
assertion(isinstance(obj, (list, str)), 'must be applied on a list or string')
|
|
768
|
+
assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
|
|
769
|
+
return obj[slice(*argvals, None)]
|
|
770
|
+
elif member == 'splice':
|
|
771
|
+
assertion(isinstance(obj, list), 'must be applied on a list')
|
|
772
|
+
assertion(argvals, 'takes one or more arguments')
|
|
773
|
+
index, howMany = map(int, (argvals + [len(obj)])[:2])
|
|
774
|
+
if index < 0:
|
|
775
|
+
index += len(obj)
|
|
776
|
+
add_items = argvals[2:]
|
|
777
|
+
res = []
|
|
778
|
+
for i in range(index, min(index + howMany, len(obj))):
|
|
779
|
+
res.append(obj.pop(index))
|
|
780
|
+
for i, item in enumerate(add_items):
|
|
781
|
+
obj.insert(index + i, item)
|
|
782
|
+
return res
|
|
783
|
+
elif member == 'unshift':
|
|
784
|
+
assertion(isinstance(obj, list), 'must be applied on a list')
|
|
785
|
+
assertion(argvals, 'takes one or more arguments')
|
|
786
|
+
for item in reversed(argvals): obj.insert(0, item)
|
|
787
|
+
return obj
|
|
788
|
+
elif member == 'pop':
|
|
789
|
+
assertion(isinstance(obj, list), 'must be applied on a list')
|
|
790
|
+
assertion(not argvals, 'does not take any arguments')
|
|
791
|
+
if not obj: return
|
|
792
|
+
return obj.pop()
|
|
793
|
+
elif member == 'push':
|
|
794
|
+
assertion(argvals, 'takes one or more arguments')
|
|
795
|
+
obj.extend(argvals)
|
|
796
|
+
return obj
|
|
797
|
+
elif member == 'forEach':
|
|
798
|
+
assertion(argvals, 'takes one or more arguments')
|
|
799
|
+
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
|
|
800
|
+
f, this = (argvals + [''])[:2]
|
|
801
|
+
return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
|
|
802
|
+
elif member == 'indexOf':
|
|
803
|
+
assertion(argvals, 'takes one or more arguments')
|
|
804
|
+
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
|
|
805
|
+
idx, start = (argvals + [0])[:2]
|
|
806
|
+
try: return obj.index(idx, start)
|
|
807
|
+
except ValueError: return -1
|
|
808
|
+
elif member == 'charCodeAt':
|
|
809
|
+
assertion(isinstance(obj, str), 'must be applied on a string')
|
|
810
|
+
assertion(len(argvals) == 1, 'takes exactly one argument')
|
|
811
|
+
idx = argvals[0] if isinstance(argvals[0], int) else 0
|
|
812
|
+
if idx >= len(obj): return None
|
|
813
|
+
return ord(obj[idx])
|
|
814
|
+
idx = int(member) if isinstance(obj, list) else member
|
|
815
|
+
return obj[idx](argvals, allow_recursion=allow_recursion)
|
|
816
|
+
if remaining:
|
|
817
|
+
ret, should_abort = self.interpretstatement(self._namedobject(local_vars, _evalmethod()) + remaining, local_vars, allow_recursion)
|
|
818
|
+
return ret, should_return or should_abort
|
|
819
|
+
else:
|
|
820
|
+
return _evalmethod(), should_return
|
|
821
|
+
elif m and m.group('function'):
|
|
822
|
+
fname = m.group('fname')
|
|
823
|
+
argvals = [self.interpretexpression(v, local_vars, allow_recursion) for v in self._separate(m.group('args'))]
|
|
824
|
+
if fname in local_vars:
|
|
825
|
+
return local_vars[fname](argvals, allow_recursion=allow_recursion), should_return
|
|
826
|
+
elif fname not in self._functions:
|
|
827
|
+
self._functions[fname] = self.extractfunction(fname)
|
|
828
|
+
return self._functions[fname](argvals, allow_recursion=allow_recursion), should_return
|
|
829
|
+
raise self.Exception(f'Unsupported JS expression {truncatestring(expr, 20, 20) if expr != stmt else ""}', stmt)
|
|
830
|
+
'''interpretexpression'''
|
|
831
|
+
def interpretexpression(self, expr, local_vars, allow_recursion):
    '''Evaluate ``expr`` as a pure expression and return its value.

    The work is delegated to ``self.interpretstatement``; that call yields a
    ``(value, should_return)`` pair.

    Raises:
        self.Exception: when the delegated call reports ``should_return``,
            because a return is not allowed inside an expression.
    '''
    value, wants_return = self.interpretstatement(expr, local_vars, allow_recursion)
    if not wants_return:
        return value
    raise self.Exception('Cannot return from an expression', expr)
|
|
835
|
+
'''extractglobalvar'''
|
|
836
|
+
def extractglobalvar(self, var):
    '''Return the raw source text assigned to ``var`` in a ``var`` declaration.

    ``self.code`` is searched for the first ``var <name>=<value>;`` and the
    text of ``<value>`` (non-greedy, up to the first ``;``) is returned as a
    string. The value is not evaluated here.

    Args:
        var: name of the variable to look up; it is regex-escaped before use.

    Returns:
        The matched value text.

    Raises:
        self.Exception: when no matching declaration exists in ``self.code``.
    '''
    global_var = re.search(fr'''var\s?{re.escape(var)}=(?P<val>.*?);''', self.code)
    # re.search returns None when nothing matches; report that through the
    # interpreter's own exception type (as extractobject / extractfunctioncode
    # do) instead of failing with AttributeError on ``None.group``.
    if global_var is None:
        raise self.Exception(f'Could not find global variable {var}')
    return global_var.group('val')
|
|
840
|
+
'''extractobject'''
|
|
841
|
+
def extractobject(self, objname, *global_stack):
    '''Build a dict of callables from a JS object literal found in ``self.code``.

    The object is located with a regex of the form ``<objname> = { ... };``
    whose fields are all ``key: function(args){code}`` entries. Each field
    is turned into a function via ``self.buildfunction`` and wrapped in
    ``FunctionWithRepr`` labelled ``F<key>``.

    Args:
        objname: name of the object to locate (regex-escaped before use).
        *global_stack: forwarded unchanged to ``self.buildfunction``.

    Returns:
        Mapping from unquoted field name to the wrapped function.

    Raises:
        self.Exception: when the object literal cannot be found.
    '''
    func_name_re = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
    object_pattern = r'''(?x)
        (?<![a-zA-Z$0-9.])%s\s*=\s*{\s*
            (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
        }\s*;
    ''' % (re.escape(objname), func_name_re)
    located = re.search(object_pattern, self.code)
    if located is None:
        raise self.Exception(f'Could not find object {objname}')
    field_pattern = r'''(?x)
        (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
    ''' % (func_name_re, _NAME_RE)
    members = {}
    for field in re.finditer(field_pattern, located.group('fields')):
        # An empty parameter list still yields [''] here, exactly as str.split does.
        params = field.group('args').split(',')
        key = removequotes(field.group('key'))
        built = self.buildfunction(params, field.group('code'), *global_stack)
        members[key] = FunctionWithRepr(built, f'F<{key}>')
    return members
|
|
861
|
+
'''extractfunctioncode'''
|
|
862
|
+
def extractfunctioncode(self, funcname):
    '''Locate a JS function definition in ``self.code`` and split it up.

    Three declaration shapes are recognised: ``function name(...)``,
    ``name = function(...)`` preceded by one of ``{``, ``;`` or ``,``, and
    ``var|const|let name = function(...)``.

    Args:
        funcname: name of the function to find (regex-escaped before use).

    Returns:
        A ``(argnames, code)`` tuple: the whitespace-stripped parameter names
        and the first element returned by ``self._separateatparen`` for the
        matched ``{...}`` text.

    Raises:
        self.Exception: when no definition matches.
    '''
    pattern = r'''(?xs)
        (?:
            function\s+%(name)s|
            [{;,]\s*%(name)s\s*=\s*function|
            (?:var|const|let)\s+%(name)s\s*=\s*function
        )\s*
        \((?P<args>[^)]*)\)\s*
        (?P<code>{.+})''' % {'name': re.escape(funcname)}
    found = re.search(pattern, self.code)
    if found is None:
        raise self.Exception(f'Could not find JS function "{funcname}"')
    # The greedy ``{.+}`` capture may run past the function's own closing
    # brace, so the text is handed to _separateatparen and only its first
    # result is kept.
    body, _rest = self._separateatparen(found.group('code'))
    params = list(map(str.strip, found.group('args').split(',')))
    return params, body
|
|
875
|
+
'''extractfunction'''
|
|
876
|
+
def extractfunction(self, funcname):
    '''Return a callable for the JS function ``funcname`` found in ``self.code``.

    The result of ``self.extractfunctioncode`` is passed, together with
    ``self.code``, through ``fixupnfunctioncode``; that output is compiled by
    ``self.extractfunctionfromcode`` and wrapped in ``FunctionWithRepr``
    labelled ``F<funcname>``.
    '''
    extracted = self.extractfunctioncode(funcname)
    fixed = fixupnfunctioncode(*extracted, self.code)
    compiled = self.extractfunctionfromcode(*fixed)
    return FunctionWithRepr(compiled, f'F<{funcname}>')
|
|
878
|
+
'''extractfunctionfromcode'''
|
|
879
|
+
def extractfunctionfromcode(self, argnames, code, *global_stack):
    '''Compile ``code`` into a callable, hoisting inline ``function(...){...}``.

    Every anonymous ``function(args){`` occurrence in ``code`` is cut out,
    compiled recursively (with this call's ``local_vars`` pushed in front of
    ``global_stack``), registered through ``self._namedobject`` and replaced
    in the source text by the name that call returns. The rewritten code is
    then handed to ``self.buildfunction``.

    Args:
        argnames: parameter names of the function being built.
        code: JS source text of the function body.
        *global_stack: outer scopes, forwarded after ``local_vars``.

    Returns:
        Whatever ``self.buildfunction`` returns for the rewritten code.
    '''
    inline_function = re.compile(r'function\((?P<args>[^)]*)\)\s*{')
    local_vars = {}
    found = inline_function.search(code)
    while found is not None:
        start, body_start = found.span()
        # Step back one character so the slice begins at the opening brace.
        body, remaining = self._separateatparen(code[body_start - 1:])
        inner_args = [arg.strip() for arg in found.group('args').split(',')]
        inner = self.extractfunctionfromcode(inner_args, body, local_vars, *global_stack)
        placeholder = self._namedobject(local_vars, inner)
        code = code[:start] + placeholder + remaining
        found = inline_function.search(code)
    return self.buildfunction(argnames, code, local_vars, *global_stack)
|
|
889
|
+
'''callfunction'''
|
|
890
|
+
def callfunction(self, funcname, *args):
    '''Extract the JS function ``funcname`` and invoke it with ``args``.

    The positional arguments are passed on as a single tuple to the callable
    returned by ``self.extractfunction``.
    '''
    target = self.extractfunction(funcname)
    return target(args)
|
|
892
|
+
'''buildfunction'''
|
|
893
|
+
def buildfunction(self, argnames, code, *global_stack):
    '''Wrap JS ``code`` in a Python callable bound to the given scope stack.

    Args:
        argnames: parameter names; frozen into a tuple.
        code: JS source text to interpret on each call.
        *global_stack: scope mappings; when none are given a single fresh
            dict is used.

    Returns:
        ``resf(args, kwargs={}, allow_recursion=100)``. Each call writes the
        arguments and then ``kwargs`` into the first scope, wraps the stack
        in ``LocalNameSpace`` and runs ``self.interpretstatement`` on the code
        (newlines replaced by spaces) with ``allow_recursion - 1``. The
        interpreted value is returned only when the statement signalled an
        abort; otherwise the result is ``None``.
    '''
    scopes = list(global_stack)
    if not scopes:
        scopes = [{}]
    argnames = tuple(argnames)

    def resf(args, kwargs={}, allow_recursion=100):
        # The first scope is created once per built function and mutated in
        # place, so bindings persist between calls. zip_longest pads missing
        # arguments with None.
        innermost = scopes[0]
        innermost.update(itertools.zip_longest(argnames, args, fillvalue=None))
        innermost.update(kwargs)
        namespace = LocalNameSpace(*scopes)
        outcome, aborted = self.interpretstatement(code.replace('\n', ' '), namespace, allow_recursion - 1)
        return outcome if aborted else None

    return resf
|