scratchattach 3.0.0b0__py3-none-any.whl → 3.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__about__.py +1 -0
- cli/__init__.py +26 -0
- cli/cmd/__init__.py +4 -0
- cli/cmd/group.py +127 -0
- cli/cmd/login.py +60 -0
- cli/cmd/profile.py +7 -0
- cli/cmd/sessions.py +5 -0
- cli/context.py +142 -0
- cli/db.py +66 -0
- cli/namespace.py +14 -0
- cloud/__init__.py +2 -0
- cloud/_base.py +483 -0
- cloud/cloud.py +183 -0
- editor/__init__.py +22 -0
- editor/asset.py +265 -0
- editor/backpack_json.py +115 -0
- editor/base.py +191 -0
- editor/block.py +584 -0
- editor/blockshape.py +357 -0
- editor/build_defaulting.py +51 -0
- editor/code_translation/__init__.py +0 -0
- editor/code_translation/parse.py +177 -0
- editor/comment.py +80 -0
- editor/commons.py +145 -0
- editor/extension.py +50 -0
- editor/field.py +99 -0
- editor/inputs.py +138 -0
- editor/meta.py +117 -0
- editor/monitor.py +185 -0
- editor/mutation.py +381 -0
- editor/pallete.py +88 -0
- editor/prim.py +174 -0
- editor/project.py +381 -0
- editor/sprite.py +609 -0
- editor/twconfig.py +114 -0
- editor/vlb.py +134 -0
- eventhandlers/__init__.py +0 -0
- eventhandlers/_base.py +101 -0
- eventhandlers/cloud_events.py +130 -0
- eventhandlers/cloud_recorder.py +26 -0
- eventhandlers/cloud_requests.py +544 -0
- eventhandlers/cloud_server.py +249 -0
- eventhandlers/cloud_storage.py +135 -0
- eventhandlers/combine.py +30 -0
- eventhandlers/filterbot.py +163 -0
- eventhandlers/message_events.py +42 -0
- other/__init__.py +0 -0
- other/other_apis.py +598 -0
- other/project_json_capabilities.py +475 -0
- {scratchattach-3.0.0b0.dist-info → scratchattach-3.0.0b1.dist-info}/METADATA +1 -1
- scratchattach-3.0.0b1.dist-info/RECORD +79 -0
- scratchattach-3.0.0b1.dist-info/top_level.txt +7 -0
- site/__init__.py +0 -0
- site/_base.py +93 -0
- site/activity.py +426 -0
- site/alert.py +226 -0
- site/backpack_asset.py +119 -0
- site/browser_cookie3_stub.py +17 -0
- site/browser_cookies.py +61 -0
- site/classroom.py +454 -0
- site/cloud_activity.py +121 -0
- site/comment.py +228 -0
- site/forum.py +436 -0
- site/placeholder.py +132 -0
- site/project.py +932 -0
- site/session.py +1323 -0
- site/studio.py +704 -0
- site/typed_dicts.py +151 -0
- site/user.py +1252 -0
- utils/__init__.py +0 -0
- utils/commons.py +263 -0
- utils/encoder.py +161 -0
- utils/enums.py +237 -0
- utils/exceptions.py +277 -0
- utils/optional_async.py +154 -0
- utils/requests.py +306 -0
- scratchattach/__init__.py +0 -37
- scratchattach/__main__.py +0 -93
- scratchattach-3.0.0b0.dist-info/RECORD +0 -8
- scratchattach-3.0.0b0.dist-info/top_level.txt +0 -1
- {scratchattach-3.0.0b0.dist-info → scratchattach-3.0.0b1.dist-info}/WHEEL +0 -0
- {scratchattach-3.0.0b0.dist-info → scratchattach-3.0.0b1.dist-info}/entry_points.txt +0 -0
- {scratchattach-3.0.0b0.dist-info → scratchattach-3.0.0b1.dist-info}/licenses/LICENSE +0 -0
site/forum.py
ADDED
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
"""ForumTopic and ForumPost classes"""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import warnings
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Optional, Any
|
|
7
|
+
from urllib.parse import urlparse, parse_qs
|
|
8
|
+
import xml.etree.ElementTree as ET
|
|
9
|
+
|
|
10
|
+
from bs4 import BeautifulSoup, Tag
|
|
11
|
+
|
|
12
|
+
from . import user
|
|
13
|
+
from . import session as module_session
|
|
14
|
+
from scratchattach.utils.commons import headers
|
|
15
|
+
from scratchattach.utils import exceptions, commons
|
|
16
|
+
from ._base import BaseSiteComponent
|
|
17
|
+
from scratchattach.utils.requests import requests
|
|
18
|
+
|
|
19
|
+
@dataclass
class ForumTopic(BaseSiteComponent):
    '''
    Represents a Scratch forum topic.

    Attributes:

    :.id: The numeric id of the topic (used to build the feed and topic page URLs)

    :.title: The title of the topic

    :.category_name: The name of the forum category the topic is in

    :.last_updated: When the topic was last updated (a string taken from the feed or the topic list)

    Attributes only available if the object was created using scratchattach.get_topic_list or scratchattach.Session.connect_topic_list:

    :.reply_count:

    :.view_count:

    :.update(): Updates the attributes
    '''
    id: int
    title: str
    category_name: Optional[str] = None
    last_updated: Optional[str] = None
    _session: Optional[module_session.Session] = field(default=None)
    reply_count: Optional[int] = field(default=None)
    view_count: Optional[int] = field(default=None)

    def __str__(self):
        return f"-F {self.title} ({self.id})"

    def __post_init__(self):
        # Info on how the .update method has to fetch the data:
        self.update_function = requests.get
        self.update_api = f"https://scratch.mit.edu/discuss/feeds/topic/{self.id}/"

        # Headers and cookies:
        if self._session is None:
            self._headers = headers
            self._cookies = {}
        else:
            self._headers = self._session.get_headers()
            self._cookies = self._session.get_cookies()

        # Headers for operations that require accept and Content-Type fields
        # (copied so the shared header dict is never mutated):
        self._json_headers = dict(self._headers)
        self._json_headers["accept"] = "application/json"
        self._json_headers["Content-Type"] = "application/json"

    def update(self):
        """
        Refreshes title, category_name and last_updated from the topic's XML feed.

        Returns:
            True on success, or the string "429" if the response indicates rate limiting.

        Raises:
            exceptions.ForumContentNotFound: The feed did not answer with status 200.
            exceptions.ScrapeError: The feed could not be parsed (the original error is chained as the cause).
        """
        # As there is no JSON API for getting forum topics anymore,
        # the data has to be retrieved from the XML feed.
        response = self.update_function(
            self.update_api,
            headers=self._headers,
            cookies=self._cookies,
            timeout=20,  # fetching forums can take very long
        )
        # Check for 429 error:
        if "429" in str(response):
            return "429"

        if response.status_code != 200:
            raise exceptions.ForumContentNotFound

        # Parse XML response
        try:
            root = ET.fromstring(response.text)
            namespace = {'atom': 'http://www.w3.org/2005/Atom'}

            title = root.findtext('atom:title', namespaces=namespace).replace("Latest posts on ", "")
            # The category is the part after " :: " in the first entry's title.
            first_entry = root.findall('.//atom:entry', namespaces=namespace)[0]
            category_name = first_entry.findtext('.//atom:title', namespaces=namespace).split(" :: ")[1]
            last_updated = root.findtext('atom:updated', namespaces=namespace)
        except Exception as e:
            # Chain the cause so the real parsing failure stays visible in tracebacks.
            raise exceptions.ScrapeError(str(e)) from e

        self.title = title
        self.category_name = category_name
        self.last_updated = last_updated
        return True

    @classmethod
    def from_id(cls, __id: int, session: module_session.Session, update: bool = False):
        """
        Alternate constructor: builds a topic with empty title, category and
        last_updated for the given id, optionally calling update() right away.
        """
        new = cls(id=__id, _session=session, title="", last_updated="", category_name="")
        if update:
            new.update()
        return new

    def _update_from_dict(self, data: dict[str, Any]):
        """Copies every key of `data` onto the object as an attribute. Returns True (like ForumPost)."""
        self.__dict__.update(data)
        return True

    def posts(self, *, page=1, order="oldest") -> list[ForumPost]:
        """
        Args:
            page (int): The page of the forum topic that should be returned. First page is at index 1.
            order (str): Kept for backwards compatibility; any value other than "oldest" only triggers a warning.

        Returns:
            list<scratchattach.forum.ForumPost>: A list containing the posts from the specified page of the forum topic

        Raises:
            exceptions.FetchError: The topic page could not be requested.
            exceptions.ScrapeError: The topic page could not be parsed.
        """
        if order != "oldest":
            warnings.warn("Warning: All post orders except for 'oldest' are deprecated and no longer work")  # For backwards compatibility

        posts = []

        try:
            url = f"https://scratch.mit.edu/discuss/topic/{self.id}/?page={page}"
            # NOTE(review): this sends the module-level default `headers`, not
            # self._headers as update() does - confirm whether that is intended.
            response = requests.get(url, headers=headers, cookies=self._cookies)
        except Exception as e:
            raise exceptions.FetchError(str(e)) from e

        try:
            soup = BeautifulSoup(response.content, 'html.parser')
            soup_elm = soup.find("div", class_="djangobb")
            assert isinstance(soup_elm, Tag)

            # Number of pages: text of the last pagination link; falls back to 1
            # whenever the pagination block cannot be read.
            try:
                pagination_div = soup_elm.find('div', class_='pagination')
                assert isinstance(pagination_div, Tag)
                num_pages = int(pagination_div.find_all('a', class_='page')[-1].text)
            except Exception:
                num_pages = 1

            try:
                # get topic category:
                topic_category = ""
                breadcrumb_ul = soup_elm.find_all('ul')[1]  # Find the second ul element
                if breadcrumb_ul:
                    assert isinstance(breadcrumb_ul, Tag)
                    link = breadcrumb_ul.find_all('a')[1]  # Get the right anchor tag
                    topic_category = link.text.strip()  # Extract and strip text content
            except Exception as e:
                # Best effort: a missing category must not prevent returning the posts.
                warnings.warn(f"Warning: Couldn't scrape topic category for topic {self.id} - {e}")
                topic_category = ""

            # get corresponding posts:
            post_htmls = soup.find_all('div', class_='blockpost')
            for raw_post in post_htmls:
                if not isinstance(raw_post, Tag):
                    continue
                post = ForumPost(
                    id=int(str(raw_post['id']).replace("p", "")),
                    topic_id=self.id,
                    _session=self._session,
                    topic_category=topic_category,
                    topic_num_pages=num_pages,
                )
                post.update_from_html(raw_post)

                posts.append(post)
        except Exception as e:
            raise exceptions.ScrapeError() from e

        return posts

    def first_post(self) -> Optional[ForumPost]:
        """
        Returns:
            scratchattach.forum.ForumPost: An object representing the first topic post, or None if page 1 contains no posts
        """
        posts = self.posts(page=1)
        if posts:
            return posts[0]
        return None
|
|
175
|
+
|
|
176
|
+
@dataclass
class ForumPost(BaseSiteComponent):
    '''
    Represents a Scratch forum post.

    Attributes:

    :.id:

    :.author_name: The name of the user who created this post

    :.author_avatar_url:

    :.posted: The date the post was made

    :.topic_id: The id of the topic this post is in

    :.topic_name: The name of the topic the post is in

    :.topic_category: The name of the category the post topic is in

    :.topic_num_pages: The number of pages the post topic has

    :.deleted: Whether the post was deleted (always False because deleted posts can't be retrieved anymore)

    :.html_content: Returns the content as HTML

    :.content: Returns the content as text

    :.post_index: The index that the post has in the topic

    :.update(): Updates the attributes
    '''
    id: int = field(default=0)
    topic_id: int = field(default=0)
    topic_name: str = field(default="")
    topic_category: str = field(default="")
    topic_num_pages: int = field(default=0)
    author_name: str = field(default="")
    author_avatar_url: str = field(default="")
    posted: str = field(default="")
    deleted: bool = field(default=False)
    html_content: str = field(default="")
    content: str = field(default="")
    post_index: int = field(default=0)
    _session: Optional[module_session.Session] = field(default=None)

    def __post_init__(self):

        # A forum post can't be updated the usual way as there is no API anymore
        self.update_api = ""

        # Headers and cookies:
        if self._session is None:
            self._headers = headers
            self._cookies = {}
        else:
            self._headers = self._session.get_headers()
            self._cookies = self._session.get_cookies()

        # Headers for operations that require accept and Content-Type fields
        # (copied so the shared header dict is never mutated):
        self._json_headers = dict(self._headers)
        self._json_headers["accept"] = "application/json"
        self._json_headers["Content-Type"] = "application/json"

    def update_function(self, *args, **kwargs):
        """Always raises TypeError: posts have no update endpoint, use update() instead."""
        raise TypeError("Forum posts cannot be updated like this")

    def update(self):
        """
        Updates the attributes of the ForumPost object.
        As there is no API for retrieving a single post anymore, this requires reloading the forum page.

        The topic's pages are fetched one after another, starting at page 1,
        until a post with this id is found or a page without posts is returned.

        Returns:
            bool: True if the post was found and its attributes were copied over, False otherwise.
        """
        # One topic object is enough for all page requests.
        topic = ForumTopic.from_id(self.topic_id, session=self._session)
        page = 1
        posts = topic.posts(page=page)
        while posts != []:
            for candidate in posts:
                if int(candidate.id) == int(self.id):
                    return self._update_from_dict(vars(candidate))
            page += 1
            posts = topic.posts(page=page)
        return False

    def _update_from_dict(self, data: dict[str, Any]):
        """Copies every key of `data` onto the object as an attribute. Returns True."""
        self.__dict__.update(data)
        return True

    def update_from_html(self, soup_html: Tag):
        """Public alias of _update_from_html."""
        return self._update_from_html(soup_html)

    def _update_from_html(self, soup_html: Tag):
        """
        Fills the post attributes from the HTML element of one post
        (ForumTopic.posts passes the 'blockpost' divs of a topic page).

        Returns True; raises (AssertionError or a lookup error) if an expected element is missing.
        """
        post_index_elm = soup_html.find('span', class_='conr')
        assert isinstance(post_index_elm, Tag)
        id_attr = soup_html['id']
        assert isinstance(id_attr, str)
        posted_elm = soup_html.find('a', href=True)
        assert isinstance(posted_elm, Tag)
        content_elm = soup_html.find('div', class_='post_body_html')
        assert isinstance(content_elm, Tag)
        author_name_elm = soup_html.select_one('dl dt a')
        assert isinstance(author_name_elm, Tag)
        topic_name_elm = soup_html.find('h3')
        assert isinstance(topic_name_elm, Tag)

        self.post_index = int(post_index_elm.text.strip('#'))
        self.id = int(id_attr.replace("p", ""))  # the element id is the post id prefixed with "p"
        self.posted = posted_elm.text.strip()
        self.content = content_elm.text.strip()
        self.html_content = str(content_elm)
        self.author_name = author_name_elm.text.strip()
        # NOTE(review): this stores the href of the author link, which looks like
        # a profile link rather than an image URL - confirm against the page HTML.
        self.author_avatar_url = str(author_name_elm['href'])
        self.topic_name = topic_name_elm.text.strip()
        return True

    def topic(self):
        """
        Returns:
            scratchattach.forum.ForumTopic: An object representing the forum topic this post is in.
        """
        return self._make_linked_object("id", self.topic_id, ForumTopic, exceptions.ForumContentNotFound)

    def ocular_reactions(self):
        """Returns the parsed JSON response of the my-ocular reactions API for this post id."""
        return requests.get(f"https://my-ocular.jeffalo.net/api/reactions/{self.id}", timeout=10).json()

    def author(self):
        """
        Returns:
            scratchattach.user.User: An object representing the user who created this forum post.
        """
        return self._make_linked_object("username", self.author_name, user.User, exceptions.UserNotFound)

    def edit(self, new_content: str):
        """
        Changes the content of the forum post. You can only use this function if this object was created using :meth:`scratchattach.session.Session.connect_post` or through another method that requires authentication. You must own the forum post.

        Args:
            new_content (str): The text that the forum post will be set to.
        """

        self._assert_auth()

        requests.post(
            f"https://scratch.mit.edu/discuss/post/{self.id}/edit/",
            headers={
                "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
                "accept-language": "de,en;q=0.9",
                "cache-control": "max-age=0",
                "content-type": "application/x-www-form-urlencoded",
                "sec-ch-ua": "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"101\", \"Google Chrome\";v=\"101\"",
                "sec-ch-ua-mobile": "?0",
                "sec-ch-ua-platform": "\"Windows\"",
                "sec-fetch-dest": "document",
                "sec-fetch-mode": "navigate",
                "sec-fetch-site": "same-origin",
                "sec-fetch-user": "?1",
                "upgrade-insecure-requests": "1",
                "Referer": f"https://scratch.mit.edu/discuss/post/{self.id}/edit/",
                "x-csrftoken": "a"
            },
            # Only the real cookies are sent; "accept" and "Content-Type" are
            # header names and are already set in `headers` above.
            cookies=dict(self._cookies),
            # Sent as a form body so that new_content is percent-encoded.
            # Passing the pre-built string via `json=` wrapped it in quotes and
            # JSON-escaped newlines, quotes and non-ASCII characters.
            data={"csrfmiddlewaretoken": "a", "body": new_content},
            timeout=10,
        )
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def get_topic(topic_id) -> ForumTopic:
    """
    Fetches a forum topic anonymously (no session). The topic data comes from Scratch's RSS feed XML API.

    Args:
        topic_id (int): ID of the requested forum topic

    Returns:
        scratchattach.forum.ForumTopic: An object representing the requested forum topic

    Warning:
        Scratch's API uses very heavy caching for logged out users, therefore the returned data will not be up to date.

        Any methods that require authentication will not work on the returned object.

        If you need up-to-date data or want to use methods that require authentication, create the object with :meth:`scratchattach.session.Session.connect_topic` instead.
    """
    not_found_error = exceptions.ForumContentNotFound
    topic = commons._get_object("id", topic_id, ForumTopic, not_found_error)
    return topic
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def get_topic_list(category_id, *, page=1):
    """
    Gets the topics from a forum category without logging in. Data web-scraped from Scratch's forums UI.

    Args:
        category_id (str): ID of the forum category

    Keyword Arguments:
        page (int): Page of the category topics that should be returned

    Returns:
        list<scratchattach.forum.ForumTopic>: A list containing the forum topics from the specified category

    Raises:
        exceptions.FetchError: The category page could not be requested.
        exceptions.BadRequest: The page has no category heading (treated as an invalid category id).
        exceptions.ScrapeError: The topic table could not be parsed.

    Warning:
        Scratch's API uses very heavy caching for logged out users, therefore the returned data will not be up to date.

        Any methods that require authentication will not work on the returned objects.

        If you need up-to-date data or want to use methods that require authentication, get the forum topics with :meth:`scratchattach.session.Session.connect_topic_list` instead.
    """

    try:
        response = requests.get(f"https://scratch.mit.edu/discuss/{category_id}/?page={page}")
        soup = BeautifulSoup(response.content, 'html.parser')
    except Exception as e:
        raise exceptions.FetchError(str(e)) from e

    try:
        category_name = soup.find('h4').find("span").get_text()
    except Exception as e:
        raise exceptions.BadRequest("Invalid category id") from e

    try:
        rows = soup.find_all('tr')
        rows.pop(0)  # drop the first row (presumably the table header)
        return_topics = []

        for row in rows:
            columns = [column.text for column in row.find_all('td')]
            if len(columns) == 1:
                # This is a sticky topic -> Skip it.
                # Checked before the link is read, so a single-cell row without
                # an anchor no longer aborts the whole listing.
                continue

            title_link = row.find('a')
            title = title_link.text.strip()
            topic_id = title_link['href'].split('/')[-2]

            # Keep only the first two space-separated parts of the last-post cell.
            updated_parts = columns[3].split(" ")
            last_updated = updated_parts[0] + " " + updated_parts[1]

            return_topics.append(ForumTopic(
                id=int(topic_id),
                title=title,
                category_name=category_name,
                last_updated=last_updated,
                reply_count=int(columns[1]),
                view_count=int(columns[2]),
            ))
        return return_topics
    except Exception as e:
        raise exceptions.ScrapeError(str(e)) from e
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def youtube_link_to_scratch(link: str) -> str:
    """
    Converts a YouTube url (in multiple formats) like https://youtu.be/1JTgg4WVAX8?si=fIEskaEaOIRZyTAz
    to a link like https://scratch.mit.edu/discuss/youtube/1JTgg4WVAX8

    The video id is taken from the ``v`` query parameter when present,
    otherwise from the last segment of the URL path.

    Args:
        link (str): The YouTube url to convert

    Returns:
        str: The corresponding https://scratch.mit.edu/discuss/youtube/<video id> link
    """
    url_parse = urlparse(link)
    query_parse = parse_qs(url_parse.query)
    if 'v' in query_parse:
        video_id = query_parse['v'][0]
    else:
        # Strip trailing slashes first: "https://youtu.be/<id>/" would otherwise
        # yield an empty last segment and therefore an empty video id.
        video_id = url_parse.path.rstrip('/').split('/')[-1]
    return f"https://scratch.mit.edu/discuss/youtube/{video_id}"
|
site/placeholder.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# Classes and methods for interacting with turbowarp placeholder (https://share.turbowarp.org/)
|
|
2
|
+
import re
|
|
3
|
+
import bs4
|
|
4
|
+
import json
|
|
5
|
+
import io
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing_extensions import Optional
|
|
9
|
+
from bs4 import BeautifulSoup
|
|
10
|
+
|
|
11
|
+
from scratchattach.site import session
|
|
12
|
+
from scratchattach.site.typed_dicts import PlaceholderProjectDataDict
|
|
13
|
+
from scratchattach.utils.requests import requests
|
|
14
|
+
from scratchattach import editor
|
|
15
|
+
from scratchattach.utils import commons
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class PlaceholderProject:
    """
    A project hosted on turbowarp placeholder (https://share.turbowarp.org/).

    Only ``id`` is required; the other fields stay None until update_by_html()
    has filled them from the project page.
    """
    id: str

    title: Optional[str] = None
    description: Optional[str] = None
    md5exts_to_sha256: Optional[dict[str, str]] = None
    admin_ownership_token: Optional[str] = None  # guessing it's a str

    _session: Optional[session.Session] = None

    def get_json(self):
        """Returns the parsed JSON response of the placeholder projects API for this id."""
        with requests.no_error_handling():
            return requests.get(f"https://share.turbowarp.org/api/projects/{self.id}").json()

    def update_by_html(self) -> None:
        """
        Scrape JS to update the project. Requires hjson

        Sets title, description, md5exts_to_sha256 and admin_ownership_token from
        the first <script> on the project page that contains the expected
        ``const data = [...]`` literal. If no script matches, nothing is changed.

        Raises:
            ImportError: hjson is not installed.
        """
        try:
            import hjson  # type: ignore
        except ImportError as e:
            raise ImportError("Please use pip install hjson if you want to use placeholder projects!") from e

        with requests.no_error_handling():
            resp = requests.get(f"https://share.turbowarp.org/projects/{self.id}")
        soup = BeautifulSoup(resp.text, "html.parser")

        data_pattern = r'const data = \[.*"data":{metadata:{.*},md5extsToSha256:.*];'

        for script in soup.find_all("script"):
            if not isinstance(script, bs4.element.Tag):
                continue

            # Empty <script> tags (e.g. ones that only carry a src attribute)
            # have no contents; indexing [0] on them would raise IndexError.
            if not script.contents:
                continue

            raw_data = re.search(data_pattern, str(script.contents[0]))
            if not raw_data:
                continue

            data = raw_data.group().removeprefix("const data = ").removesuffix(";")
            # this data is NOT json. Therefore, we can't just JSON.parse it.
            # it's actually native JavaScript, but we can extract the information in a relatively stable way using hjson
            # maybe, instead, a request should be made to GarboMuffin.
            data = hjson.loads(data)
            # i am unsure if the other data here is of any use. It may be artifacts coming from svelte
            parsed_data: PlaceholderProjectDataDict = data[1]["data"]

            self.title = parsed_data["metadata"]["title"]
            self.description = parsed_data["metadata"]["description"]
            self.md5exts_to_sha256 = dict(parsed_data["md5extsToSha256"])
            self.admin_ownership_token = parsed_data["adminOwnershipToken"]

            break

    def get_project_body(self):
        """
        Builds an editor.Project from the project JSON, names it after the
        scraped title and downloads the data of every asset it references.

        Returns:
            The editor.Project with each asset's asset_file.data filled in.
        """
        self.update_by_html()

        project_json = self.get_json()
        body = editor.Project.from_json(project_json)
        body.name = self.title

        for asset in body.assets:
            table = self.md5exts_to_sha256
            assert table is not None  # this should never happen
            # The md5ext -> sha256 table gives the hash under which the asset is stored.
            asset.asset_file.data = get_asset(table[asset.md5ext])

        return body
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def get_asset(sha256: str) -> bytes:
    """Fetches the asset stored under the given sha256 from the placeholder assets API and returns the raw response body."""
    asset_url = f"https://share.turbowarp.org/api/assets/{sha256}"
    with requests.no_error_handling():
        response = requests.get(asset_url)
        return response.content
|
|
86
|
+
|
|
87
|
+
def get_placeholder_project(_id: str):
    """Creates a PlaceholderProject for the given id. No request is made here."""
    project = PlaceholderProject(id=_id)
    return project
|
|
89
|
+
|
|
90
|
+
def create_placeholder_project(title: str, data: bytes):
    """
    Uploads an .sb3 file as a new placeholder project.

    Args:
        title (str): Title sent along with the new project
        data (bytes): Raw content of the .sb3 file

    Returns:
        The response of the POST request to https://share.turbowarp.org/api/projects/new
        (previously the response was only printed and nothing was returned).
    """
    body = editor.Project.from_sb3(data)

    # md5ext -> {"sha256": ..., "size": ...} for every asset of the project
    asset_information: dict[str, dict[str, str | int]] = {
        asset.md5ext: {
            "sha256": asset.asset_file.sha256,
            "size": len(asset.asset_file.data),
        }
        for asset in body.assets
    }

    with requests.no_error_handling():
        resp = requests.post("https://share.turbowarp.org/api/projects/new", data={
            "title": title,
            # A form field can only carry text, so the nested mapping is
            # serialized to a JSON string instead of being passed as a dict.
            # NOTE(review): presumably the server parses this field as JSON - confirm.
            "assetInformation": json.dumps(asset_information),
        }, files={
            "project": ("blob", data, 'application/octet-stream'),
        }, headers={
            'accept': '*/*',
            'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
            # content-type is deliberately not set: the multipart boundary has to
            # be generated together with the request body.
            'dnt': '1',
            'origin': 'https://share.turbowarp.org',
            'priority': 'u=1, i',
            'referer': 'https://share.turbowarp.org/',
            'sec-ch-ua': '"Google Chrome";v="141", "Not?A_Brand";v="8", "Chromium";v="141"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36'
        })

    return resp
|
|
128
|
+
|
|
129
|
+
if __name__ == '__main__':
    # Manual smoke test: download one placeholder project and export it as .sb3
    project_body = get_placeholder_project("44c35afc-fe00-49d8-afe7-d71f4430c121").get_project_body()
    project_body.export("test plac.sb3")
|