scratchattach 2.1.14__py3-none-any.whl → 3.0.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scratchattach/__init__.py +14 -6
- scratchattach/__main__.py +93 -0
- {scratchattach-2.1.14.dist-info → scratchattach-3.0.0b0.dist-info}/METADATA +7 -11
- scratchattach-3.0.0b0.dist-info/RECORD +8 -0
- {scratchattach-2.1.14.dist-info → scratchattach-3.0.0b0.dist-info}/WHEEL +1 -1
- scratchattach-3.0.0b0.dist-info/entry_points.txt +2 -0
- scratchattach/cloud/__init__.py +0 -2
- scratchattach/cloud/_base.py +0 -458
- scratchattach/cloud/cloud.py +0 -183
- scratchattach/editor/__init__.py +0 -21
- scratchattach/editor/asset.py +0 -253
- scratchattach/editor/backpack_json.py +0 -117
- scratchattach/editor/base.py +0 -193
- scratchattach/editor/block.py +0 -579
- scratchattach/editor/blockshape.py +0 -357
- scratchattach/editor/build_defaulting.py +0 -51
- scratchattach/editor/code_translation/__init__.py +0 -0
- scratchattach/editor/code_translation/parse.py +0 -177
- scratchattach/editor/comment.py +0 -80
- scratchattach/editor/commons.py +0 -306
- scratchattach/editor/extension.py +0 -50
- scratchattach/editor/field.py +0 -99
- scratchattach/editor/inputs.py +0 -135
- scratchattach/editor/meta.py +0 -114
- scratchattach/editor/monitor.py +0 -183
- scratchattach/editor/mutation.py +0 -324
- scratchattach/editor/pallete.py +0 -90
- scratchattach/editor/prim.py +0 -170
- scratchattach/editor/project.py +0 -279
- scratchattach/editor/sprite.py +0 -599
- scratchattach/editor/twconfig.py +0 -114
- scratchattach/editor/vlb.py +0 -134
- scratchattach/eventhandlers/__init__.py +0 -0
- scratchattach/eventhandlers/_base.py +0 -100
- scratchattach/eventhandlers/cloud_events.py +0 -110
- scratchattach/eventhandlers/cloud_recorder.py +0 -26
- scratchattach/eventhandlers/cloud_requests.py +0 -459
- scratchattach/eventhandlers/cloud_server.py +0 -246
- scratchattach/eventhandlers/cloud_storage.py +0 -136
- scratchattach/eventhandlers/combine.py +0 -30
- scratchattach/eventhandlers/filterbot.py +0 -161
- scratchattach/eventhandlers/message_events.py +0 -42
- scratchattach/other/__init__.py +0 -0
- scratchattach/other/other_apis.py +0 -284
- scratchattach/other/project_json_capabilities.py +0 -475
- scratchattach/site/__init__.py +0 -0
- scratchattach/site/_base.py +0 -66
- scratchattach/site/activity.py +0 -382
- scratchattach/site/alert.py +0 -227
- scratchattach/site/backpack_asset.py +0 -118
- scratchattach/site/browser_cookie3_stub.py +0 -17
- scratchattach/site/browser_cookies.py +0 -61
- scratchattach/site/classroom.py +0 -447
- scratchattach/site/cloud_activity.py +0 -107
- scratchattach/site/comment.py +0 -242
- scratchattach/site/forum.py +0 -432
- scratchattach/site/project.py +0 -825
- scratchattach/site/session.py +0 -1238
- scratchattach/site/studio.py +0 -611
- scratchattach/site/user.py +0 -956
- scratchattach/utils/__init__.py +0 -0
- scratchattach/utils/commons.py +0 -255
- scratchattach/utils/encoder.py +0 -158
- scratchattach/utils/enums.py +0 -236
- scratchattach/utils/exceptions.py +0 -243
- scratchattach/utils/requests.py +0 -93
- scratchattach-2.1.14.dist-info/RECORD +0 -66
- {scratchattach-2.1.14.dist-info → scratchattach-3.0.0b0.dist-info}/licenses/LICENSE +0 -0
- {scratchattach-2.1.14.dist-info → scratchattach-3.0.0b0.dist-info}/top_level.txt +0 -0
scratchattach/site/forum.py
DELETED
|
@@ -1,432 +0,0 @@
|
|
|
1
|
-
"""ForumTopic and ForumPost classes"""
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
|
|
4
|
-
from dataclasses import dataclass, field
|
|
5
|
-
from typing import Optional, Any
|
|
6
|
-
from urllib.parse import urlparse, parse_qs
|
|
7
|
-
import xml.etree.ElementTree as ET
|
|
8
|
-
|
|
9
|
-
from bs4 import BeautifulSoup, Tag
|
|
10
|
-
|
|
11
|
-
from . import user
|
|
12
|
-
from . import session as module_session
|
|
13
|
-
from scratchattach.utils.commons import headers
|
|
14
|
-
from scratchattach.utils import exceptions, commons
|
|
15
|
-
from ._base import BaseSiteComponent
|
|
16
|
-
from scratchattach.utils.requests import requests
|
|
17
|
-
|
|
18
|
-
@dataclass
class ForumTopic(BaseSiteComponent):
    '''
    Represents a Scratch forum topic.

    Attributes:

    :.id:

    :.title:

    :.category_name:

    :.last_updated:

    Attributes only available if the object was created using scratchattach.get_topic_list or scratchattach.Session.connect_topic_list:

    :.reply_count:

    :.view_count:

    :.update(): Updates the attributes
    '''
    id: int
    title: str
    category_name: str
    last_updated: str
    _session: Optional[module_session.Session] = field(default=None)
    reply_count: Optional[int] = field(default=None)
    view_count: Optional[int] = field(default=None)

    def __post_init__(self):
        """Set up the feed URL, request function, headers and cookies used by this topic."""
        # Info on how the .update method has to fetch the data:
        self.update_function = requests.get
        self.update_api = f"https://scratch.mit.edu/discuss/feeds/topic/{self.id}/"

        # Headers and cookies: fall back to the module-level default headers
        # and no cookies when the topic is not bound to a session.
        if self._session is None:
            self._headers = headers
            self._cookies = {}
        else:
            self._headers = self._session.get_headers()
            self._cookies = self._session.get_cookies()

        # Headers for operations that require accept and Content-Type fields.
        # Copied so the shared header dict is not mutated.
        self._json_headers = dict(self._headers)
        self._json_headers["accept"] = "application/json"
        self._json_headers["Content-Type"] = "application/json"

    def update(self):
        """
        Refreshes title, category_name and last_updated from the topic's XML feed.

        Returns:
            True on success, or the string "429" if the string form of the response contains "429".

        Raises:
            exceptions.ForumContentNotFound: The feed did not answer with status code 200.
            exceptions.ScrapeError: The feed could not be parsed.
        """
        # As there is no JSON API for getting forum topics anymore,
        # the data has to be retrieved from the XML feed.
        response = self.update_function(
            self.update_api,
            headers=self._headers,
            cookies=self._cookies,
            timeout=20,  # fetching forums can take very long
        )
        # Check for 429 error:
        if "429" in str(response):
            return "429"

        if response.status_code != 200:
            raise exceptions.ForumContentNotFound

        # Parse XML response
        try:
            root = ET.fromstring(response.text)
            namespace = {'atom': 'http://www.w3.org/2005/Atom'}

            title = root.findtext('atom:title', namespaces=namespace).replace("Latest posts on ", "")
            # The category is taken from the part after " :: " in the first entry's title.
            first_entry = root.findall('.//atom:entry', namespaces=namespace)[0]
            category_name = first_entry.findtext('.//atom:title', namespaces=namespace).split(" :: ")[1]
            last_updated = root.findtext('atom:updated', namespaces=namespace)
        except Exception as e:
            # Chain the cause so the original parsing error stays visible in tracebacks.
            raise exceptions.ScrapeError(str(e)) from e

        self.title = title
        self.category_name = category_name
        self.last_updated = last_updated
        return True

    @classmethod
    def from_id(cls, __id: int, session: module_session.Session, update: bool = False):
        """
        Creates a ForumTopic for the given id with empty placeholder attributes.

        Args:
            __id (int): The id of the forum topic.
            session: The session the new object is bound to.
            update (bool): If True, .update() is called on the new object before it is returned.
        """
        new = cls(id=__id, _session=session, title="", last_updated="", category_name="")
        if update:
            new.update()
        return new

    def _update_from_dict(self, data: dict[str, Any]):
        """Copies every key of data onto this object as an attribute and returns True."""
        self.__dict__.update(data)
        return True

    def posts(self, *, page=1, order="oldest") -> list[ForumPost]:
        """
        Args:
            page (int): The page of the forum topic that should be returned. First page is at index 1.
            order (str): Only kept for backwards compatibility; any value other than "oldest" prints a warning.

        Returns:
            list<scratchattach.forum.ForumPost>: A list containing the posts from the specified page of the forum topic

        Raises:
            exceptions.FetchError: The forum page could not be requested.
            exceptions.ScrapeError: The forum page could not be parsed.
        """
        if order != "oldest":
            print("Warning: All post orders except for 'oldest' are deprecated and no longer work")  # For backwards compatibility

        posts = []

        try:
            url = f"https://scratch.mit.edu/discuss/topic/{self.id}/?page={page}"
            # NOTE(review): this request uses the module-level default headers, not
            # self._headers as update() does - confirm whether that is intended.
            # The timeout matches update() so a stalled connection cannot hang forever.
            response = requests.get(url, headers=headers, cookies=self._cookies, timeout=20)
        except Exception as e:
            raise exceptions.FetchError(str(e)) from e

        try:
            soup = BeautifulSoup(response.content, 'html.parser')
            soup_elm = soup.find("div", class_="djangobb")
            # Explicit check instead of assert so it also runs under "python -O";
            # it is converted into a ScrapeError by the handler below.
            if not isinstance(soup_elm, Tag):
                raise TypeError("Could not find the 'djangobb' container on the forum page")

            # Number of pages: best effort, any failure means a single page is assumed.
            num_pages = 1
            pagination_div = soup_elm.find('div', class_='pagination')
            if isinstance(pagination_div, Tag):
                try:
                    num_pages = int(pagination_div.find_all('a', class_='page')[-1].text)
                except Exception:
                    num_pages = 1

            # Topic category: best effort, a failure only prints a warning.
            topic_category = ""
            try:
                breadcrumb_ul = soup_elm.find_all('ul')[1]  # Find the second ul element
                if isinstance(breadcrumb_ul, Tag):
                    link = breadcrumb_ul.find_all('a')[1]  # Get the right anchor tag
                    topic_category = link.text.strip()  # Extract and strip text content
            except Exception as e:
                print(f"Warning: Couldn't scrape topic category for topic {self.id} - {e}")
                topic_category = ""

            # get corresponding posts:
            for raw_post in soup.find_all('div', class_='blockpost'):
                if not isinstance(raw_post, Tag):
                    continue
                # The element id looks like "p<number>"; the "p" is removed to get the post id.
                post_id = int(str(raw_post['id']).replace("p", ""))
                post = ForumPost(
                    id=post_id,
                    topic_id=self.id,
                    _session=self._session,
                    topic_category=topic_category,
                    topic_num_pages=num_pages,
                )
                post.update_from_html(raw_post)
                posts.append(post)
        except Exception as e:
            raise exceptions.ScrapeError() from e

        return posts

    def first_post(self):
        """
        Returns:
            scratchattach.forum.ForumPost: An object representing the first topic post, or None if the first page contains no posts
        """
        posts = self.posts(page=1)
        if posts:
            return posts[0]
        return None
|
|
171
|
-
|
|
172
|
-
@dataclass
class ForumPost(BaseSiteComponent):
    '''
    Represents a Scratch forum post.

    Attributes:

    :.id:

    :.author_name: The name of the user who created this post

    :.author_avatar_url:

    :.posted: The date the post was made

    :.topic_id: The id of the topic this post is in

    :.topic_name: The name of the topic the post is in

    :.topic_category: The name of the category the post topic is in

    :.topic_num_pages: The number of pages the post topic has

    :.deleted: Whether the post was deleted (always False because deleted posts can't be retrieved anymore)

    :.html_content: Returns the content as HTML

    :.content: Returns the content as text

    :.post_index: The index that the post has in the topic

    :.update(): Updates the attributes
    '''
    id: int = field(default=0)
    topic_id: int = field(default=0)
    topic_name: str = field(default="")
    topic_category: str = field(default="")
    topic_num_pages: int = field(default=0)
    author_name: str = field(default="")
    author_avatar_url: str = field(default="")
    posted: str = field(default="")
    deleted: bool = field(default=False)
    html_content: str = field(default="")
    content: str = field(default="")
    post_index: int = field(default=0)
    _session: Optional[module_session.Session] = field(default=None)

    def __post_init__(self):
        """Set up the headers and cookies used by this post."""
        # A forum post can't be updated the usual way as there is no API anymore
        self.update_api = ""

        # Headers and cookies: fall back to the module-level default headers
        # and no cookies when the post is not bound to a session.
        if self._session is None:
            self._headers = headers
            self._cookies = {}
        else:
            self._headers = self._session.get_headers()
            self._cookies = self._session.get_cookies()

        # Headers for operations that require accept and Content-Type fields.
        # Copied so the shared header dict is not mutated.
        self._json_headers = dict(self._headers)
        self._json_headers["accept"] = "application/json"
        self._json_headers["Content-Type"] = "application/json"

    def update_function(self, *args, **kwargs):
        """Always raises TypeError; use .update() to refresh a forum post."""
        raise TypeError("Forum posts cannot be updated like this")

    def update(self):
        """
        Updates the attributes of the ForumPost object.
        As there is no API for retrieving a single post anymore, this requires reloading the forum page.

        The pages of the topic are loaded one after another, starting at page 1,
        until a post with this object's id is found or a page without posts is reached.

        Returns:
            bool: True if the post was found and the attributes were updated, False otherwise.
        """
        # from_id with the default update=False does not fetch anything,
        # so a single topic object can be reused for every page.
        topic = ForumTopic.from_id(self.topic_id, session=self._session)
        own_id = int(self.id)
        page = 1
        while True:
            posts = topic.posts(page=page)
            if not posts:
                return False
            for candidate in posts:
                if int(candidate.id) == own_id:
                    # Copy every attribute of the freshly scraped post onto this object.
                    return self._update_from_dict(vars(candidate))
            page += 1

    def _update_from_dict(self, data: dict[str, Any]):
        """Copies every key of data onto this object as an attribute and returns True."""
        self.__dict__.update(data)
        return True

    def update_from_html(self, soup_html: Tag):
        """Public alias of _update_from_html."""
        return self._update_from_html(soup_html)

    def _update_from_html(self, soup_html: Tag):
        """
        Fills the attributes of this post from the HTML element of the post.

        Args:
            soup_html (bs4.Tag): The element of the post; its "id" attribute has the form "p<number>".

        Returns:
            bool: Always True.

        Raises:
            AssertionError: A required element is missing from the post HTML.
        """
        post_index_elm = soup_html.find('span', class_='conr')
        assert isinstance(post_index_elm, Tag)
        id_attr = soup_html['id']
        assert isinstance(id_attr, str)
        posted_elm = soup_html.find('a', href=True)  # first link in the post element
        assert isinstance(posted_elm, Tag)
        content_elm = soup_html.find('div', class_='post_body_html')
        assert isinstance(content_elm, Tag)
        author_name_elm = soup_html.select_one('dl dt a')
        assert isinstance(author_name_elm, Tag)
        topic_name_elm = soup_html.find('h3')
        assert isinstance(topic_name_elm, Tag)

        self.post_index = int(post_index_elm.text.strip('#'))
        self.id = int(id_attr.replace("p", ""))
        self.posted = posted_elm.text.strip()
        self.content = content_elm.text.strip()
        self.html_content = str(content_elm)
        self.author_name = author_name_elm.text.strip()
        # NOTE(review): this stores the href of the author link, which looks like a
        # profile link rather than an avatar image URL - confirm against the page markup.
        self.author_avatar_url = str(author_name_elm['href'])
        self.topic_name = topic_name_elm.text.strip()
        return True

    def topic(self):
        """
        Returns:
            scratchattach.forum.ForumTopic: An object representing the forum topic this post is in.
        """
        return self._make_linked_object("id", self.topic_id, ForumTopic, exceptions.ForumContentNotFound)

    def ocular_reactions(self):
        """Returns the parsed JSON response of the my-ocular reactions API for this post."""
        return requests.get(f"https://my-ocular.jeffalo.net/api/reactions/{self.id}", timeout=10).json()

    def author(self):
        """
        Returns:
            scratchattach.user.User: An object representing the user who created this forum post.
        """
        return self._make_linked_object("username", self.author_name, user.User, exceptions.UserNotFound)

    def edit(self, new_content: str):
        """
        Changes the content of the forum post. You can only use this function if this object was created using :meth:`scratchattach.session.Session.connect_post` or through another method that requires authentication. You must own the forum post.

        Args:
            new_content (str): The text that the forum post will be set to.
        """

        self._assert_auth()

        edit_url = f"https://scratch.mit.edu/discuss/post/{self.id}/edit/"
        requests.post(
            edit_url,
            headers={
                "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
                "accept-language": "de,en;q=0.9",
                "cache-control": "max-age=0",
                "content-type": "application/x-www-form-urlencoded",
                "sec-ch-ua": "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"101\", \"Google Chrome\";v=\"101\"",
                "sec-ch-ua-mobile": "?0",
                "sec-ch-ua-platform": "\"Windows\"",
                "sec-fetch-dest": "document",
                "sec-fetch-mode": "navigate",
                "sec-fetch-site": "same-origin",
                "sec-fetch-user": "?1",
                "upgrade-insecure-requests": "1",
                "Referer": edit_url,
                "x-csrftoken": "a"
            },
            # Only real cookies are sent; "accept" and "Content-Type" are HTTP
            # headers (already set above) and do not belong in the cookie jar.
            cookies=dict(self._cookies),
            # Sent as a form body so that it matches the declared content type and
            # special characters in new_content (&, =, newlines, quotes) are URL-encoded.
            data={"csrfmiddlewaretoken": "a", "body": new_content},
            timeout=10,
        )
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
def get_topic(topic_id) -> ForumTopic:

    """
    Fetches a forum topic without being logged in. The data comes from Scratch's RSS feed XML API.

    Args:
        topic_id (int): ID of the requested forum topic

    Returns:
        scratchattach.forum.ForumTopic: An object representing the requested forum topic

    Warning:
        Scratch's API uses very heavy caching for logged out users, therefore the returned data will not be up to date.

        Any methods that require authentication will not work on the returned object.

        If you need up-to-date data or want to use methods that require authentication, create the object with :meth:`scratchattach.session.Session.connect_topic` instead.
    """
    not_found_error = exceptions.ForumContentNotFound
    topic = commons._get_object("id", topic_id, ForumTopic, not_found_error)
    return topic
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
def get_topic_list(category_id, *, page=1):

    """
    Gets the topics from a forum category without logging in. Data web-scraped from Scratch's forums UI.

    Args:
        category_id (str): ID of the forum category

    Keyword Arguments:
        page (int): Page of the category topics that should be returned

    Returns:
        list<scratchattach.forum.ForumTopic>: A list containing the forum topics from the specified category

    Raises:
        exceptions.FetchError: The category page could not be requested or parsed as HTML.
        exceptions.BadRequest: The category name could not be found on the page.
        exceptions.ScrapeError: The topic rows could not be parsed.

    Warning:
        Scratch's API uses very heavy caching for logged out users, therefore the returned data will not be up to date.

        Any methods that require authentication will not work on the returned objects.

        If you need up-to-date data or want to use methods that require authentication, get the forum topics with :meth:`scratchattach.session.Session.connect_topic_list` instead.
    """

    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(f"https://scratch.mit.edu/discuss/{category_id}/?page={page}", timeout=20)
        soup = BeautifulSoup(response.content, 'html.parser')
    except Exception as e:
        raise exceptions.FetchError(str(e)) from e

    try:
        category_name = soup.find('h4').find("span").get_text()
    except Exception as e:
        # A page without the expected heading is treated as an invalid category.
        raise exceptions.BadRequest("Invalid category id") from e

    try:
        topics = soup.find_all('tr')
        topics.pop(0)  # drop the first table row
        return_topics = []

        for topic in topics:
            title_link = topic.find('a')
            title = title_link.text.strip()
            # The second-to-last segment of the link path is used as the topic id.
            topic_id = title_link['href'].split('/')[-2]

            columns = [column.text for column in topic.find_all('td')]
            if len(columns) == 1:
                # This is a sticky topic -> Skip it
                continue

            # Only the first two space-separated words of the fourth column are kept.
            last_post_words = columns[3].split(" ")
            last_updated = last_post_words[0] + " " + last_post_words[1]

            return_topics.append(
                ForumTopic(
                    id=int(topic_id),
                    title=title,
                    category_name=category_name,
                    last_updated=last_updated,
                    reply_count=int(columns[1]),
                    view_count=int(columns[2]),
                )
            )
        return return_topics
    except Exception as e:
        raise exceptions.ScrapeError(str(e)) from e
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
def youtube_link_to_scratch(link: str) -> str:
    """
    Converts a YouTube url (in multiple formats) like https://youtu.be/1JTgg4WVAX8?si=fIEskaEaOIRZyTAz
    to a link like https://scratch.mit.edu/discuss/youtube/1JTgg4WVAX8

    The video id is taken from the "v" query parameter if it is present,
    otherwise from the last segment of the url path.

    Args:
        link (str): The YouTube url

    Returns:
        str: The corresponding https://scratch.mit.edu/discuss/youtube/ link
    """
    url_parse = urlparse(link)
    query_parse = parse_qs(url_parse.query)
    if 'v' in query_parse:
        video_id = query_parse['v'][0]
    else:
        # Strip trailing slashes first so "https://youtu.be/<id>/" does not
        # produce an empty video id.
        video_id = url_parse.path.rstrip('/').split('/')[-1]
    return f"https://scratch.mit.edu/discuss/youtube/{video_id}"
|