qcanvas 0.0.5.7a0__py3-none-any.whl → 1.0.3.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qcanvas might be problematic. Click here for more details.
- qcanvas/app_start/__init__.py +47 -0
- qcanvas/backend_connectors/__init__.py +2 -0
- qcanvas/backend_connectors/frontend_resource_manager.py +63 -0
- qcanvas/backend_connectors/qcanvas_task_master.py +28 -0
- qcanvas/icons/__init__.py +6 -0
- qcanvas/icons/file-download-failed.svg +6 -0
- qcanvas/icons/file-downloaded.svg +6 -0
- qcanvas/icons/file-not-downloaded.svg +6 -0
- qcanvas/icons/file-unknown.svg +6 -0
- qcanvas/icons/icons.qrc +4 -0
- qcanvas/icons/main_icon.svg +7 -7
- qcanvas/icons/rc_icons.py +580 -214
- qcanvas/icons/sync.svg +6 -6
- qcanvas/run.py +29 -0
- qcanvas/ui/course_viewer/__init__.py +2 -0
- qcanvas/ui/course_viewer/content_tree.py +123 -0
- qcanvas/ui/course_viewer/course_tree.py +93 -0
- qcanvas/ui/course_viewer/course_viewer.py +62 -0
- qcanvas/ui/course_viewer/tabs/__init__.py +3 -0
- qcanvas/ui/course_viewer/tabs/assignment_tab/__init__.py +1 -0
- qcanvas/ui/course_viewer/tabs/assignment_tab/assignment_tab.py +168 -0
- qcanvas/ui/course_viewer/tabs/assignment_tab/assignment_tree.py +104 -0
- qcanvas/ui/course_viewer/tabs/content_tab.py +96 -0
- qcanvas/ui/course_viewer/tabs/mail_tab/__init__.py +1 -0
- qcanvas/ui/course_viewer/tabs/mail_tab/mail_tab.py +68 -0
- qcanvas/ui/course_viewer/tabs/mail_tab/mail_tree.py +70 -0
- qcanvas/ui/course_viewer/tabs/page_tab/__init__.py +1 -0
- qcanvas/ui/course_viewer/tabs/page_tab/page_tab.py +36 -0
- qcanvas/ui/course_viewer/tabs/page_tab/page_tree.py +74 -0
- qcanvas/ui/course_viewer/tabs/resource_rich_browser.py +176 -0
- qcanvas/ui/course_viewer/tabs/util.py +1 -0
- qcanvas/ui/main_ui/course_viewer_container.py +52 -0
- qcanvas/ui/main_ui/options/__init__.py +3 -0
- qcanvas/ui/main_ui/options/quick_sync_option.py +25 -0
- qcanvas/ui/main_ui/options/sync_on_start_option.py +25 -0
- qcanvas/ui/main_ui/qcanvas_window.py +192 -0
- qcanvas/ui/main_ui/status_bar_progress_display.py +153 -0
- qcanvas/ui/memory_tree/__init__.py +2 -0
- qcanvas/ui/memory_tree/_tree_memory.py +66 -0
- qcanvas/ui/memory_tree/memory_tree_widget.py +133 -0
- qcanvas/ui/memory_tree/memory_tree_widget_item.py +19 -0
- qcanvas/ui/setup/__init__.py +2 -0
- qcanvas/ui/setup/setup_checker.py +17 -0
- qcanvas/ui/setup/setup_dialog.py +212 -0
- qcanvas/util/__init__.py +2 -0
- qcanvas/util/basic_fonts.py +12 -0
- qcanvas/util/fe_resource_manager.py +23 -0
- qcanvas/util/html_cleaner.py +25 -0
- qcanvas/util/layouts.py +52 -0
- qcanvas/util/logs.py +6 -0
- qcanvas/util/paths.py +41 -0
- qcanvas/util/settings/__init__.py +9 -0
- qcanvas/util/settings/_client_settings.py +29 -0
- qcanvas/util/settings/_mapped_setting.py +63 -0
- qcanvas/util/settings/_ui_settings.py +34 -0
- qcanvas/util/ui_tools.py +41 -0
- qcanvas/util/url_checker.py +13 -0
- qcanvas-1.0.3.post1.dist-info/METADATA +59 -0
- qcanvas-1.0.3.post1.dist-info/RECORD +64 -0
- {qcanvas-0.0.5.7a0.dist-info → qcanvas-1.0.3.post1.dist-info}/WHEEL +1 -1
- qcanvas-1.0.3.post1.dist-info/entry_points.txt +3 -0
- qcanvas/__main__.py +0 -155
- qcanvas/db/__init__.py +0 -5
- qcanvas/db/database.py +0 -338
- qcanvas/db/db_converter_helper.py +0 -81
- qcanvas/net/canvas/__init__.py +0 -2
- qcanvas/net/canvas/canvas_client.py +0 -209
- qcanvas/net/canvas/legacy_canvas_types.py +0 -124
- qcanvas/net/custom_httpx_async_transport.py +0 -34
- qcanvas/net/self_authenticating.py +0 -108
- qcanvas/queries/__init__.py +0 -4
- qcanvas/queries/all_courses.gql +0 -7
- qcanvas/queries/all_courses.py +0 -108
- qcanvas/queries/canvas_course_data.gql +0 -51
- qcanvas/queries/canvas_course_data.py +0 -143
- qcanvas/ui/container_item.py +0 -11
- qcanvas/ui/main_ui.py +0 -251
- qcanvas/ui/menu_bar/__init__.py +0 -0
- qcanvas/ui/menu_bar/grouping_preferences_menu.py +0 -61
- qcanvas/ui/menu_bar/theme_selection_menu.py +0 -39
- qcanvas/ui/setup_dialog.py +0 -190
- qcanvas/ui/status_bar_reporter.py +0 -40
- qcanvas/ui/viewer/__init__.py +0 -0
- qcanvas/ui/viewer/course_list.py +0 -96
- qcanvas/ui/viewer/file_list.py +0 -195
- qcanvas/ui/viewer/file_view_tab.py +0 -62
- qcanvas/ui/viewer/page_list_viewer.py +0 -150
- qcanvas/util/app_settings.py +0 -98
- qcanvas/util/constants.py +0 -5
- qcanvas/util/course_indexer/__init__.py +0 -1
- qcanvas/util/course_indexer/conversion_helpers.py +0 -78
- qcanvas/util/course_indexer/data_manager.py +0 -447
- qcanvas/util/course_indexer/resource_helpers.py +0 -191
- qcanvas/util/download_pool.py +0 -58
- qcanvas/util/helpers/__init__.py +0 -0
- qcanvas/util/helpers/canvas_sanitiser.py +0 -47
- qcanvas/util/helpers/file_icon_helper.py +0 -34
- qcanvas/util/helpers/qaction_helper.py +0 -25
- qcanvas/util/helpers/theme_helper.py +0 -48
- qcanvas/util/link_scanner/__init__.py +0 -2
- qcanvas/util/link_scanner/canvas_link_scanner.py +0 -41
- qcanvas/util/link_scanner/canvas_media_object_scanner.py +0 -60
- qcanvas/util/link_scanner/dropbox_scanner.py +0 -68
- qcanvas/util/link_scanner/resource_scanner.py +0 -69
- qcanvas/util/progress_reporter.py +0 -101
- qcanvas/util/self_updater.py +0 -55
- qcanvas/util/task_pool.py +0 -253
- qcanvas/util/tree_util/__init__.py +0 -3
- qcanvas/util/tree_util/expanding_tree.py +0 -165
- qcanvas/util/tree_util/model_helpers.py +0 -36
- qcanvas/util/tree_util/tree_model.py +0 -85
- qcanvas-0.0.5.7a0.dist-info/METADATA +0 -21
- qcanvas-0.0.5.7a0.dist-info/RECORD +0 -62
- /qcanvas/{net → ui/main_ui}/__init__.py +0 -0
|
@@ -1,447 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import logging
|
|
3
|
-
import traceback
|
|
4
|
-
from asyncio import Task
|
|
5
|
-
from dataclasses import dataclass
|
|
6
|
-
from typing import Sequence
|
|
7
|
-
|
|
8
|
-
from gql import gql
|
|
9
|
-
from sqlalchemy import select
|
|
10
|
-
from sqlalchemy.dialects.sqlite import insert as sqlite_upsert
|
|
11
|
-
from sqlalchemy.ext.asyncio.session import async_sessionmaker as AsyncSessionMaker, AsyncSession
|
|
12
|
-
from sqlalchemy.orm import selectin_polymorphic, selectinload
|
|
13
|
-
|
|
14
|
-
import qcanvas.db as db
|
|
15
|
-
import qcanvas.queries as queries
|
|
16
|
-
import qcanvas.util.course_indexer.conversion_helpers as conv_helper
|
|
17
|
-
import qcanvas.util.course_indexer.resource_helpers as resource_helper
|
|
18
|
-
from qcanvas.net.canvas import CanvasClient
|
|
19
|
-
from qcanvas.util.download_pool import DownloadPool
|
|
20
|
-
from qcanvas.util.link_scanner.canvas_link_scanner import canvas_resource_id_prefix
|
|
21
|
-
from qcanvas.util.link_scanner.resource_scanner import ResourceScanner
|
|
22
|
-
from qcanvas.util.progress_reporter import ProgressReporter, noop_reporter
|
|
23
|
-
from qcanvas.util.task_pool import TaskPool
|
|
24
|
-
|
|
25
|
-
_logger = logging.getLogger("course_loader")
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@dataclass
class TransientModulePage:
    """
    A module item from a canvas query that still needs its content loaded.

    Instances are produced by `_prepare_out_of_date_pages_for_loading` for items that are missing from the database
    or have a newer update time than the stored copy.
    """
    # The page or file object taken from the module item's `content` in the query result
    page: queries.Page | queries.File
    # `m_id` of the course the item was found in
    course_id: str
    # `q_id` of the module the item was found in
    module_id: str
    # Index of the item within its module's `module_items` list
    position: int
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def _prepare_out_of_date_pages_for_loading(g_courses: Sequence[queries.Course], pages: Sequence[db.ModuleItem]) -> list[
|
|
37
|
-
TransientModulePage]:
|
|
38
|
-
"""
|
|
39
|
-
Removes pages that are up-to-date from the pages list by comparing the last update time of the pages from the query
|
|
40
|
-
to the last update time of the pages in the database.
|
|
41
|
-
|
|
42
|
-
Parameters
|
|
43
|
-
----------
|
|
44
|
-
g_courses
|
|
45
|
-
The list of courses (with module items) to check for an update.
|
|
46
|
-
pages
|
|
47
|
-
The list of pages already existing in the database.
|
|
48
|
-
Returns
|
|
49
|
-
-------
|
|
50
|
-
list[TransientModulePage]
|
|
51
|
-
A list of pages that have had an update to them.
|
|
52
|
-
"""
|
|
53
|
-
pages_id_mapped = {x.id: x for x in pages}
|
|
54
|
-
|
|
55
|
-
result: list[TransientModulePage] = []
|
|
56
|
-
|
|
57
|
-
for g_course in g_courses:
|
|
58
|
-
for g_module in g_course.modules_connection.nodes:
|
|
59
|
-
for item_position, g_module_item in enumerate(g_module.module_items):
|
|
60
|
-
content = g_module_item.content
|
|
61
|
-
|
|
62
|
-
if isinstance(content, (queries.File, queries.Page)):
|
|
63
|
-
# todo need to decide how to only rescan old pages or only rescan new pages without fetching content of old pages again for no good reason
|
|
64
|
-
if (
|
|
65
|
-
content.m_id not in pages_id_mapped
|
|
66
|
-
or content.updated_at.replace(tzinfo=None) > pages_id_mapped[content.m_id].updated_at
|
|
67
|
-
):
|
|
68
|
-
result.append(TransientModulePage(content, g_course.m_id, g_module.q_id, item_position))
|
|
69
|
-
else:
|
|
70
|
-
_logger.debug("Page %s is already up to date", content.m_id)
|
|
71
|
-
|
|
72
|
-
return result
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
# todo make this reusable and add some way of refreshing only a list of pages or one page or one course or something
|
|
76
|
-
# todo use logger instead of print and put some signals around the place for useful things, e.g. indexing progress
|
|
77
|
-
class DataManager:
|
|
78
|
-
"""
|
|
79
|
-
Responsible for storing all data pulled from canvas or other websites in the database.
|
|
80
|
-
Provides functions for synchronizing with canvas and downloading files.
|
|
81
|
-
"""
|
|
82
|
-
def __init__(self,
|
|
83
|
-
client: CanvasClient,
|
|
84
|
-
sessionmaker: AsyncSessionMaker,
|
|
85
|
-
link_scanners: Sequence[ResourceScanner]):
|
|
86
|
-
|
|
87
|
-
self.client = client
|
|
88
|
-
self._link_scanners = link_scanners
|
|
89
|
-
self._session_maker = sessionmaker
|
|
90
|
-
|
|
91
|
-
self._resource_pool = TaskPool[db.Resource]()
|
|
92
|
-
# todo just remove this thing it does nothing
|
|
93
|
-
self._moduleitem_pool = TaskPool[db.ModuleItem]()
|
|
94
|
-
self.download_pool = DownloadPool()
|
|
95
|
-
|
|
96
|
-
# Map all the scanners we have to their own name
|
|
97
|
-
self._scanner_name_map = {scanner.name: scanner for scanner in self._link_scanners}
|
|
98
|
-
|
|
99
|
-
self._init_called = False
|
|
100
|
-
|
|
101
|
-
async def init(self):
|
|
102
|
-
"""
|
|
103
|
-
Load existing pages and resources from the database, so they don't have to be fetched from canvas again
|
|
104
|
-
"""
|
|
105
|
-
self._init_called = True
|
|
106
|
-
|
|
107
|
-
async with self._session_maker.begin() as session:
|
|
108
|
-
# Load existing pages and resources from the database
|
|
109
|
-
existing_pages = (await session.execute(
|
|
110
|
-
select(db.ModuleItem)
|
|
111
|
-
.options(selectin_polymorphic(db.ModuleItem, [db.ModulePage]))
|
|
112
|
-
)).scalars().all()
|
|
113
|
-
|
|
114
|
-
existing_resources = (await session.execute(
|
|
115
|
-
select(db.Resource)
|
|
116
|
-
)).scalars().all()
|
|
117
|
-
|
|
118
|
-
# Add the existing items to the relevant taskpools
|
|
119
|
-
self._add_resources_and_pages_to_taskpool(existing_pages=existing_pages,
|
|
120
|
-
existing_resources=existing_resources)
|
|
121
|
-
|
|
122
|
-
async def _download_resource_helper(self, link_handler: ResourceScanner, resource: db.Resource):
|
|
123
|
-
try:
|
|
124
|
-
async for progress in link_handler.download(resource):
|
|
125
|
-
yield progress
|
|
126
|
-
|
|
127
|
-
# Do this here because this function will only be called once for this resource
|
|
128
|
-
async with self._session_maker.begin() as session:
|
|
129
|
-
session.add(resource)
|
|
130
|
-
resource.state = db.ResourceState.DOWNLOADED
|
|
131
|
-
except BaseException as e:
|
|
132
|
-
# Something went wrong, record the failure in the database
|
|
133
|
-
async with self._session_maker.begin() as session:
|
|
134
|
-
session.add(resource)
|
|
135
|
-
resource.state = db.ResourceState.FAILED
|
|
136
|
-
resource.fail_message = str(e)
|
|
137
|
-
|
|
138
|
-
raise e
|
|
139
|
-
|
|
140
|
-
async def download_resource(self, resource: db.Resource):
|
|
141
|
-
if not self._init_called:
|
|
142
|
-
raise Exception("Init was not called")
|
|
143
|
-
|
|
144
|
-
# Resource ids look like this: "canvas_file:387837", and we just want the "canvas_file" part
|
|
145
|
-
scanner_name: str = resource.id.split(':', 2)[0]
|
|
146
|
-
# Find the scanner that will deal with this resource
|
|
147
|
-
scanner = self._scanner_name_map[scanner_name]
|
|
148
|
-
|
|
149
|
-
await self.download_pool.submit(resource.id, lambda: self._download_resource_helper(scanner, resource))
|
|
150
|
-
|
|
151
|
-
async def update_item(self, item: db.Base):
|
|
152
|
-
async with self._session_maker.begin() as session:
|
|
153
|
-
await session.merge(item)
|
|
154
|
-
|
|
155
|
-
async def get_data(self):
|
|
156
|
-
"""
|
|
157
|
-
Loads all the course data
|
|
158
|
-
"""
|
|
159
|
-
async with self._session_maker.begin() as session:
|
|
160
|
-
module_items_load = selectinload(db.Course.modules).joinedload(db.Module.items)
|
|
161
|
-
|
|
162
|
-
# Eagerly load fucking everything
|
|
163
|
-
options = [
|
|
164
|
-
selectinload(db.Course.modules)
|
|
165
|
-
.joinedload(db.Module.course),
|
|
166
|
-
|
|
167
|
-
module_items_load.selectin_polymorphic([db.ModulePage, db.ModuleFile])
|
|
168
|
-
.joinedload(db.ModuleItem.module),
|
|
169
|
-
|
|
170
|
-
module_items_load.joinedload(db.ModuleItem.resources),
|
|
171
|
-
selectinload(db.Course.assignments)
|
|
172
|
-
.joinedload(db.Assignment.course),
|
|
173
|
-
|
|
174
|
-
selectinload(db.Course.assignments)
|
|
175
|
-
.joinedload(db.Assignment.resources),
|
|
176
|
-
|
|
177
|
-
selectinload(db.Course.term),
|
|
178
|
-
|
|
179
|
-
selectinload(db.Course.module_items)
|
|
180
|
-
.joinedload(db.ModuleItem.course),
|
|
181
|
-
|
|
182
|
-
selectinload(db.Course.preferences)
|
|
183
|
-
.joinedload(db.CoursePreferences.course),
|
|
184
|
-
|
|
185
|
-
selectinload(db.Course.resources)
|
|
186
|
-
.joinedload(db.Resource.course)
|
|
187
|
-
]
|
|
188
|
-
|
|
189
|
-
return (await session.execute(select(db.Course).options(*options))).scalars().all()
|
|
190
|
-
|
|
191
|
-
async def synchronize_with_canvas(self, progress_reporter: ProgressReporter = noop_reporter):
|
|
192
|
-
section = progress_reporter.section("Loading index", 0)
|
|
193
|
-
raw_query = (await self.client.do_graphql_query(gql(queries.all_courses.DEFINITION), detailed=True))
|
|
194
|
-
section.increment_progress()
|
|
195
|
-
|
|
196
|
-
await self.load_courses_data(queries.AllCoursesQueryData(**raw_query).all_courses, progress_reporter)
|
|
197
|
-
|
|
198
|
-
async def load_courses_data(self, g_courses: Sequence[queries.Course], progress_reporter: ProgressReporter):
|
|
199
|
-
"""
|
|
200
|
-
Loads data for all specified courses, including loading module pages and scanning for resources.
|
|
201
|
-
"""
|
|
202
|
-
|
|
203
|
-
if not self._init_called:
|
|
204
|
-
raise Exception("Init was not called")
|
|
205
|
-
|
|
206
|
-
try:
|
|
207
|
-
async with self._session_maker.begin() as session:
|
|
208
|
-
# Load module pages/files for the courses
|
|
209
|
-
await self._load_module_items(g_courses, session, progress_reporter)
|
|
210
|
-
|
|
211
|
-
# Collect assignments from the courses
|
|
212
|
-
assignments = []
|
|
213
|
-
|
|
214
|
-
for g_course in g_courses:
|
|
215
|
-
# Create needed data in the session
|
|
216
|
-
term = await conv_helper.create_term(g_course, session)
|
|
217
|
-
await conv_helper.create_course(g_course, session, term)
|
|
218
|
-
await conv_helper.create_modules(g_course, session)
|
|
219
|
-
|
|
220
|
-
# Add course assignments to the list
|
|
221
|
-
assignments.extend(await conv_helper.create_assignments(g_course, session))
|
|
222
|
-
|
|
223
|
-
# Scan assignments for resources
|
|
224
|
-
await self._scan_assignments_for_resources(assignments, session, progress_reporter)
|
|
225
|
-
|
|
226
|
-
# Add all resources back into the session
|
|
227
|
-
session.add_all(self._resource_pool.results())
|
|
228
|
-
progress_reporter.finished()
|
|
229
|
-
except BaseException as e:
|
|
230
|
-
traceback.print_exc()
|
|
231
|
-
progress_reporter.errored(e)
|
|
232
|
-
|
|
233
|
-
async def _scan_assignments_for_resources(self, assignments: Sequence[db.Assignment], session: AsyncSession,
|
|
234
|
-
progress_reporter: ProgressReporter):
|
|
235
|
-
"""
|
|
236
|
-
Scans assignments for resources
|
|
237
|
-
"""
|
|
238
|
-
|
|
239
|
-
# Link the resources found to each page in the database
|
|
240
|
-
await resource_helper.create_assignment_resource_relations(
|
|
241
|
-
# Find all the resources in each assignment description
|
|
242
|
-
await resource_helper.find_resources_in_pages(
|
|
243
|
-
link_scanners=self._link_scanners,
|
|
244
|
-
resource_pool=self._resource_pool,
|
|
245
|
-
items=assignments,
|
|
246
|
-
progress_reporter=progress_reporter
|
|
247
|
-
),
|
|
248
|
-
session
|
|
249
|
-
)
|
|
250
|
-
|
|
251
|
-
async def _load_module_items(self, g_courses: Sequence[queries.Course], session: AsyncSession,
|
|
252
|
-
progress_reporter: ProgressReporter):
|
|
253
|
-
# Get the ids of all the courses we are going to index/load
|
|
254
|
-
course_ids = [g_course.m_id for g_course in g_courses]
|
|
255
|
-
|
|
256
|
-
# Prepare pages for loading
|
|
257
|
-
existing_pages = (
|
|
258
|
-
await session.execute(
|
|
259
|
-
select(db.ModulePage)
|
|
260
|
-
.where(db.ModuleItem.course_id.in_(course_ids))
|
|
261
|
-
)).scalars().all()
|
|
262
|
-
|
|
263
|
-
# Filter out pages that don't need updating
|
|
264
|
-
pages_to_update = _prepare_out_of_date_pages_for_loading(g_courses, existing_pages)
|
|
265
|
-
|
|
266
|
-
if len(pages_to_update) == 0:
|
|
267
|
-
return
|
|
268
|
-
|
|
269
|
-
# Load the content for all the pages that need updating
|
|
270
|
-
module_items: list[db.ModuleItem] = await self._load_content_for_pages(pages_to_update, progress_reporter)
|
|
271
|
-
module_pages = [item for item in module_items if isinstance(item, db.ModulePage)]
|
|
272
|
-
|
|
273
|
-
# Link the resources found to the pages they were found on and add them to the database
|
|
274
|
-
await resource_helper.create_module_item_resource_relations(
|
|
275
|
-
# Find all the resources in each page
|
|
276
|
-
await resource_helper.find_resources_in_pages(
|
|
277
|
-
link_scanners=self._link_scanners,
|
|
278
|
-
resource_pool=self._resource_pool,
|
|
279
|
-
progress_reporter=progress_reporter,
|
|
280
|
-
# Collect just the module pages for scanning
|
|
281
|
-
items=module_pages
|
|
282
|
-
),
|
|
283
|
-
session
|
|
284
|
-
)
|
|
285
|
-
|
|
286
|
-
# empty inserts/upserts causes an sql error. don't do them
|
|
287
|
-
if len(module_pages) > 0:
|
|
288
|
-
# Add all the module items to the session
|
|
289
|
-
# shitty bandaid fix
|
|
290
|
-
upsert_item = sqlite_upsert(db.ModuleItem).values([self.moduleitem_dict(item) for item in module_pages])
|
|
291
|
-
upsert_item = upsert_item.on_conflict_do_update(
|
|
292
|
-
index_elements=[db.ModuleItem.id],
|
|
293
|
-
set_=dict(name=upsert_item.excluded.name, updated_at=upsert_item.excluded.updated_at,
|
|
294
|
-
position=upsert_item.excluded.position),
|
|
295
|
-
|
|
296
|
-
)
|
|
297
|
-
|
|
298
|
-
upsert_page = sqlite_upsert(db.ModulePage).values([self.page_dict(item) for item in module_pages])
|
|
299
|
-
upsert_page = upsert_page.on_conflict_do_update(
|
|
300
|
-
index_elements=[db.ModulePage.id],
|
|
301
|
-
set_=dict(content=upsert_page.excluded.content)
|
|
302
|
-
)
|
|
303
|
-
|
|
304
|
-
await session.execute(upsert_item)
|
|
305
|
-
await session.execute(upsert_page)
|
|
306
|
-
|
|
307
|
-
session.add_all([item for item in module_items if isinstance(item, db.ModuleFile)])
|
|
308
|
-
|
|
309
|
-
@staticmethod
|
|
310
|
-
def page_dict(page: db.ModulePage) -> dict[str, object]:
|
|
311
|
-
return {"id": page.id, "content": page.content}
|
|
312
|
-
|
|
313
|
-
@staticmethod
|
|
314
|
-
def moduleitem_dict(page: db.ModuleItem) -> dict[str, object]:
|
|
315
|
-
return {"id": page.id, "name": page.name, "updated_at": page.updated_at, "position": page.position,
|
|
316
|
-
"module_id": page.module_id, "course_id": page.course_id, "type": page.type,
|
|
317
|
-
"created_at": page.created_at}
|
|
318
|
-
|
|
319
|
-
def _add_resources_and_pages_to_taskpool(self, existing_pages: Sequence[db.ModuleItem],
|
|
320
|
-
existing_resources: Sequence[db.Resource]):
|
|
321
|
-
self._moduleitem_pool.add_values({page.id: page for page in existing_pages})
|
|
322
|
-
self._resource_pool.add_values({resource.id: resource for resource in existing_resources})
|
|
323
|
-
# Add downloaded resources to the resource pool so we don't download them again
|
|
324
|
-
self.download_pool.add_values(
|
|
325
|
-
{resource.id: None for resource in existing_resources if resource.state == db.ResourceState.DOWNLOADED})
|
|
326
|
-
|
|
327
|
-
async def _load_content_for_pages(self, pages: Sequence[TransientModulePage],
|
|
328
|
-
progress_reporter: ProgressReporter) -> list[db.ModuleItem]:
|
|
329
|
-
"""
|
|
330
|
-
Loads the page content for the specified pages
|
|
331
|
-
Parameters
|
|
332
|
-
----------
|
|
333
|
-
pages
|
|
334
|
-
The pages to load
|
|
335
|
-
Returns
|
|
336
|
-
-------
|
|
337
|
-
list
|
|
338
|
-
The list of complete pages with page content loaded.
|
|
339
|
-
"""
|
|
340
|
-
progress = progress_reporter.section("Loading page content", len(pages))
|
|
341
|
-
tasks: list[Task[db.ModuleItem | None]] = []
|
|
342
|
-
|
|
343
|
-
for page in pages:
|
|
344
|
-
content = page.page
|
|
345
|
-
|
|
346
|
-
# Load the content for the pages
|
|
347
|
-
if isinstance(content, queries.File):
|
|
348
|
-
task = asyncio.create_task(
|
|
349
|
-
self._load_module_file(content, page.course_id, page.module_id, page.position))
|
|
350
|
-
task.add_done_callback(progress.increment_progress)
|
|
351
|
-
tasks.append(task)
|
|
352
|
-
elif isinstance(content, queries.Page):
|
|
353
|
-
task = asyncio.create_task(
|
|
354
|
-
self.load_module_page(content, page.course_id, page.module_id, page.position))
|
|
355
|
-
task.add_done_callback(progress.increment_progress)
|
|
356
|
-
tasks.append(task)
|
|
357
|
-
|
|
358
|
-
if len(tasks) > 0:
|
|
359
|
-
await asyncio.wait(tasks)
|
|
360
|
-
|
|
361
|
-
# Collect results and filter out nulls
|
|
362
|
-
return [task.result() for task in tasks if task.result() is not None]
|
|
363
|
-
else:
|
|
364
|
-
return []
|
|
365
|
-
|
|
366
|
-
async def _load_module_file(self, g_file: queries.File, course_id: str, module_id: str,
|
|
367
|
-
position: int) -> db.ModuleFile:
|
|
368
|
-
"""
|
|
369
|
-
Fetches resource information for the module file and converts it into a module item
|
|
370
|
-
"""
|
|
371
|
-
_logger.debug(f"Loading module file %s %s", g_file.m_id, g_file.display_name)
|
|
372
|
-
|
|
373
|
-
resource = await self._resource_pool.submit(
|
|
374
|
-
f"{canvas_resource_id_prefix}:{g_file.m_id}", # to match the format used by canvas link extractor
|
|
375
|
-
lambda: self._fetch_module_file_resource(g_file, course_id)
|
|
376
|
-
)
|
|
377
|
-
|
|
378
|
-
return await self._moduleitem_pool.submit(
|
|
379
|
-
g_file.m_id,
|
|
380
|
-
lambda: self._fetch_module_file_page(g_file, resource, course_id, module_id, position)
|
|
381
|
-
)
|
|
382
|
-
|
|
383
|
-
async def _fetch_module_file_resource(self, file: queries.File, course_id: str) -> db.Resource:
|
|
384
|
-
"""
|
|
385
|
-
Fetches information about the specified file from canvas
|
|
386
|
-
"""
|
|
387
|
-
_logger.debug(f"Fetching file (for module file) %s %s", file.m_id, file.display_name)
|
|
388
|
-
result = await self.client.get_file(file.m_id, course_id)
|
|
389
|
-
resource = db.convert_file(file, result.size)
|
|
390
|
-
resource.id = f"{canvas_resource_id_prefix}:{resource.id}"
|
|
391
|
-
resource.course_id = course_id
|
|
392
|
-
|
|
393
|
-
return resource
|
|
394
|
-
|
|
395
|
-
async def load_module_page(self, g_page: queries.Page, course_id: str, module_id: str,
|
|
396
|
-
position: int) -> db.ModulePage | None:
|
|
397
|
-
"""
|
|
398
|
-
Creates task for loading the specified module page
|
|
399
|
-
"""
|
|
400
|
-
return await self._fetch_module_item_page(g_page, course_id, module_id, position)
|
|
401
|
-
|
|
402
|
-
async def _fetch_module_item_page(self, page: queries.Page, course_id: str, module_id: str,
|
|
403
|
-
position: int) -> db.ModulePage | None:
|
|
404
|
-
"""
|
|
405
|
-
Fetches module page content from canvas. Returns None if the page could not be loaded.
|
|
406
|
-
"""
|
|
407
|
-
_logger.debug("Fetching module page %s %s", page.m_id, page.title)
|
|
408
|
-
|
|
409
|
-
try:
|
|
410
|
-
# Get the page
|
|
411
|
-
result = await self.client.get_page(page.m_id, course_id)
|
|
412
|
-
except BaseException as e:
|
|
413
|
-
# Handle any errors
|
|
414
|
-
_logger.error(e)
|
|
415
|
-
traceback.print_exc()
|
|
416
|
-
return None
|
|
417
|
-
|
|
418
|
-
if result.locked_for_user:
|
|
419
|
-
_logger.error("Page %s %s is locked", page.m_id, page.title)
|
|
420
|
-
return None
|
|
421
|
-
|
|
422
|
-
page = db.convert_page(page, result.body)
|
|
423
|
-
page.module_id = module_id
|
|
424
|
-
page.course_id = course_id
|
|
425
|
-
page.position = position
|
|
426
|
-
|
|
427
|
-
return page
|
|
428
|
-
|
|
429
|
-
@staticmethod
|
|
430
|
-
async def _fetch_module_file_page(file: queries.File, resource: db.Resource, course_id: str,
|
|
431
|
-
module_id: str, position: int) -> db.ModuleFile:
|
|
432
|
-
"""
|
|
433
|
-
Converts module file information into the database format
|
|
434
|
-
"""
|
|
435
|
-
_logger.debug(f"Creating page for module file %s %s", file.m_id, file.display_name)
|
|
436
|
-
|
|
437
|
-
page = db.convert_file_page(file)
|
|
438
|
-
page.module_id = module_id
|
|
439
|
-
page.course_id = course_id
|
|
440
|
-
page.position = position
|
|
441
|
-
page.resources.append(resource)
|
|
442
|
-
|
|
443
|
-
return page
|
|
444
|
-
|
|
445
|
-
@property
|
|
446
|
-
def link_scanners(self):
|
|
447
|
-
return self._link_scanners
|
|
@@ -1,191 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import logging
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
from typing import Sequence
|
|
5
|
-
|
|
6
|
-
from bs4 import Tag, BeautifulSoup
|
|
7
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
|
-
|
|
9
|
-
import qcanvas.db as db
|
|
10
|
-
from qcanvas.util.link_scanner import ResourceScanner
|
|
11
|
-
from qcanvas.util.progress_reporter import ProgressReporter
|
|
12
|
-
from qcanvas.util.task_pool import TaskPool
|
|
13
|
-
|
|
14
|
-
_logger = logging.getLogger(__name__)
|
|
15
|
-
|
|
16
|
-
resource_elements = ["a", "iframe", "img"]
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
# todo could probably just use the database types directly now
|
|
20
|
-
@dataclass
class TransientResourceToPageLink:
    """
    Represents a temporary link between a page and a resource that will be added to the database soon.

    Two links are equal when both ids match, and equal links hash the same, so links can be de-duplicated with a
    set or used as dict keys.
    """
    page_id: str
    resource_id: str

    def __hash__(self):
        # Hash the pair as a tuple so the order of the ids matters. XOR-ing the two hashes made (a, b) collide with
        # (b, a) and made every link with identical ids hash to 0.
        return hash((self.page_id, self.resource_id))
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
async def create_module_item_resource_relations(relations: Sequence[TransientResourceToPageLink],
                                                session: AsyncSession):
    """
    Creates a link between module items/pages and resources found on those pages

    A link is only added when the database does not already contain it. Links that appear more than once in
    `relations` (e.g. a page that references the same resource twice) are added once.
    """
    # Links already handled in this call. `session.get` does not see objects that were added but not flushed yet,
    # so duplicates within the batch have to be filtered out here.
    handled: set[tuple[str, str]] = set()

    for relation in relations:
        key = (relation.page_id, relation.resource_id)

        if key in handled:
            continue

        handled.add(key)

        if await session.get(db.ResourceToModuleItemAssociation, key) is None:
            session.add(
                db.ResourceToModuleItemAssociation(
                    module_item_id=relation.page_id,
                    resource_id=relation.resource_id
                )
            )
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
async def create_assignment_resource_relations(relations: Sequence[TransientResourceToPageLink], session: AsyncSession):
    """
    Turns temporary TransientResourceToPageLink into a persistent relation in the database

    A relation is only added when the database does not already contain it. Links that appear more than once in
    `relations` (e.g. an assignment that references the same resource twice) are added once.
    """
    # Links already handled in this call. `session.get` does not see objects that were added but not flushed yet,
    # so duplicates within the batch have to be filtered out here.
    handled: set[tuple[str, str]] = set()

    for relation in relations:
        key = (relation.page_id, relation.resource_id)

        if key in handled:
            continue

        handled.add(key)

        if await session.get(db.ResourceToAssignmentAssociation, key) is None:
            session.add(
                db.ResourceToAssignmentAssociation(
                    assignment_id=relation.page_id,
                    resource_id=relation.resource_id
                )
            )
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
# todo change resource system to think of resources as links on a page with a shallow id (that may be the same as the deep id) which links to one or more deep ids
|
|
67
|
-
async def find_resources_in_pages(link_scanners: Sequence[ResourceScanner], resource_pool: TaskPool[db.Resource],
                                  items: Sequence[db.PageLike], progress_reporter: ProgressReporter) -> list[
    TransientResourceToPageLink]:
    """
    Scans every item in `items` with `link_scanners` and returns one resource to page link per resource found.
    Extracted resources will be added to `resource_pool`.

    Progress is reported in an "Indexing resources" section of `progress_reporter`, one step per item.
    """
    progress = progress_reporter.section("Indexing resources", len(items))
    scans = []

    for item in items:
        if item.content is not None:
            # extract resources from the page
            scan = asyncio.create_task(_extract_resources_from_page(link_scanners, resource_pool, item))
            scan.add_done_callback(progress.increment_progress)
            scans.append(scan)
        else:
            # Assignment descriptions may be null. Avoid creating extra tasks by counting them as done right away
            progress.increment_progress()

    # asyncio.wait rejects an empty collection
    if not scans:
        return []

    # Wait for all tasks to complete
    await asyncio.wait(scans)

    # Flatten the per-item lists into a single list
    return [found for scan in scans for found in scan.result()]
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
async def _extract_resources_from_page(link_scanners: Sequence[ResourceScanner], resource_pool: TaskPool[db.Resource],
                                       page: db.PageLike) -> list[TransientResourceToPageLink]:
    """
    Finds the link elements on `page` and hands each one to `_process_link`, which uses `link_scanners` to turn it
    into a resource in `resource_pool`.

    Returns
    -------
    list
        A list of resource to page links for any resources found on this page.
    """
    _logger.debug("Scanning %s %s for files", page.id, page.name)

    # Start one task per iframe, hyperlink, etc found on the page
    lookups = [
        asyncio.create_task(_process_link(link_scanners, resource_pool, element, page.course_id))
        for element in _scan_page_for_links(page)
    ]

    # asyncio.wait rejects an empty collection
    if not lookups:
        return []

    # Wait for all tasks to complete
    await asyncio.wait(lookups)

    # Links that no scanner accepted produce None and are dropped
    resources = (lookup.result() for lookup in lookups)

    return [
        TransientResourceToPageLink(page_id=page.id, resource_id=resource.id)
        for resource in resources if resource is not None
    ]
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
def _scan_page_for_links(page: db.PageLike) -> list[Tag]:
    """
    Parses the content of a PageLike object as html and returns every element whose tag is listed in
    `resource_elements`.
    """
    return list(BeautifulSoup(page.content, 'html.parser').find_all(resource_elements))
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
async def _process_link(link_scanners: Sequence[ResourceScanner], resource_pool: TaskPool[db.Resource], link: Tag,
                        course_id: str) -> db.Resource | None:
    """
    Hands `link` to the first scanner in `link_scanners` that accepts it and submits a lookup for the resource to
    `resource_pool`.

    The task is keyed as "<scanner name>:<resource id>", which matches the id format given to the resource itself.

    Returns
    -------
    db.Resource
        Whatever the pool produces for the submitted task.
    None
        If no scanner accepts the link.
    """
    # Scanners are tried in order and the search stops at the first one that accepts the link
    chosen = next((candidate for candidate in link_scanners if candidate.accepts_link(link)), None)

    if chosen is None:
        return None

    resource_id = chosen.extract_id(link)
    task_key = f"{chosen.name}:{resource_id}"  # match the format used by the resource id

    return await resource_pool.submit(
        task_key,
        lambda: _extract_file_info(link, chosen, resource_id, course_id)
    )
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
async def _extract_file_info(link: Tag, scanner: ResourceScanner, resource_id: str,
                             course_id: str) -> db.Resource | None:
    """
    Extracts file info from `link` using `scanner` and assigns the course_id to the resulting resource.

    Parameters
    ----------
    link
        The html element to scan
    scanner
        The scanner to process the link with
    resource_id
        The id of the resource as extracted from the link by the scanner (without the scanner name prefix)
    course_id
        The id of the course the file belongs to
    Returns
    -------
    db.Resource
        The resource if the link was processed successfully.
    None
        If processing failed
    """
    try:
        # The id was already extracted by the caller, so there is no need to extract it again just for logging
        _logger.debug("Fetching info for file %s with scanner %s", resource_id, scanner.name)

        result = await scanner.extract_resource(link, resource_id)
        # Prefix the scanner name to prevent resources from different sites potentially clashing
        result.id = f"{scanner.name}:{result.id}"
        result.course_id = course_id
        return result
    except Exception as e:
        # Only ordinary errors are treated as a failed lookup. asyncio.CancelledError, KeyboardInterrupt and
        # SystemExit derive from BaseException and must propagate so that cancellation and shutdown still work.
        _logger.error("Failed to retrieve info for file id %s: %s", f"{scanner.name}:{resource_id}", str(e))
        return None
|
qcanvas/util/download_pool.py
DELETED
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
from typing import Callable, Any
|
|
3
|
-
|
|
4
|
-
from PySide6.QtCore import QObject, Signal
|
|
5
|
-
|
|
6
|
-
from qcanvas.util.task_pool import TaskPool
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class DownloadPool(TaskPool[None], QObject):
    """
    Like the TaskPool, but the submitted function reports download progress by `yield`ing values, which are
    forwarded through Qt signals.

    When using .submit(), it is EXPECTED that the lambda returns an async generator that uses yield to signal
    progress, e.g:
    ```
    pool.submit(my_task_id, lambda: my_func())

    async def my_func():
        for i in range(0, 10):
            await slow_thing()
            yield (i / 10) # can be whatever format/type you want
    ```

    Signals
    -------
    download_progress_updated(task_id, progress)
        Emitted for every value yielded by the task.
    download_failed(task_id)
        Emitted when the task raises, just before the exception is re-raised.
    download_finished(task_id)
        Emitted when the task runs to completion.
    """
    download_progress_updated = Signal(object, Any)
    download_failed = Signal(object)
    download_finished = Signal(object)

    def __init__(self, parent: QObject | None = None):
        # Both bases are initialised explicitly because they take different constructor arguments
        QObject.__init__(self, parent)
        TaskPool.__init__(self)

    async def _handle_task(self, func: Callable, task_id: object, event: asyncio.Event, func_args: dict):
        """
        Runs `func(**func_args)` as an async generator, emitting a progress signal for every value it yields.

        On success the task is recorded as done (with a result of None), `event` is set and `download_finished`
        is emitted. On failure `event` is set, the task's record is removed, `download_failed` is emitted and the
        original exception is re-raised.
        """
        sem = self._semaphore

        try:
            # Consume progress updates 'yield'ed from the function
            async for progress in func(**func_args):
                # Fire the download progress update signal
                self.download_progress_updated.emit(task_id, progress)
        except BaseException:
            # Try to maintain integrity when a task fails
            async with sem:
                # Release anything else waiting for this task
                event.set()
                # Remove the record from the results map. pop() is used so that a missing record can not raise a
                # KeyError here, which would hide the real failure and skip the failure signal below.
                self._results.pop(task_id, None)

            # Emit failure signal and rethrow the original exception unchanged
            self.download_failed.emit(task_id)
            raise

        async with sem:
            self._logger.debug("Task %s finished.", task_id)
            # Record this task as done
            self._results[task_id] = None
            event.set()

        self.download_finished.emit(task_id)

        return None
|
qcanvas/util/helpers/__init__.py
DELETED
|
File without changes
|