qcanvas 0.0.5.7a0__py3-none-any.whl → 1.0.3.post0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (114)
  1. qcanvas/app_start/__init__.py +47 -0
  2. qcanvas/backend_connectors/__init__.py +2 -0
  3. qcanvas/backend_connectors/frontend_resource_manager.py +63 -0
  4. qcanvas/backend_connectors/qcanvas_task_master.py +28 -0
  5. qcanvas/icons/__init__.py +6 -0
  6. qcanvas/icons/file-download-failed.svg +6 -0
  7. qcanvas/icons/file-downloaded.svg +6 -0
  8. qcanvas/icons/file-not-downloaded.svg +6 -0
  9. qcanvas/icons/file-unknown.svg +6 -0
  10. qcanvas/icons/icons.qrc +4 -0
  11. qcanvas/icons/main_icon.svg +7 -7
  12. qcanvas/icons/rc_icons.py +580 -214
  13. qcanvas/icons/sync.svg +6 -6
  14. qcanvas/run.py +29 -0
  15. qcanvas/ui/course_viewer/__init__.py +2 -0
  16. qcanvas/ui/course_viewer/content_tree.py +123 -0
  17. qcanvas/ui/course_viewer/course_tree.py +93 -0
  18. qcanvas/ui/course_viewer/course_viewer.py +62 -0
  19. qcanvas/ui/course_viewer/tabs/__init__.py +3 -0
  20. qcanvas/ui/course_viewer/tabs/assignment_tab/__init__.py +1 -0
  21. qcanvas/ui/course_viewer/tabs/assignment_tab/assignment_tab.py +168 -0
  22. qcanvas/ui/course_viewer/tabs/assignment_tab/assignment_tree.py +104 -0
  23. qcanvas/ui/course_viewer/tabs/content_tab.py +96 -0
  24. qcanvas/ui/course_viewer/tabs/mail_tab/__init__.py +1 -0
  25. qcanvas/ui/course_viewer/tabs/mail_tab/mail_tab.py +68 -0
  26. qcanvas/ui/course_viewer/tabs/mail_tab/mail_tree.py +70 -0
  27. qcanvas/ui/course_viewer/tabs/page_tab/__init__.py +1 -0
  28. qcanvas/ui/course_viewer/tabs/page_tab/page_tab.py +36 -0
  29. qcanvas/ui/course_viewer/tabs/page_tab/page_tree.py +74 -0
  30. qcanvas/ui/course_viewer/tabs/resource_rich_browser.py +176 -0
  31. qcanvas/ui/course_viewer/tabs/util.py +1 -0
  32. qcanvas/ui/main_ui/course_viewer_container.py +52 -0
  33. qcanvas/ui/main_ui/options/__init__.py +3 -0
  34. qcanvas/ui/main_ui/options/quick_sync_option.py +25 -0
  35. qcanvas/ui/main_ui/options/sync_on_start_option.py +25 -0
  36. qcanvas/ui/main_ui/qcanvas_window.py +192 -0
  37. qcanvas/ui/main_ui/status_bar_progress_display.py +153 -0
  38. qcanvas/ui/memory_tree/__init__.py +2 -0
  39. qcanvas/ui/memory_tree/_tree_memory.py +66 -0
  40. qcanvas/ui/memory_tree/memory_tree_widget.py +133 -0
  41. qcanvas/ui/memory_tree/memory_tree_widget_item.py +19 -0
  42. qcanvas/ui/setup/__init__.py +2 -0
  43. qcanvas/ui/setup/setup_checker.py +17 -0
  44. qcanvas/ui/setup/setup_dialog.py +212 -0
  45. qcanvas/util/__init__.py +2 -0
  46. qcanvas/util/basic_fonts.py +12 -0
  47. qcanvas/util/fe_resource_manager.py +23 -0
  48. qcanvas/util/html_cleaner.py +25 -0
  49. qcanvas/util/layouts.py +52 -0
  50. qcanvas/util/logs.py +6 -0
  51. qcanvas/util/paths.py +41 -0
  52. qcanvas/util/settings/__init__.py +9 -0
  53. qcanvas/util/settings/_client_settings.py +29 -0
  54. qcanvas/util/settings/_mapped_setting.py +63 -0
  55. qcanvas/util/settings/_ui_settings.py +34 -0
  56. qcanvas/util/ui_tools.py +41 -0
  57. qcanvas/util/url_checker.py +13 -0
  58. qcanvas-1.0.3.post0.dist-info/METADATA +61 -0
  59. qcanvas-1.0.3.post0.dist-info/RECORD +64 -0
  60. {qcanvas-0.0.5.7a0.dist-info → qcanvas-1.0.3.post0.dist-info}/WHEEL +1 -1
  61. qcanvas-1.0.3.post0.dist-info/entry_points.txt +3 -0
  62. qcanvas/__main__.py +0 -155
  63. qcanvas/db/__init__.py +0 -5
  64. qcanvas/db/database.py +0 -338
  65. qcanvas/db/db_converter_helper.py +0 -81
  66. qcanvas/net/canvas/__init__.py +0 -2
  67. qcanvas/net/canvas/canvas_client.py +0 -209
  68. qcanvas/net/canvas/legacy_canvas_types.py +0 -124
  69. qcanvas/net/custom_httpx_async_transport.py +0 -34
  70. qcanvas/net/self_authenticating.py +0 -108
  71. qcanvas/queries/__init__.py +0 -4
  72. qcanvas/queries/all_courses.gql +0 -7
  73. qcanvas/queries/all_courses.py +0 -108
  74. qcanvas/queries/canvas_course_data.gql +0 -51
  75. qcanvas/queries/canvas_course_data.py +0 -143
  76. qcanvas/ui/container_item.py +0 -11
  77. qcanvas/ui/main_ui.py +0 -251
  78. qcanvas/ui/menu_bar/__init__.py +0 -0
  79. qcanvas/ui/menu_bar/grouping_preferences_menu.py +0 -61
  80. qcanvas/ui/menu_bar/theme_selection_menu.py +0 -39
  81. qcanvas/ui/setup_dialog.py +0 -190
  82. qcanvas/ui/status_bar_reporter.py +0 -40
  83. qcanvas/ui/viewer/__init__.py +0 -0
  84. qcanvas/ui/viewer/course_list.py +0 -96
  85. qcanvas/ui/viewer/file_list.py +0 -195
  86. qcanvas/ui/viewer/file_view_tab.py +0 -62
  87. qcanvas/ui/viewer/page_list_viewer.py +0 -150
  88. qcanvas/util/app_settings.py +0 -98
  89. qcanvas/util/constants.py +0 -5
  90. qcanvas/util/course_indexer/__init__.py +0 -1
  91. qcanvas/util/course_indexer/conversion_helpers.py +0 -78
  92. qcanvas/util/course_indexer/data_manager.py +0 -447
  93. qcanvas/util/course_indexer/resource_helpers.py +0 -191
  94. qcanvas/util/download_pool.py +0 -58
  95. qcanvas/util/helpers/__init__.py +0 -0
  96. qcanvas/util/helpers/canvas_sanitiser.py +0 -47
  97. qcanvas/util/helpers/file_icon_helper.py +0 -34
  98. qcanvas/util/helpers/qaction_helper.py +0 -25
  99. qcanvas/util/helpers/theme_helper.py +0 -48
  100. qcanvas/util/link_scanner/__init__.py +0 -2
  101. qcanvas/util/link_scanner/canvas_link_scanner.py +0 -41
  102. qcanvas/util/link_scanner/canvas_media_object_scanner.py +0 -60
  103. qcanvas/util/link_scanner/dropbox_scanner.py +0 -68
  104. qcanvas/util/link_scanner/resource_scanner.py +0 -69
  105. qcanvas/util/progress_reporter.py +0 -101
  106. qcanvas/util/self_updater.py +0 -55
  107. qcanvas/util/task_pool.py +0 -253
  108. qcanvas/util/tree_util/__init__.py +0 -3
  109. qcanvas/util/tree_util/expanding_tree.py +0 -165
  110. qcanvas/util/tree_util/model_helpers.py +0 -36
  111. qcanvas/util/tree_util/tree_model.py +0 -85
  112. qcanvas-0.0.5.7a0.dist-info/METADATA +0 -21
  113. qcanvas-0.0.5.7a0.dist-info/RECORD +0 -62
  114. /qcanvas/{net → ui/main_ui}/__init__.py +0 -0
qcanvas/util/course_indexer/data_manager.py (deleted)
@@ -1,447 +0,0 @@
- import asyncio
- import logging
- import traceback
- from asyncio import Task
- from dataclasses import dataclass
- from typing import Sequence
-
- from gql import gql
- from sqlalchemy import select
- from sqlalchemy.dialects.sqlite import insert as sqlite_upsert
- from sqlalchemy.ext.asyncio.session import async_sessionmaker as AsyncSessionMaker, AsyncSession
- from sqlalchemy.orm import selectin_polymorphic, selectinload
-
- import qcanvas.db as db
- import qcanvas.queries as queries
- import qcanvas.util.course_indexer.conversion_helpers as conv_helper
- import qcanvas.util.course_indexer.resource_helpers as resource_helper
- from qcanvas.net.canvas import CanvasClient
- from qcanvas.util.download_pool import DownloadPool
- from qcanvas.util.link_scanner.canvas_link_scanner import canvas_resource_id_prefix
- from qcanvas.util.link_scanner.resource_scanner import ResourceScanner
- from qcanvas.util.progress_reporter import ProgressReporter, noop_reporter
- from qcanvas.util.task_pool import TaskPool
-
- _logger = logging.getLogger("course_loader")
-
-
- @dataclass
- class TransientModulePage:
-     page: queries.Page | queries.File
-     course_id: str
-     module_id: str
-     position: int
-
-
- def _prepare_out_of_date_pages_for_loading(g_courses: Sequence[queries.Course], pages: Sequence[db.ModuleItem]) -> list[
-     TransientModulePage]:
-     """
-     Removes pages that are up-to-date from the pages list by comparing the last update time of the pages from the query
-     to the last update time of the pages in the database.
-
-     Parameters
-     ----------
-     g_courses
-         The list of courses (with module items) to check for an update.
-     pages
-         The list of pages already existing in the database.
-     Returns
-     -------
-     list[TransientModulePage]
-         A list of pages that have had an update to them.
-     """
-     pages_id_mapped = {x.id: x for x in pages}
-
-     result: list[TransientModulePage] = []
-
-     for g_course in g_courses:
-         for g_module in g_course.modules_connection.nodes:
-             for item_position, g_module_item in enumerate(g_module.module_items):
-                 content = g_module_item.content
-
-                 if isinstance(content, (queries.File, queries.Page)):
-                     # todo need to decide how to only rescan old pages or only rescan new pages without fetching content of old pages again for no good reason
-                     if (
-                             content.m_id not in pages_id_mapped
-                             or content.updated_at.replace(tzinfo=None) > pages_id_mapped[content.m_id].updated_at
-                     ):
-                         result.append(TransientModulePage(content, g_course.m_id, g_module.q_id, item_position))
-                     else:
-                         _logger.debug("Page %s is already up to date", content.m_id)
-
-     return result
-
-
- # todo make this reusable and add some way of refreshing only a list of pages or one page or one course or something
- # todo use logger instead of print and put some signals around the place for useful things, e.g. indexing progress
- class DataManager:
-     """
-     Responsible for storing all data pulled from canvas or other websites in the database.
-     Provides functions for synchronizing with canvas and downloading files.
-     """
-     def __init__(self,
-                  client: CanvasClient,
-                  sessionmaker: AsyncSessionMaker,
-                  link_scanners: Sequence[ResourceScanner]):
-
-         self.client = client
-         self._link_scanners = link_scanners
-         self._session_maker = sessionmaker
-
-         self._resource_pool = TaskPool[db.Resource]()
-         # todo just remove this thing it does nothing
-         self._moduleitem_pool = TaskPool[db.ModuleItem]()
-         self.download_pool = DownloadPool()
-
-         # Map all the scanners we have to their own name
-         self._scanner_name_map = {scanner.name: scanner for scanner in self._link_scanners}
-
-         self._init_called = False
-
-     async def init(self):
-         """
-         Load existing pages and resources from the database, so they don't have to be fetched from canvas again
-         """
-         self._init_called = True
-
-         async with self._session_maker.begin() as session:
-             # Load existing pages and resources from the database
-             existing_pages = (await session.execute(
-                 select(db.ModuleItem)
-                 .options(selectin_polymorphic(db.ModuleItem, [db.ModulePage]))
-             )).scalars().all()
-
-             existing_resources = (await session.execute(
-                 select(db.Resource)
-             )).scalars().all()
-
-             # Add the existing items to the relevant taskpools
-             self._add_resources_and_pages_to_taskpool(existing_pages=existing_pages,
-                                                       existing_resources=existing_resources)
-
-     async def _download_resource_helper(self, link_handler: ResourceScanner, resource: db.Resource):
-         try:
-             async for progress in link_handler.download(resource):
-                 yield progress
-
-             # Do this here because this function will only be called once for this resource
-             async with self._session_maker.begin() as session:
-                 session.add(resource)
-                 resource.state = db.ResourceState.DOWNLOADED
-         except BaseException as e:
-             # Something went wrong, record the failure in the database
-             async with self._session_maker.begin() as session:
-                 session.add(resource)
-                 resource.state = db.ResourceState.FAILED
-                 resource.fail_message = str(e)
-
-             raise e
-
-     async def download_resource(self, resource: db.Resource):
-         if not self._init_called:
-             raise Exception("Init was not called")
-
-         # Resource ids look like this: "canvas_file:387837", and we just want the "canvas_file" part
-         scanner_name: str = resource.id.split(':', 2)[0]
-         # Find the scanner that will deal with this resource
-         scanner = self._scanner_name_map[scanner_name]
-
-         await self.download_pool.submit(resource.id, lambda: self._download_resource_helper(scanner, resource))
-
-     async def update_item(self, item: db.Base):
-         async with self._session_maker.begin() as session:
-             await session.merge(item)
-
-     async def get_data(self):
-         """
-         Loads all the course data
-         """
-         async with self._session_maker.begin() as session:
-             module_items_load = selectinload(db.Course.modules).joinedload(db.Module.items)
-
-             # Eagerly load fucking everything
-             options = [
-                 selectinload(db.Course.modules)
-                 .joinedload(db.Module.course),
-
-                 module_items_load.selectin_polymorphic([db.ModulePage, db.ModuleFile])
-                 .joinedload(db.ModuleItem.module),
-
-                 module_items_load.joinedload(db.ModuleItem.resources),
-                 selectinload(db.Course.assignments)
-                 .joinedload(db.Assignment.course),
-
-                 selectinload(db.Course.assignments)
-                 .joinedload(db.Assignment.resources),
-
-                 selectinload(db.Course.term),
-
-                 selectinload(db.Course.module_items)
-                 .joinedload(db.ModuleItem.course),
-
-                 selectinload(db.Course.preferences)
-                 .joinedload(db.CoursePreferences.course),
-
-                 selectinload(db.Course.resources)
-                 .joinedload(db.Resource.course)
-             ]
-
-             return (await session.execute(select(db.Course).options(*options))).scalars().all()
-
-     async def synchronize_with_canvas(self, progress_reporter: ProgressReporter = noop_reporter):
-         section = progress_reporter.section("Loading index", 0)
-         raw_query = (await self.client.do_graphql_query(gql(queries.all_courses.DEFINITION), detailed=True))
-         section.increment_progress()
-
-         await self.load_courses_data(queries.AllCoursesQueryData(**raw_query).all_courses, progress_reporter)
-
-     async def load_courses_data(self, g_courses: Sequence[queries.Course], progress_reporter: ProgressReporter):
-         """
-         Loads data for all specified courses, including loading module pages and scanning for resources.
-         """
-
-         if not self._init_called:
-             raise Exception("Init was not called")
-
-         try:
-             async with self._session_maker.begin() as session:
-                 # Load module pages/files for the courses
-                 await self._load_module_items(g_courses, session, progress_reporter)
-
-                 # Collect assignments from the courses
-                 assignments = []
-
-                 for g_course in g_courses:
-                     # Create needed data in the session
-                     term = await conv_helper.create_term(g_course, session)
-                     await conv_helper.create_course(g_course, session, term)
-                     await conv_helper.create_modules(g_course, session)
-
-                     # Add course assignments to the list
-                     assignments.extend(await conv_helper.create_assignments(g_course, session))
-
-                 # Scan assignments for resources
-                 await self._scan_assignments_for_resources(assignments, session, progress_reporter)
-
-                 # Add all resources back into the session
-                 session.add_all(self._resource_pool.results())
-             progress_reporter.finished()
-         except BaseException as e:
-             traceback.print_exc()
-             progress_reporter.errored(e)
-
-     async def _scan_assignments_for_resources(self, assignments: Sequence[db.Assignment], session: AsyncSession,
-                                               progress_reporter: ProgressReporter):
-         """
-         Scans assignments for resources
-         """
-
-         # Link the resources found to each page in the database
-         await resource_helper.create_assignment_resource_relations(
-             # Find all the resources in each assignment description
-             await resource_helper.find_resources_in_pages(
-                 link_scanners=self._link_scanners,
-                 resource_pool=self._resource_pool,
-                 items=assignments,
-                 progress_reporter=progress_reporter
-             ),
-             session
-         )
-
-     async def _load_module_items(self, g_courses: Sequence[queries.Course], session: AsyncSession,
-                                  progress_reporter: ProgressReporter):
-         # Get the ids of all the courses we are going to index/load
-         course_ids = [g_course.m_id for g_course in g_courses]
-
-         # Prepare pages for loading
-         existing_pages = (
-             await session.execute(
-                 select(db.ModulePage)
-                 .where(db.ModuleItem.course_id.in_(course_ids))
-             )).scalars().all()
-
-         # Filter out pages that don't need updating
-         pages_to_update = _prepare_out_of_date_pages_for_loading(g_courses, existing_pages)
-
-         if len(pages_to_update) == 0:
-             return
-
-         # Load the content for all the pages that need updating
-         module_items: list[db.ModuleItem] = await self._load_content_for_pages(pages_to_update, progress_reporter)
-         module_pages = [item for item in module_items if isinstance(item, db.ModulePage)]
-
-         # Link the resources found to the pages they were found on and add them to the database
-         await resource_helper.create_module_item_resource_relations(
-             # Find all the resources in each page
-             await resource_helper.find_resources_in_pages(
-                 link_scanners=self._link_scanners,
-                 resource_pool=self._resource_pool,
-                 progress_reporter=progress_reporter,
-                 # Collect just the module pages for scanning
-                 items=module_pages
-             ),
-             session
-         )
-
-         # empty inserts/upserts causes an sql error. don't do them
-         if len(module_pages) > 0:
-             # Add all the module items to the session
-             # shitty bandaid fix
-             upsert_item = sqlite_upsert(db.ModuleItem).values([self.moduleitem_dict(item) for item in module_pages])
-             upsert_item = upsert_item.on_conflict_do_update(
-                 index_elements=[db.ModuleItem.id],
-                 set_=dict(name=upsert_item.excluded.name, updated_at=upsert_item.excluded.updated_at,
-                           position=upsert_item.excluded.position),
-
-             )
-
-             upsert_page = sqlite_upsert(db.ModulePage).values([self.page_dict(item) for item in module_pages])
-             upsert_page = upsert_page.on_conflict_do_update(
-                 index_elements=[db.ModulePage.id],
-                 set_=dict(content=upsert_page.excluded.content)
-             )
-
-             await session.execute(upsert_item)
-             await session.execute(upsert_page)
-
-         session.add_all([item for item in module_items if isinstance(item, db.ModuleFile)])
-
-     @staticmethod
-     def page_dict(page: db.ModulePage) -> dict[str, object]:
-         return {"id": page.id, "content": page.content}
-
-     @staticmethod
-     def moduleitem_dict(page: db.ModuleItem) -> dict[str, object]:
-         return {"id": page.id, "name": page.name, "updated_at": page.updated_at, "position": page.position,
-                 "module_id": page.module_id, "course_id": page.course_id, "type": page.type,
-                 "created_at": page.created_at}
-
-     def _add_resources_and_pages_to_taskpool(self, existing_pages: Sequence[db.ModuleItem],
-                                              existing_resources: Sequence[db.Resource]):
-         self._moduleitem_pool.add_values({page.id: page for page in existing_pages})
-         self._resource_pool.add_values({resource.id: resource for resource in existing_resources})
-         # Add downloaded resources to the resource pool so we don't download them again
-         self.download_pool.add_values(
-             {resource.id: None for resource in existing_resources if resource.state == db.ResourceState.DOWNLOADED})
-
-     async def _load_content_for_pages(self, pages: Sequence[TransientModulePage],
-                                       progress_reporter: ProgressReporter) -> list[db.ModuleItem]:
-         """
-         Loads the page content for the specified pages
-         Parameters
-         ----------
-         pages
-             The pages to load
-         Returns
-         -------
-         list
-             The list of complete pages with page content loaded.
-         """
-         progress = progress_reporter.section("Loading page content", len(pages))
-         tasks: list[Task[db.ModuleItem | None]] = []
-
-         for page in pages:
-             content = page.page
-
-             # Load the content for the pages
-             if isinstance(content, queries.File):
-                 task = asyncio.create_task(
-                     self._load_module_file(content, page.course_id, page.module_id, page.position))
-                 task.add_done_callback(progress.increment_progress)
-                 tasks.append(task)
-             elif isinstance(content, queries.Page):
-                 task = asyncio.create_task(
-                     self.load_module_page(content, page.course_id, page.module_id, page.position))
-                 task.add_done_callback(progress.increment_progress)
-                 tasks.append(task)
-
-         if len(tasks) > 0:
-             await asyncio.wait(tasks)
-
-             # Collect results and filter out nulls
-             return [task.result() for task in tasks if task.result() is not None]
-         else:
-             return []
-
-     async def _load_module_file(self, g_file: queries.File, course_id: str, module_id: str,
-                                 position: int) -> db.ModuleFile:
-         """
-         Fetches resource information for the module file and converts it into a module item
-         """
-         _logger.debug(f"Loading module file %s %s", g_file.m_id, g_file.display_name)
-
-         resource = await self._resource_pool.submit(
-             f"{canvas_resource_id_prefix}:{g_file.m_id}",  # to match the format used by canvas link extractor
-             lambda: self._fetch_module_file_resource(g_file, course_id)
-         )
-
-         return await self._moduleitem_pool.submit(
-             g_file.m_id,
-             lambda: self._fetch_module_file_page(g_file, resource, course_id, module_id, position)
-         )
-
-     async def _fetch_module_file_resource(self, file: queries.File, course_id: str) -> db.Resource:
-         """
-         Fetches information about the specified file from canvas
-         """
-         _logger.debug(f"Fetching file (for module file) %s %s", file.m_id, file.display_name)
-         result = await self.client.get_file(file.m_id, course_id)
-         resource = db.convert_file(file, result.size)
-         resource.id = f"{canvas_resource_id_prefix}:{resource.id}"
-         resource.course_id = course_id
-
-         return resource
-
-     async def load_module_page(self, g_page: queries.Page, course_id: str, module_id: str,
-                                position: int) -> db.ModulePage | None:
-         """
-         Creates task for loading the specified module page
-         """
-         return await self._fetch_module_item_page(g_page, course_id, module_id, position)
-
-     async def _fetch_module_item_page(self, page: queries.Page, course_id: str, module_id: str,
-                                       position: int) -> db.ModulePage | None:
-         """
-         Fetches module page content from canvas. Returns None if the page could not be loaded.
-         """
-         _logger.debug("Fetching module page %s %s", page.m_id, page.title)
-
-         try:
-             # Get the page
-             result = await self.client.get_page(page.m_id, course_id)
-         except BaseException as e:
-             # Handle any errors
-             _logger.error(e)
-             traceback.print_exc()
-             return None
-
-         if result.locked_for_user:
-             _logger.error("Page %s %s is locked", page.m_id, page.title)
-             return None
-
-         page = db.convert_page(page, result.body)
-         page.module_id = module_id
-         page.course_id = course_id
-         page.position = position
-
-         return page
-
-     @staticmethod
-     async def _fetch_module_file_page(file: queries.File, resource: db.Resource, course_id: str,
-                                       module_id: str, position: int) -> db.ModuleFile:
-         """
-         Converts module file information into the database format
-         """
-         _logger.debug(f"Creating page for module file %s %s", file.m_id, file.display_name)
-
-         page = db.convert_file_page(file)
-         page.module_id = module_id
-         page.course_id = course_id
-         page.position = position
-         page.resources.append(resource)
-
-         return page
-
-     @property
-     def link_scanners(self):
-         return self._link_scanners
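
The removed _load_module_items above leans on SQLAlchemy's SQLite upsert construct (insert(...).on_conflict_do_update(...)) and deliberately skips the statement when there is nothing to insert, because an empty VALUES list raises an error. What follows is a minimal, self-contained sketch of that idiom in SQLAlchemy 2.0 style; the Item model and the in-memory database are illustrative stand-ins, not qcanvas's actual schema.

from sqlalchemy import String, create_engine, select
from sqlalchemy.dialects.sqlite import insert as sqlite_upsert
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class Item(Base):
    __tablename__ = "items"

    id: Mapped[str] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String())


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session, session.begin():
    # Seed an existing row so the conflict branch is actually exercised
    session.add(Item(id="1", name="stale name"))

rows = [{"id": "1", "name": "fresh name"}, {"id": "2", "name": "brand new"}]

with Session(engine) as session, session.begin():
    # As in the removed code: never build .values() from an empty list
    if rows:
        stmt = sqlite_upsert(Item).values(rows)
        stmt = stmt.on_conflict_do_update(
            index_elements=["id"],
            set_=dict(name=stmt.excluded.name),
        )
        session.execute(stmt)

with Session(engine) as session:
    print(session.execute(select(Item.id, Item.name)).all())
    # e.g. [('1', 'fresh name'), ('2', 'brand new')]
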
qcanvas/util/course_indexer/resource_helpers.py (deleted)
@@ -1,191 +0,0 @@
- import asyncio
- import logging
- from dataclasses import dataclass
- from typing import Sequence
-
- from bs4 import Tag, BeautifulSoup
- from sqlalchemy.ext.asyncio import AsyncSession
-
- import qcanvas.db as db
- from qcanvas.util.link_scanner import ResourceScanner
- from qcanvas.util.progress_reporter import ProgressReporter
- from qcanvas.util.task_pool import TaskPool
-
- _logger = logging.getLogger(__name__)
-
- resource_elements = ["a", "iframe", "img"]
-
-
- # todo could probably just use the database types directly now
- @dataclass
- class TransientResourceToPageLink:
-     """
-     Represents a temporary link between a page and a resource that will be added to the database soon.
-     """
-     page_id: str
-     resource_id: str
-
-     def __hash__(self):
-         return hash(self.page_id) ^ hash(self.resource_id)
-
-
- async def create_module_item_resource_relations(relations: Sequence[TransientResourceToPageLink],
-                                                 session: AsyncSession):
-     """
-     Creates a link between module items/pages and resources found on those pages
-     """
-     for relation in relations:
-         existing_relation = await session.get(
-             db.ResourceToModuleItemAssociation,
-             (relation.page_id, relation.resource_id)
-         )
-
-         if existing_relation is None:
-             session.add(
-                 db.ResourceToModuleItemAssociation(
-                     module_item_id=relation.page_id,
-                     resource_id=relation.resource_id
-                 )
-             )
-
-
- async def create_assignment_resource_relations(relations: Sequence[TransientResourceToPageLink], session: AsyncSession):
-     """
-     Turns temporary TransientResourceToPageLink into a persistent relation in the database
-     """
-     for relation in relations:
-         if await session.get(db.ResourceToAssignmentAssociation, (relation.page_id, relation.resource_id)) is None:
-             session.add(
-                 db.ResourceToAssignmentAssociation(
-                     assignment_id=relation.page_id,
-                     resource_id=relation.resource_id
-                 )
-             )
-
-
- # todo change resource system to think of resources as links on a page with a shallow id (that may be the same as the deep id) which links to one or more deep ids
- async def find_resources_in_pages(link_scanners: Sequence[ResourceScanner], resource_pool: TaskPool[db.Resource],
-                                   items: Sequence[db.PageLike], progress_reporter: ProgressReporter) -> list[
-     TransientResourceToPageLink]:
-     """
-     Produce a list of resource to page links from resources extracted from the specified `items` using `link_scanners`.
-     Extracted resources will be added to `resource_pool`
-     """
-     progress = progress_reporter.section("Indexing resources", len(items))
-     tasks = []
-
-     for item in items:
-         # Assignment descriptions may be null. Avoid creating extra tasks by checking here
-         if item.content is None:
-             progress.increment_progress()
-             continue
-
-         # extract resources from the page
-         task = asyncio.create_task(_extract_resources_from_page(link_scanners, resource_pool, item))
-         task.add_done_callback(progress.increment_progress)
-         tasks.append(task)
-
-     if len(tasks) > 0:
-         # Wait for all tasks to complete
-         await asyncio.wait(tasks)
-
-         result = []
-         # Flatten the array of results
-         for task in tasks:
-             result.extend(task.result())
-
-         return result
-     else:
-         return []
-
-
- async def _extract_resources_from_page(link_scanners: Sequence[ResourceScanner], resource_pool: TaskPool[db.Resource],
-                                        page: db.PageLike) -> list[TransientResourceToPageLink]:
-     """
-     Extracts any detected resource links from the specified page and then uses `link_scanners` to extract information
-     about which is then added to the `resource_pool`.
-
-     Returns
-     -------
-     list
-         A list of resource to page links for any resources found on this page.
-     """
-     _logger.debug(f"Scanning %s %s for files", page.id, page.name)
-     tasks = []
-
-     # Extract iframes, hyperlinks, etc from the page
-     for link in _scan_page_for_links(page):
-         tasks.append(asyncio.create_task(_process_link(link_scanners, resource_pool, link, page.course_id)))
-
-     if len(tasks) > 0:
-         # Wait for all tasks to complete
-         await asyncio.wait(tasks)
-
-         task_results = [task.result() for task in tasks]
-
-         # Convert every non-null result in the task results to a resource page link and return it
-         return [
-             TransientResourceToPageLink(page_id=page.id, resource_id=result.id)
-             for result in task_results if result is not None
-         ]
-     else:
-         return []
-
-
- def _scan_page_for_links(page: db.PageLike) -> list[Tag]:
-     """
-     Extracts (potential) resource elements from a PageLike object
-     """
-     soup = BeautifulSoup(page.content, 'html.parser')
-     return list(soup.find_all(resource_elements))
-
-
- async def _process_link(link_scanners: Sequence[ResourceScanner], resource_pool: TaskPool[db.Resource], link: Tag,
-                         course_id: str) -> db.Resource | None:
-     """
-     Iterates over `link_scanners` to find one that will accept `link`, then uses it to fetch resource information and
-     adds it to the `resource_pool`.
-     If no scanner accepts the link then None is returned.
-     """
-     for scanner in link_scanners:
-         if scanner.accepts_link(link):
-             resource_id = scanner.extract_id(link)
-
-             return await resource_pool.submit(
-                 f"{scanner.name}:{resource_id}",  # match the format used by the resource id
-                 lambda: _extract_file_info(link, scanner, resource_id, course_id)
-             )
-
-     return None
-
-
- async def _extract_file_info(link: Tag, scanner: ResourceScanner, resource_id: str,
-                              course_id: str) -> db.Resource | None:
-     """
-     Extracts file info from `link` using `scanner` and assigns the course_id to the resulting resource.
-
-     Parameters
-     ----------
-     link
-         The html element to scan
-     scanner
-         The scanner to process the link with
-     course_id
-         The id of the course the file belongs to
-     Returns
-     -------
-     db.Resource
-         The resource if the link was processed successfully.
-     None
-         If processing failed
-     """
-     try:
-         _logger.debug(f"Fetching info for file %s with scanner %s", scanner.extract_id(link), scanner.name)
-
-         result = await scanner.extract_resource(link, resource_id)
-         result.id = f"{scanner.name}:{result.id}"  # Prefix the scanner name to prevent resources from different sites potentially clashing
-         result.course_id = course_id
-         return result
-     except BaseException as e:
-         _logger.error(f"Failed to retrieve info for file id %s: %s", f"{scanner.name}:{resource_id}", str(e))
-         return None
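
The removed resource_helpers module works by parsing each page body with BeautifulSoup, collecting every a, iframe and img element, then asking each registered ResourceScanner whether it accepts the link before prefixing the extracted id with the scanner's name (e.g. canvas_file:387837). A compact, self-contained sketch of that extraction pass follows; the sample HTML and the stand-in to_resource_id() logic are illustrative, not the real scanner implementations.

from bs4 import BeautifulSoup, Tag

resource_elements = ["a", "iframe", "img"]

html = """
<p>Lecture notes:
  <a data-api-returntype="File" href="https://example.instructure.com/files/387837">notes.pdf</a>
  <img src="https://example.com/diagram.png">
</p>
"""


def extract_candidate_links(content: str) -> list[Tag]:
    # Same element filter as _scan_page_for_links() above
    soup = BeautifulSoup(content, "html.parser")
    return list(soup.find_all(resource_elements))


def to_resource_id(tag: Tag) -> str | None:
    # Stand-in for ResourceScanner.accepts_link()/extract_id(): treat Canvas
    # file links specially, fall back to the raw URL for anything else
    if tag.name == "a" and tag.get("data-api-returntype") == "File":
        return "canvas_file:" + tag["href"].rstrip("/").rsplit("/", 1)[-1]
    target = tag.get("href") or tag.get("src")
    return f"url:{target}" if target else None


ids = [rid for rid in map(to_resource_id, extract_candidate_links(html)) if rid is not None]
print(ids)  # ['canvas_file:387837', 'url:https://example.com/diagram.png']
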
qcanvas/util/download_pool.py (deleted)
@@ -1,58 +0,0 @@
- import asyncio
- from typing import Callable, Any
-
- from PySide6.QtCore import QObject, Signal
-
- from qcanvas.util.task_pool import TaskPool
-
-
- class DownloadPool(TaskPool[None], QObject):
-     """
-     Like the TaskPool, but uses a queue (a channel in kotlin terms) in the task to report download progress.
-     When using .submit(), it is EXPECTED that the lambda returns a function that uses yield to signal progress, e.g:
-     ```
-     pool.submit(my_task_id, lambda: my_func())
-
-     async def my_func(channel: asyncio.Queue):
-         for i in range(0, 10):
-             await slow_thing()
-             yield (i / 10)  # can be whatever format/type you want
-     ```
-     """
-     download_progress_updated = Signal(object, Any)
-     download_failed = Signal(object)
-     download_finished = Signal(object)
-
-     def __init__(self, parent: QObject | None = None):
-         QObject.__init__(self, parent)
-         TaskPool.__init__(self)
-
-     async def _handle_task(self, func: Callable, task_id: object, event: asyncio.Event, func_args: dict):
-         sem = self._semaphore
-
-         try:
-             # Consume progress updates 'yield'ed from the function
-             async for progress in func(**func_args):
-                 # Fire the download progress update signal
-                 self.download_progress_updated.emit(task_id, progress)
-         except BaseException as e:
-             # Try to maintain integrity when a task fails
-             async with sem:
-                 # Release anything else waiting for this task
-                 event.set()
-                 # Remove the record from the results map
-                 del self._results[task_id]
-
-             # Emit failure signal and rethrow
-             self.download_failed.emit(task_id)
-             raise e
-
-         async with sem:
-             self._logger.debug("Task %s finished.", task_id)
-             # Record this task as done
-             self._results[task_id] = None
-             event.set()
-
-         self.download_finished.emit(task_id)
-
-         return None
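
The removed DownloadPool documents a convention where the callable handed to submit() returns an async generator that yields progress values, which _handle_task consumes and re-emits as Qt signals. Below is a minimal asyncio-only sketch of that convention, with plain print() calls standing in for the download_progress_updated/download_failed/download_finished signals; fake_download and run_task are illustrative names, not qcanvas APIs.

import asyncio
from typing import AsyncIterator, Callable


async def fake_download(chunks: int = 5) -> AsyncIterator[float]:
    # Stand-in for a scanner's download(): yield a progress value per chunk
    for i in range(1, chunks + 1):
        await asyncio.sleep(0.01)
        yield i / chunks  # any format/type works, as the docstring notes


async def run_task(task_id: str, factory: Callable[[], AsyncIterator[float]]) -> None:
    # Roughly what DownloadPool._handle_task does with each submitted factory:
    # consume the yielded values and forward them (here: print instead of emit)
    try:
        async for progress in factory():
            print(f"{task_id}: {progress:.0%}")
    except BaseException:
        print(f"{task_id}: failed")  # DownloadPool emits download_failed here
        raise
    print(f"{task_id}: finished")  # ...and download_finished here


asyncio.run(run_task("canvas_file:387837", lambda: fake_download()))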