qcanvas 0.0.5.6a0__py3-none-any.whl → 1.0.3.post0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qcanvas might be problematic. Click here for more details.

Files changed (114) hide show
  1. qcanvas/app_start/__init__.py +47 -0
  2. qcanvas/backend_connectors/__init__.py +2 -0
  3. qcanvas/backend_connectors/frontend_resource_manager.py +63 -0
  4. qcanvas/backend_connectors/qcanvas_task_master.py +28 -0
  5. qcanvas/icons/__init__.py +6 -0
  6. qcanvas/icons/file-download-failed.svg +6 -0
  7. qcanvas/icons/file-downloaded.svg +6 -0
  8. qcanvas/icons/file-not-downloaded.svg +6 -0
  9. qcanvas/icons/file-unknown.svg +6 -0
  10. qcanvas/icons/icons.qrc +4 -0
  11. qcanvas/icons/main_icon.svg +7 -7
  12. qcanvas/icons/rc_icons.py +580 -214
  13. qcanvas/icons/sync.svg +7 -0
  14. qcanvas/run.py +29 -0
  15. qcanvas/ui/course_viewer/__init__.py +2 -0
  16. qcanvas/ui/course_viewer/content_tree.py +123 -0
  17. qcanvas/ui/course_viewer/course_tree.py +93 -0
  18. qcanvas/ui/course_viewer/course_viewer.py +62 -0
  19. qcanvas/ui/course_viewer/tabs/__init__.py +3 -0
  20. qcanvas/ui/course_viewer/tabs/assignment_tab/__init__.py +1 -0
  21. qcanvas/ui/course_viewer/tabs/assignment_tab/assignment_tab.py +168 -0
  22. qcanvas/ui/course_viewer/tabs/assignment_tab/assignment_tree.py +104 -0
  23. qcanvas/ui/course_viewer/tabs/content_tab.py +96 -0
  24. qcanvas/ui/course_viewer/tabs/mail_tab/__init__.py +1 -0
  25. qcanvas/ui/course_viewer/tabs/mail_tab/mail_tab.py +68 -0
  26. qcanvas/ui/course_viewer/tabs/mail_tab/mail_tree.py +70 -0
  27. qcanvas/ui/course_viewer/tabs/page_tab/__init__.py +1 -0
  28. qcanvas/ui/course_viewer/tabs/page_tab/page_tab.py +36 -0
  29. qcanvas/ui/course_viewer/tabs/page_tab/page_tree.py +74 -0
  30. qcanvas/ui/course_viewer/tabs/resource_rich_browser.py +176 -0
  31. qcanvas/ui/course_viewer/tabs/util.py +1 -0
  32. qcanvas/ui/main_ui/course_viewer_container.py +52 -0
  33. qcanvas/ui/main_ui/options/__init__.py +3 -0
  34. qcanvas/ui/main_ui/options/quick_sync_option.py +25 -0
  35. qcanvas/ui/main_ui/options/sync_on_start_option.py +25 -0
  36. qcanvas/ui/main_ui/qcanvas_window.py +192 -0
  37. qcanvas/ui/main_ui/status_bar_progress_display.py +153 -0
  38. qcanvas/ui/memory_tree/__init__.py +2 -0
  39. qcanvas/ui/memory_tree/_tree_memory.py +66 -0
  40. qcanvas/ui/memory_tree/memory_tree_widget.py +133 -0
  41. qcanvas/ui/memory_tree/memory_tree_widget_item.py +19 -0
  42. qcanvas/ui/setup/__init__.py +2 -0
  43. qcanvas/ui/setup/setup_checker.py +17 -0
  44. qcanvas/ui/setup/setup_dialog.py +212 -0
  45. qcanvas/util/__init__.py +2 -0
  46. qcanvas/util/basic_fonts.py +12 -0
  47. qcanvas/util/fe_resource_manager.py +23 -0
  48. qcanvas/util/html_cleaner.py +25 -0
  49. qcanvas/util/layouts.py +52 -0
  50. qcanvas/util/logs.py +6 -0
  51. qcanvas/util/paths.py +41 -0
  52. qcanvas/util/settings/__init__.py +9 -0
  53. qcanvas/util/settings/_client_settings.py +29 -0
  54. qcanvas/util/settings/_mapped_setting.py +63 -0
  55. qcanvas/util/settings/_ui_settings.py +34 -0
  56. qcanvas/util/ui_tools.py +41 -0
  57. qcanvas/util/url_checker.py +13 -0
  58. qcanvas-1.0.3.post0.dist-info/METADATA +61 -0
  59. qcanvas-1.0.3.post0.dist-info/RECORD +64 -0
  60. {qcanvas-0.0.5.6a0.dist-info → qcanvas-1.0.3.post0.dist-info}/WHEEL +1 -1
  61. qcanvas-1.0.3.post0.dist-info/entry_points.txt +3 -0
  62. qcanvas/__main__.py +0 -155
  63. qcanvas/db/__init__.py +0 -5
  64. qcanvas/db/database.py +0 -337
  65. qcanvas/db/db_converter_helper.py +0 -81
  66. qcanvas/net/canvas/__init__.py +0 -2
  67. qcanvas/net/canvas/canvas_client.py +0 -209
  68. qcanvas/net/canvas/legacy_canvas_types.py +0 -124
  69. qcanvas/net/custom_httpx_async_transport.py +0 -34
  70. qcanvas/net/self_authenticating.py +0 -108
  71. qcanvas/queries/__init__.py +0 -4
  72. qcanvas/queries/all_courses.gql +0 -7
  73. qcanvas/queries/all_courses.py +0 -108
  74. qcanvas/queries/canvas_course_data.gql +0 -51
  75. qcanvas/queries/canvas_course_data.py +0 -143
  76. qcanvas/ui/container_item.py +0 -11
  77. qcanvas/ui/main_ui.py +0 -249
  78. qcanvas/ui/menu_bar/__init__.py +0 -0
  79. qcanvas/ui/menu_bar/grouping_preferences_menu.py +0 -61
  80. qcanvas/ui/menu_bar/theme_selection_menu.py +0 -39
  81. qcanvas/ui/setup_dialog.py +0 -190
  82. qcanvas/ui/status_bar_reporter.py +0 -40
  83. qcanvas/ui/viewer/__init__.py +0 -0
  84. qcanvas/ui/viewer/course_list.py +0 -96
  85. qcanvas/ui/viewer/file_list.py +0 -195
  86. qcanvas/ui/viewer/file_view_tab.py +0 -62
  87. qcanvas/ui/viewer/page_list_viewer.py +0 -150
  88. qcanvas/util/app_settings.py +0 -98
  89. qcanvas/util/constants.py +0 -5
  90. qcanvas/util/course_indexer/__init__.py +0 -1
  91. qcanvas/util/course_indexer/conversion_helpers.py +0 -78
  92. qcanvas/util/course_indexer/data_manager.py +0 -447
  93. qcanvas/util/course_indexer/resource_helpers.py +0 -191
  94. qcanvas/util/download_pool.py +0 -58
  95. qcanvas/util/helpers/__init__.py +0 -0
  96. qcanvas/util/helpers/canvas_sanitiser.py +0 -47
  97. qcanvas/util/helpers/file_icon_helper.py +0 -34
  98. qcanvas/util/helpers/qaction_helper.py +0 -25
  99. qcanvas/util/helpers/theme_helper.py +0 -45
  100. qcanvas/util/link_scanner/__init__.py +0 -2
  101. qcanvas/util/link_scanner/canvas_link_scanner.py +0 -41
  102. qcanvas/util/link_scanner/canvas_media_object_scanner.py +0 -60
  103. qcanvas/util/link_scanner/dropbox_scanner.py +0 -68
  104. qcanvas/util/link_scanner/resource_scanner.py +0 -69
  105. qcanvas/util/progress_reporter.py +0 -101
  106. qcanvas/util/self_updater.py +0 -55
  107. qcanvas/util/task_pool.py +0 -253
  108. qcanvas/util/tree_util/__init__.py +0 -3
  109. qcanvas/util/tree_util/expanding_tree.py +0 -165
  110. qcanvas/util/tree_util/model_helpers.py +0 -36
  111. qcanvas/util/tree_util/tree_model.py +0 -85
  112. qcanvas-0.0.5.6a0.dist-info/METADATA +0 -21
  113. qcanvas-0.0.5.6a0.dist-info/RECORD +0 -61
  114. /qcanvas/{net → ui/main_ui}/__init__.py +0 -0
@@ -1,191 +0,0 @@
1
- import asyncio
2
- import logging
3
- from dataclasses import dataclass
4
- from typing import Sequence
5
-
6
- from bs4 import Tag, BeautifulSoup
7
- from sqlalchemy.ext.asyncio import AsyncSession
8
-
9
- import qcanvas.db as db
10
- from qcanvas.util.link_scanner import ResourceScanner
11
- from qcanvas.util.progress_reporter import ProgressReporter
12
- from qcanvas.util.task_pool import TaskPool
13
-
14
- _logger = logging.getLogger(__name__)
15
-
16
- resource_elements = ["a", "iframe", "img"]
17
-
18
-
19
- # todo could probably just use the database types directly now
20
@dataclass
class TransientResourceToPageLink:
    """
    Represents a temporary link between a page and a resource that will be added to the database soon.

    Instances are hashable, so duplicate links can be collapsed with a set or used as dictionary keys.
    """
    # Id of the page (module item or assignment) the resource was found on
    page_id: str
    # Id of the resource that the page links to
    resource_id: str

    def __hash__(self):
        # Hash the ordered pair instead of XOR-ing the two field hashes. XOR is symmetric, so (a, b) and (b, a)
        # would collide, and it always produces 0 whenever both ids happen to be equal.
        return hash((self.page_id, self.resource_id))
30
-
31
-
32
async def create_module_item_resource_relations(relations: Sequence[TransientResourceToPageLink],
                                                session: AsyncSession):
    """
    Adds a `db.ResourceToModuleItemAssociation` to `session` for every link in `relations` that the session
    cannot already find under its (page id, resource id) key.
    """
    for link in relations:
        association_key = (link.page_id, link.resource_id)

        # Skip links that already have an association
        if await session.get(db.ResourceToModuleItemAssociation, association_key) is not None:
            continue

        association = db.ResourceToModuleItemAssociation(
            module_item_id=link.page_id,
            resource_id=link.resource_id
        )
        session.add(association)
50
-
51
-
52
async def create_assignment_resource_relations(relations: Sequence[TransientResourceToPageLink], session: AsyncSession):
    """
    Adds a `db.ResourceToAssignmentAssociation` to `session` for every link in `relations` that the session
    cannot already find under its (page id, resource id) key.
    """
    for link in relations:
        existing_association = await session.get(
            db.ResourceToAssignmentAssociation,
            (link.page_id, link.resource_id)
        )

        # Skip links that already have an association
        if existing_association is not None:
            continue

        association = db.ResourceToAssignmentAssociation(
            assignment_id=link.page_id,
            resource_id=link.resource_id
        )
        session.add(association)
64
-
65
-
66
- # todo change resource system to think of resources as links on a page with a shallow id (that may be the same as the deep id) which links to one or more deep ids
67
async def find_resources_in_pages(link_scanners: Sequence[ResourceScanner], resource_pool: TaskPool[db.Resource],
                                  items: Sequence[db.PageLike], progress_reporter: ProgressReporter) -> list[
    TransientResourceToPageLink]:
    """
    Scans every item in `items` concurrently and collects the resource to page links that were found.
    Resources discovered along the way are submitted to `resource_pool` by the per-page scan.

    Progress is reported under an "Indexing resources" section with one step per item.
    """
    progress = progress_reporter.section("Indexing resources", len(items))
    scan_tasks = []

    for item in items:
        if item.content is not None:
            # One task per page; the progress section is advanced when the task completes
            scan_task = asyncio.create_task(_extract_resources_from_page(link_scanners, resource_pool, item))
            scan_task.add_done_callback(progress.increment_progress)
            scan_tasks.append(scan_task)
        else:
            # Assignment descriptions may be null. There is nothing to scan, so just count the item as done
            progress.increment_progress()

    if not scan_tasks:
        return []

    # Wait for every page to be scanned
    await asyncio.wait(scan_tasks)

    # Each task produced a list of links; merge them into one flat list
    found_links = []
    for scan_task in scan_tasks:
        found_links.extend(scan_task.result())

    return found_links
100
-
101
-
102
async def _extract_resources_from_page(link_scanners: Sequence[ResourceScanner], resource_pool: TaskPool[db.Resource],
                                       page: db.PageLike) -> list[TransientResourceToPageLink]:
    """
    Finds the candidate resource elements on `page` and processes each of them concurrently with
    `_process_link`, which hands accepted links over to the `resource_pool`.

    Returns
    -------
    list
        One resource to page link for every element that produced a resource. Elements that produced
        nothing are left out.
    """
    _logger.debug("Scanning %s %s for files", page.id, page.name)

    # One task per iframe, hyperlink, etc found on the page
    link_tasks = [
        asyncio.create_task(_process_link(link_scanners, resource_pool, link, page.course_id))
        for link in _scan_page_for_links(page)
    ]

    if not link_tasks:
        return []

    # Wait for every link to be processed
    await asyncio.wait(link_tasks)

    resources = [link_task.result() for link_task in link_tasks]

    # Only links that actually produced a resource become a page link
    return [
        TransientResourceToPageLink(page_id=page.id, resource_id=resource.id)
        for resource in resources if resource is not None
    ]
133
-
134
-
135
def _scan_page_for_links(page: db.PageLike) -> list[Tag]:
    """
    Parses the content of a PageLike object and returns every element whose tag name is listed in
    `resource_elements`.
    """
    parsed_page = BeautifulSoup(page.content, 'html.parser')
    candidates = parsed_page.find_all(resource_elements)
    return list(candidates)
141
-
142
-
143
async def _process_link(link_scanners: Sequence[ResourceScanner], resource_pool: TaskPool[db.Resource], link: Tag,
                        course_id: str) -> db.Resource | None:
    """
    Picks the first scanner in `link_scanners` that accepts `link` and submits a task to `resource_pool` that
    fetches the resource information with it.
    Returns None when no scanner accepts the link.
    """
    scanner = next((candidate for candidate in link_scanners if candidate.accepts_link(link)), None)

    if scanner is None:
        return None

    resource_id = scanner.extract_id(link)
    # match the format used by the resource id
    pool_key = f"{scanner.name}:{resource_id}"

    return await resource_pool.submit(
        pool_key,
        lambda: _extract_file_info(link, scanner, resource_id, course_id)
    )
160
-
161
-
162
async def _extract_file_info(link: Tag, scanner: ResourceScanner, resource_id: str,
                             course_id: str) -> db.Resource | None:
    """
    Extracts file info from `link` using `scanner` and assigns the course_id to the resulting resource.

    Parameters
    ----------
    link
        The html element to scan
    scanner
        The scanner to process the link with
    resource_id
        The id of the resource, passed on to `scanner.extract_resource`
    course_id
        The id of the course the file belongs to
    Returns
    -------
    db.Resource
        The resource if the link was processed successfully.
    None
        If processing failed
    """
    try:
        # Log the id we were given instead of asking the scanner to extract it a second time
        _logger.debug("Fetching info for file %s with scanner %s", resource_id, scanner.name)

        result = await scanner.extract_resource(link, resource_id)
        # Prefix the scanner name to prevent resources from different sites potentially clashing
        result.id = f"{scanner.name}:{result.id}"
        result.course_id = course_id
        return result
    except Exception as e:
        # Deliberately not BaseException: task cancellation (asyncio.CancelledError) and KeyboardInterrupt must
        # propagate instead of being reported as a failed lookup.
        _logger.error("Failed to retrieve info for file id %s: %s", f"{scanner.name}:{resource_id}", str(e))
        return None
@@ -1,58 +0,0 @@
1
- import asyncio
2
- from typing import Callable, Any
3
-
4
- from PySide6.QtCore import QObject, Signal
5
-
6
- from qcanvas.util.task_pool import TaskPool
7
-
8
-
9
class DownloadPool(TaskPool[None], QObject):
    """
    Like the TaskPool, but the task reports download progress by yielding values, which are re-emitted as Qt signals.
    When using .submit(), it is EXPECTED that the lambda returns an async generator that uses yield to signal
    progress, e.g:
    ```
    pool.submit(my_task_id, lambda: my_func())

    async def my_func():
        for i in range(0, 10):
            await slow_thing()
            yield (i / 10) # can be whatever format/type you want
    ```
    """
    # Emitted with (task id, progress value) for every value the task yields
    download_progress_updated = Signal(object, Any)
    # Emitted with the task id when the task raises
    download_failed = Signal(object)
    # Emitted with the task id when the task runs to completion
    download_finished = Signal(object)

    def __init__(self, parent: QObject | None = None):
        QObject.__init__(self, parent)
        TaskPool.__init__(self)

    async def _handle_task(self, func: Callable, task_id: object, event: asyncio.Event, func_args: dict):
        """
        Runs `func(**func_args)` as an async generator, forwarding every yielded value through
        `download_progress_updated`.

        On failure the task's entry in the results map is removed, `event` is set, `download_failed` is emitted
        and the original exception is re-raised. On success the result is recorded as None, `event` is set and
        `download_finished` is emitted.
        """
        sem = self._semaphore

        try:
            # Consume progress updates 'yield'ed from the function
            async for progress in func(**func_args):
                # Fire the download progress update signal
                self.download_progress_updated.emit(task_id, progress)
        except BaseException:
            # Try to maintain integrity when a task fails (BaseException so that cancellation is cleaned up too)
            async with sem:
                # Release anything else waiting for this task
                event.set()
                # Remove the record from the results map. A missing record must not mask the real failure or
                # prevent the failure signal from being emitted.
                try:
                    del self._results[task_id]
                except KeyError:
                    pass

            # Emit failure signal and rethrow (bare raise keeps the original traceback intact)
            self.download_failed.emit(task_id)
            raise

        async with sem:
            self._logger.debug("Task %s finished.", task_id)
            # Record this task as done
            self._results[task_id] = None
            event.set()

        self.download_finished.emit(task_id)

        return None
File without changes
@@ -1,47 +0,0 @@
1
- """
2
- I dedicate this file to removing the random crap canvas puts in its data.
3
- Like random NBSPs. Gotta love those fuckers.
4
- Thanks instructure btw for your state of the art WYSIWYG dogshit editor.
5
- """
6
- from bs4 import BeautifulSoup
7
-
8
# Non-breaking space (U+00A0). Written as an escape so it cannot be silently turned into an ordinary space by an
# editor or a copy/paste, and so it is actually visible to the reader.
NBSP = "\u00a0"


def remove_garbage_from_title(smelly_canvas_title: str) -> str:
    """
    Removes leading and trailing tabs, spaces and NBSPs from smelly canvas titles, and replaces any NBSPs left
    inside the title with ordinary spaces.

    Parameters
    ----------
    smelly_canvas_title
        The title as received from canvas
    Returns
    -------
    str
        Clean title that is not smelly and has no NBSPs.
    """
    return (smelly_canvas_title
            .strip(f"\t {NBSP}")  # remove garbage from both ends
            .replace(NBSP, " ")  # remove any other NBSPs
            )
27
-
28
-
29
def remove_stylesheets_from_html(smelly_html: str) -> str:
    """
    Strips every `<link rel="stylesheet">` element out of `smelly_html`.

    Parameters
    ----------
    smelly_html
        The html to remove style sheets from
    Returns
    -------
    The html, re-serialised after all stylesheet links were removed
    """
    document = BeautifulSoup(smelly_html, "html.parser")
    stylesheet_links = document.find_all("link", {"rel": "stylesheet"})

    for stylesheet_link in stylesheet_links:
        stylesheet_link.decompose()

    return str(document)
@@ -1,34 +0,0 @@
1
- from PySide6.QtCore import QMimeDatabase
2
- from PySide6.QtGui import QIcon
3
- from PySide6.QtWidgets import QApplication, QStyle
4
-
5
- _mime_database = QMimeDatabase()
6
- _default_icon = None
7
-
8
-
9
def icon_for_filename(file_name: str) -> QIcon:
    """
    Looks up a theme icon for a file, based on the mime types that match its name.

    Parameters
    ----------
    file_name
        The name of the file
    Returns
    -------
    QIcon
        The first non-null theme icon for a matching mime type, or the style's generic file icon when no such
        icon exists
    """
    global _default_icon

    # Lazily try the theme icon of each matching mime type, in the order the mime database returns them
    themed_icons = (
        QIcon.fromTheme(mime_type.iconName())
        for mime_type in _mime_database.mimeTypesForFileName(file_name)
    )
    themed_icon = next((icon for icon in themed_icons if not icon.isNull()), None)

    if themed_icon is not None:
        return themed_icon

    # The fallback icon is looked up once and then reused
    if _default_icon is None:
        _default_icon = QApplication.style().standardIcon(QStyle.StandardPixmap.SP_FileIcon)

    return _default_icon
@@ -1,25 +0,0 @@
1
- from typing import Any
2
-
3
- from PySide6.QtGui import QAction, QKeySequence
4
-
5
-
6
def create_qaction(name: str, shortcut: QKeySequence | None = None, parent: Any = None, triggered: Any = None,
                   checkable: bool | None = None, checked: bool | None = None) -> QAction:
    """
    Creates a QAction called `name` and applies every optional setting that was supplied.
    Settings that are left as None are not touched.
    """
    action = QAction(name)

    # Each optional argument paired with the call that applies it, in the order they are applied
    optional_settings = (
        (shortcut, action.setShortcut),
        (parent, action.setParent),
        (triggered, action.triggered.connect),
        (checkable, action.setCheckable),
        (checked, action.setChecked),
    )

    for value, apply_setting in optional_settings:
        if value is not None:
            apply_setting(value)

    return action
@@ -1,45 +0,0 @@
1
- from datetime import datetime
2
-
3
- import qdarktheme
4
-
5
- from qcanvas.util.app_settings import settings
6
-
7
-
8
def apply_selected_theme() -> None:
    """
    Sets up the theme chosen in the app's settings, unless that theme is "native", in which case nothing is done
    """
    if settings.theme == "native":
        return

    qdarktheme.setup_theme(settings.theme, custom_colors=_get_colours())
17
-
18
-
19
- red_theme = {
20
- "primary": "e21d31",
21
- "[light]": {"foreground": "480910", "background": "fcf8f8"},
22
- "[dark]": {"foreground": "fbdfe2", "background": "231f1f"}
23
- }
24
-
25
-
26
- def _get_colours() -> dict:
27
- now = datetime.now()
28
-
29
- if now.year >= 2025:
30
- print("I certainly hope not")
31
-
32
- if now.month == 3 and now.day == 17:
33
- # And this is on the weekend...
34
- return {"primary": "08ff00"}
35
- elif now.month == 2 and now.day == 14:
36
- print("Why are you looking at canvas? Don't you have something better to do?")
37
-
38
- # Nobody will ever see this because uni starts around the 20th
39
- # Too bad, I kinda liked the theme
40
- return red_theme
41
- elif now.month == 8 and now.day == 20:
42
- # Some random day... I just wanted to see the red theme
43
- return red_theme
44
- else:
45
- return {"primary": "FF804F"}
@@ -1,2 +0,0 @@
1
- from .canvas_link_scanner import CanvasFileScanner
2
- from .resource_scanner import ResourceScanner
@@ -1,41 +0,0 @@
1
- from bs4 import Tag
2
- from httpx import URL
3
-
4
- from qcanvas import db as db
5
- from qcanvas.net.canvas import CanvasClient
6
- from qcanvas.util.link_scanner.resource_scanner import ResourceScanner
7
-
8
- canvas_resource_id_prefix = "canvas_file"
9
-
10
-
11
class CanvasFileScanner(ResourceScanner):
    """
    Resource scanner for `a` and `img` elements that canvas has marked as pointing to a File.
    """
    _canvas_client: CanvasClient

    def __init__(self, canvas_client: CanvasClient):
        self._canvas_client = canvas_client

    def accepts_link(self, link: Tag) -> bool:
        """
        Accepts `a` and `img` elements whose `data-api-returntype` attribute is "File"
        """
        is_supported_element = link.name in ["a", "img"]
        return is_supported_element and link.attrs.get("data-api-returntype") == "File"

    async def extract_resource(self, link: Tag, file_id: str) -> db.Resource:
        """
        Fetches the file from the endpoint in the link's `data-api-endpoint` attribute and converts it to a resource
        """
        legacy_file = await self._canvas_client.get_file_from_endpoint(link["data-api-endpoint"])
        return db.convert_legacy_file(legacy_file)

    def extract_id(self, link: Tag) -> str:
        """
        Returns the last path segment of the link's `data-api-endpoint`
        """
        # https://canvas.newcastle.edu.au/courses/27716/files/5975585/...
        # --------------------------------- Extract this part ^^^^^^^
        endpoint_path = URL(link["data-api-endpoint"]).path
        path_segments = endpoint_path.rsplit('/', 2)
        return path_segments[-1]

    async def download(self, resource):
        """
        Downloads `resource` to its download location using the canvas client, yielding the progress it reports
        """
        destination = resource.download_location
        destination.parent.mkdir(parents=True, exist_ok=True)

        with open(destination, "wb") as output_file:
            async for progress in self._canvas_client.download_file(resource, output_file):
                yield progress

    @property
    def name(self) -> str:
        return canvas_resource_id_prefix
@@ -1,60 +0,0 @@
1
- import json
2
- from typing import Any
3
-
4
- from bs4 import Tag, BeautifulSoup
5
- from httpx import AsyncClient
6
-
7
- from qcanvas import db as db
8
- from qcanvas.util.link_scanner import ResourceScanner
9
-
10
-
11
class CanvasMediaObjectScanner(ResourceScanner):
    """
    Resource scanner for video players embedded in canvas pages as iframes.
    """

    def __init__(self, client: AsyncClient):
        self.client = client

    @property
    def name(self) -> str:
        return "canvas_media_object"

    def accepts_link(self, link: Tag) -> bool:
        """
        Accepts iframes whose `data-media-type` attribute is "video"
        """
        return link.name == "iframe" and link.attrs.get("data-media-type") == "video"

    async def extract_resource(self, link: Tag, file_id: str) -> db.Resource:
        """
        Fetches the embedded player page that the iframe points to and builds a resource from the media object
        data embedded in one of its script tags.

        Raises
        ------
        Exception
            If no script tag with media object data could be found
        """
        # Get the page for the embedded player (I could not find another way to get the needed data from canvas)
        response = (await self.client.get(link.attrs["src"])).text
        # Parse the HTML response
        doc = BeautifulSoup(response, "html.parser")
        media_data: None | dict[str, Any] = None

        # Find all script tags (one of them has the data we are interested in)
        for script_tag in doc.find_all("script", {}):
            # A script that contains this is the one that has the data we want
            if "INST = {" not in script_tag.text:
                continue

            media_data = self._parse_media_object(script_tag.text)

            if media_data is not None:
                break

        if media_data is None:
            raise Exception("Could not extract media info")

        # The highest quality stream is the first
        media_source = media_data["media_sources"][0]

        return db.Resource(
            id=file_id,
            url=media_source["src"],
            file_name=media_data["title"],
            file_size=int(media_source["size"]) * 1024  # Seems to be recorded in KiB, not bytes
        )

    @staticmethod
    def _parse_media_object(script_text: str) -> dict[str, Any] | None:
        """
        Returns the "media_object" entry of the json assigned on the first line starting with "ENV =", or None when
        the script has no such line.
        """
        for line in script_text.splitlines():
            statement = line.strip()

            if statement.startswith("ENV ="):
                # Remove the exact "ENV =" prefix. (str.lstrip would strip a *set of characters*, which breaks on
                # tab indented lines.) Surrounding whitespace is dropped before removing the trailing semicolon.
                payload = statement.removeprefix("ENV =").strip().rstrip(";")
                # Parse the json embedded in the script tag
                return json.loads(payload)["media_object"]

        return None

    def extract_id(self, link: Tag) -> str:
        return link.attrs["data-media-id"]
@@ -1,68 +0,0 @@
1
- import httpx
2
- from bs4 import Tag
3
- from httpx import URL
4
-
5
- from qcanvas import db as db
6
- from qcanvas.util.link_scanner import ResourceScanner
7
-
8
-
9
- # from httpx import URL
10
-
11
-
12
- def parse_content_disposition(header: str) -> dict[str, str | None]:
13
- bad_chars = "\" \t"
14
- result = {}
15
-
16
- for statement in header.split(";"):
17
- split = statement.split("=", 2)
18
-
19
- result[split[0].strip(bad_chars)] = None if len(split) == 1 else split[1].strip(bad_chars)
20
-
21
- return result
22
-
23
-
24
class DropboxScanner(ResourceScanner):
    """
    Resource scanner for hyperlinks to files shared via www.dropbox.com/s/...
    """

    def __init__(self, client: httpx.AsyncClient):
        self.client = client

    def accepts_link(self, link: Tag) -> bool:
        """
        Accepts `a` elements whose href is on www.dropbox.com and whose first path segment is "s"
        """
        if link.name != "a" or "href" not in link.attrs:
            return False

        url = URL(link.attrs["href"])
        return url.host == "www.dropbox.com" and url.path.split("/", 2)[1] == "s"

    async def extract_resource(self, link: Tag, file_id: str) -> db.Resource:
        """
        Builds a resource from the `content-disposition` and `content-length` headers of the direct download
        (`dl=1`) version of the link
        """
        download_url = URL(link.attrs["href"]).copy_set_param("dl", 1)
        request = self.client.build_request(method="GET", url=download_url)

        # The following code essentially starts downloading the file, reads the headers and then stops downloading it,
        # just to get the name and size of the file
        response = await self.client.send(request, follow_redirects=True, stream=True)

        try:
            response.raise_for_status()

            disposition = parse_content_disposition(response.headers["content-disposition"])
            file_name = disposition["filename"]
            file_size = int(response.headers["content-length"])

            return db.Resource(id=file_id, url=str(download_url), file_name=file_name, file_size=file_size)
        finally:
            await response.aclose()

    def extract_id(self, link: Tag) -> str:
        """
        Returns the second path segment of the link's href
        """
        # https://www.dropbox.com/s/vwk48ajl9nw6pqh/Lab1_ENGG1500_robot.pdf?dl=0
        # ------- Extract this part ^^^^^^^^^^^^^^^
        path_segments = URL(link.attrs["href"]).path.split("/", 3)
        return path_segments[2]

    @property
    def name(self) -> str:
        return "dropbox"
@@ -1,69 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import AsyncIterator
3
-
4
- import httpx
5
- from bs4 import Tag
6
-
7
- import qcanvas.db as db
8
-
9
-
10
class ResourceScanner(ABC):
    """
    A resource scanner extracts resources from canvas pages.
    The resource may be an embedded video, a file or anything that will be of use to the user.
    Each scanner should be responsible for only 1 type of resource.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """
        The name of the resource scanner.
        Will be attached to the resource id externally.
        """
        ...

    @abstractmethod
    def accepts_link(self, link: Tag) -> bool:
        """
        Whether this resource scanner accepts the specified link
        """
        ...

    @abstractmethod
    async def extract_resource(self, link: Tag, file_id: str) -> db.Resource:
        """
        Extract information about the resource in the specified tag
        Parameters
        ----------
        link
            The element that links to the resource
        file_id
            The id of the file (as produced from `extract_id`)
        Returns
        -------
        The resource
        """
        ...

    @abstractmethod
    def extract_id(self, link: Tag) -> str:
        """
        Extracts a unique id from a file link
        """
        ...

    async def download(self, resource: db.Resource) -> AsyncIterator[int]:
        """
        Downloads `resource.url` to `resource.download_location`, following redirects.

        Yields 0 before anything else, then the number of bytes downloaded so far after each chunk is written.
        Raises whatever `raise_for_status` raises when the server responds with an error status. In that case the
        destination file is left untouched.
        """
        yield 0

        download_destination = resource.download_location
        download_destination.parent.mkdir(parents=True, exist_ok=True)

        async with httpx.AsyncClient(follow_redirects=True) as client:
            async with client.stream(method='get', url=resource.url) as resp:
                resp.raise_for_status()

                # Only open (and thereby truncate) the destination once the response is known to be good, so a
                # failed request neither leaves an empty file behind nor wipes a previous download.
                with open(download_destination, "wb") as file:
                    async for chunk in resp.aiter_bytes():
                        file.write(chunk)
                        yield resp.num_bytes_downloaded