getdocs 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
getdocs/jobs.py ADDED
@@ -0,0 +1,204 @@
1
+ """Jobs: run Crawls as subprocesses and track their state.
2
+
3
+ Per ADR-0002, a job is the getdocs CLI run with --format jsonl; its stdout
4
+ stream is the event protocol. One subprocess per Crawl sidesteps the
5
+ one-reactor-per-process constraint and isolates crashes.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import sys
11
+ import tempfile
12
+ import uuid
13
+ from dataclasses import dataclass, field
14
+
15
# Option key -> CLI switch. build_args appends the switch when the option is
# truthy.
_BOOL_FLAGS = {
    "allow_backward": "--allow-backward",
    "allow_subdomains": "--allow-subdomains",
    "ignore_robots": "--ignore-robots",
    "keep_html": "--keep-html",
}
# Option key -> CLI flag taking one value. build_args emits the flag followed
# by str(value) whenever the option is not None.
_VALUE_FLAGS = {
    "limit": "--limit",
    "depth": "--depth",
    "delay": "--delay",
    "concurrency": "--concurrency",
    "render": "--render",
    "selector": "--selector",
}
# Option key -> CLI flag that build_args repeats once per item of the option's
# list.
_LIST_FLAGS = {
    "include_paths": "--include-paths",
    "exclude_paths": "--exclude-paths",
}
# Value of the "sitemap" option -> the switch build_args appends for it; any
# other value adds no switch.
_SITEMAP_FLAGS = {"off": "--no-sitemap", "only": "--sitemap-only"}

# JSONL lines carry whole pages (and raw HTML with keep_html).
# Passed as the stream `limit` when the crawl subprocess is created (32 MiB).
_STREAM_LIMIT = 32 * 1024 * 1024
37
+
38
+
39
def build_args(options: dict, output_dir: str) -> list[str]:
    """Translate a crawl options dict into getdocs CLI arguments.

    The result always starts with ``crawl <seeds...> --format jsonl -o
    <output_dir>``; the flag tables then contribute value flags, boolean
    switches, repeated list flags and the sitemap switch, in that order.

    Every emitted argument is a ``str``: the subprocess API rejects other
    types, and option values usually arrive from decoded JSON where numbers
    are common.

    Args:
        options: crawl options; must contain "urls" (list) or "url".
        output_dir: directory handed to the CLI via ``-o``.

    Returns:
        The argument list to append after ``python -m getdocs``.

    Raises:
        KeyError: if "urls" is missing/empty and "url" is absent.
    """

    def as_list(value) -> list:
        # A bare string must count as one item, not be iterated per character.
        if value is None:
            return []
        if isinstance(value, (str, bytes)):
            return [value]
        return list(value)

    seeds = as_list(options.get("urls") or [options["url"]])
    args = ["crawl", *(str(seed) for seed in seeds), "--format", "jsonl", "-o", output_dir]
    for key, flag in _VALUE_FLAGS.items():
        if options.get(key) is not None:
            args += [flag, str(options[key])]
    for key, flag in _BOOL_FLAGS.items():
        if options.get(key):
            args.append(flag)
    for key, flag in _LIST_FLAGS.items():
        for value in as_list(options.get(key) or None):
            args += [flag, str(value)]
    sitemap = options.get("sitemap")
    # Only strings can name a sitemap mode; this also avoids a TypeError when
    # an unhashable value (list/dict) is supplied.
    sitemap_flag = _SITEMAP_FLAGS.get(sitemap) if isinstance(sitemap, str) else None
    if sitemap_flag:
        args.append(sitemap_flag)
    return args
55
+
56
+
57
@dataclass
class CrawlJob:
    """In-memory state of one Crawl subprocess, as tracked by JobManager."""

    id: str  # hex uuid4 assigned when the job is started
    seeds: list[str]  # seed URLs taken from the "urls"/"url" option
    status: str = "running"  # running | completed | failed | cancelled
    pages: list[dict] = field(default_factory=list)  # page records minus their "type" key
    manifest: dict | None = None  # manifest record minus its "type" key, once emitted
    error: str | None = None  # stderr tail or exit-code message when the job failed
    webhook_failures: int = 0  # webhook deliveries that exhausted their retries
66
+
67
+
68
class JobManager:
    """Start Crawl subprocesses and track their state in memory.

    Each job runs ``python -m getdocs crawl ... --format jsonl``; its stdout
    is parsed line by line into page and manifest records, which are stored
    on the CrawlJob and fanned out to live ``stream()`` subscribers.
    """

    def __init__(self):
        self.jobs: dict[str, CrawlJob] = {}
        self._tasks: dict[str, asyncio.Task] = {}
        self._processes: dict[str, asyncio.subprocess.Process] = {}
        self._subscribers: dict[str, list[asyncio.Queue]] = {}

    def start(self, options: dict) -> CrawlJob:
        """Register a new job and schedule its subprocess on the running loop.

        Raises:
            KeyError: if the options carry neither "urls" nor "url".
        """
        seeds = options.get("urls") or [options["url"]]
        if isinstance(seeds, str):
            # Keep CrawlJob.seeds a list even when a bare URL was supplied.
            seeds = [seeds]
        job = CrawlJob(id=uuid.uuid4().hex, seeds=seeds)
        self.jobs[job.id] = job
        output_dir = tempfile.mkdtemp(prefix=f"getdocs-{job.id[:8]}-")
        args = build_args(options, output_dir=output_dir)
        self._tasks[job.id] = asyncio.ensure_future(
            self._run(job, args, webhook=options.get("webhook"))
        )
        return job

    def get(self, job_id: str) -> CrawlJob | None:
        """Return the job with this id, or None when unknown."""
        return self.jobs.get(job_id)

    def cancel(self, job_id: str) -> CrawlJob | None:
        """Cancel a running job (terminates its subprocess, keeps partial
        results). A no-op on finished jobs; None for unknown ids."""
        job = self.jobs.get(job_id)
        if job is None:
            return None
        if job.status == "running":
            job.status = "cancelled"
            process = self._processes.get(job_id)
            if process is not None and process.returncode is None:
                process.terminate()
        return job

    async def wait(self, job_id: str) -> CrawlJob:
        """Wait for the job's runner task to finish and return the job.

        Raises:
            KeyError: for an unknown job id.
        """
        await self._tasks[job_id]
        return self.jobs[job_id]

    def _publish(self, job_id: str, event: dict) -> None:
        """Hand an event to every live subscriber of the job."""
        for queue in self._subscribers.get(job_id, []):
            queue.put_nowait(event)

    async def stream(self, job_id: str):
        """Yield a job's events: a replay of everything so far, then live
        page events, ending with the manifest (when one was produced).

        The queue is attached in the same event-loop step as the replay
        snapshot, so no event is missed or duplicated around the boundary.
        """
        job = self.jobs[job_id]
        queue: asyncio.Queue = asyncio.Queue()
        self._subscribers.setdefault(job_id, []).append(queue)
        try:
            replay = list(job.pages)
            manifest = job.manifest
            # The manifest is the final record, so once it exists the replay
            # is complete even if the status has not flipped yet (the runner
            # is still draining stderr / reaping the process). Waiting on the
            # queue in that window would only ever deliver "end" and the
            # manifest would be lost.
            finished = job.status != "running" or manifest is not None
            for record in replay:
                yield {"type": "page", **record}
            if finished:
                if manifest is not None:
                    yield {"type": "manifest", **manifest}
                return
            while True:
                event = await queue.get()
                if event["type"] == "end":
                    return
                yield event
                if event["type"] == "manifest":
                    return
        finally:
            subscribers = self._subscribers.get(job_id, [])
            if queue in subscribers:
                subscribers.remove(queue)
            if not subscribers:
                # Do not keep one empty list per job forever.
                self._subscribers.pop(job_id, None)

    async def _deliver(self, job: CrawlJob, url: str, payload: dict) -> None:
        """Bounded-retry webhook POST; failures are recorded, never raised."""
        import httpx

        attempts = 3
        for attempt in range(attempts):
            try:
                async with httpx.AsyncClient(timeout=5) as client:
                    response = await client.post(url, json=payload)
                    if response.status_code < 400:
                        return
            except (httpx.HTTPError, httpx.InvalidURL):
                # InvalidURL does not derive from HTTPError; without it a
                # malformed webhook URL would escape and kill the job runner.
                pass
            if attempt < attempts - 1:
                # Back off between attempts only; nothing follows the last one.
                await asyncio.sleep(0.05 * (attempt + 1))
        job.webhook_failures += 1

    async def _run(self, job: CrawlJob, args: list[str], webhook: str | None = None) -> None:
        """Drive one job from the "started" webhook to the "completed" one.

        Whatever happens while crawling, the job leaves the "running" state
        and subscribers receive an "end" event.
        """
        if webhook:
            await self._deliver(
                job, webhook, {"event": "started", "id": job.id, "seeds": job.seeds}
            )
        failure: str | None = None
        returncode: int | None = None
        stderr = b""
        try:
            returncode, stderr = await self._crawl(job, args, webhook)
        except Exception as exc:  # boundary: a runner crash must still finish the job
            failure = f"crawl runner failed: {type(exc).__name__}: {exc}"
        if job.status == "cancelled":
            pass  # keep the cancelled status and partial pages
        elif failure is None and returncode == 0:
            job.status = "completed"
        else:
            job.status = "failed"
            job.error = (
                failure
                or stderr.decode(errors="replace").strip()[-2000:]
                or f"crawl exited with code {returncode}"
            )
        self._publish(job.id, {"type": "end", "status": job.status})
        if webhook:
            await self._deliver(
                job,
                webhook,
                {
                    "event": "completed",
                    "id": job.id,
                    "status": job.status,
                    "manifest": job.manifest,
                },
            )

    async def _crawl(
        self, job: CrawlJob, args: list[str], webhook: str | None
    ) -> tuple[int | None, bytes]:
        """Spawn the crawl subprocess and consume its output.

        Returns (returncode, stderr bytes); returncode is None when the job
        was cancelled before a process was spawned.
        """
        if job.status == "cancelled":
            # cancel() ran while the "started" webhook was being delivered.
            return None, b""
        process = await asyncio.create_subprocess_exec(
            sys.executable, "-m", "getdocs", *args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            limit=_STREAM_LIMIT,
        )
        self._processes[job.id] = process
        # Drain stderr concurrently with stdout: a child that fills the
        # stderr pipe would otherwise block forever while we wait for stdout.
        stderr_task = asyncio.ensure_future(process.stderr.read())
        try:
            if job.status == "cancelled" and process.returncode is None:
                # cancel() ran while the process was being created and so
                # could not see it yet.
                process.terminate()
            async for line in process.stdout:
                await self._consume_line(job, line, webhook)
            stderr = await stderr_task
            returncode = await process.wait()
        finally:
            stderr_task.cancel()  # no-op once the read has completed
            if process.returncode is None:
                # Only reached on an error path: do not leave the child running.
                try:
                    process.kill()
                except ProcessLookupError:
                    pass
                await process.wait()
            self._processes.pop(job.id, None)
        return returncode, stderr

    async def _consume_line(self, job: CrawlJob, line: bytes, webhook: str | None) -> None:
        """Record and publish one JSONL line; anything unparseable is skipped."""
        try:
            record = json.loads(line)
        except ValueError:
            # Covers JSONDecodeError and the UnicodeDecodeError raised for
            # bytes that are not valid UTF-8.
            return
        if not isinstance(record, dict):
            return
        kind = record.get("type")
        if kind == "page":
            page = {k: v for k, v in record.items() if k != "type"}
            job.pages.append(page)
            self._publish(job.id, record)
            if webhook:
                await self._deliver(
                    job, webhook, {"event": "page", "id": job.id, "page": page}
                )
        elif kind == "manifest":
            job.manifest = {k: v for k, v in record.items() if k != "type"}
            self._publish(job.id, record)
getdocs/navharvest.py ADDED
@@ -0,0 +1,242 @@
1
+ """Nav harvesting: capture the original site's Nav Order and Reading Order.
2
+
3
+ Per ADR-0004, three signals are harvested from each fetched Page's raw HTML
4
+ and merged at Crawl end: header tabs and sidebar trees build the Nav Order
5
+ (grouping, labels, nesting); prev/next link chains are authoritative for the
6
+ linear Reading Order. Everything is plain dicts so harvests serialize
7
+ directly into the resume state and the Manifest.
8
+
9
+ Node shape: {"title": str, "url": str | None, "children": [node, ...]}
10
+ Harvest shape: {"tree": [node], "tabs": [node], "prev": url?, "next": url?}
11
+ """
12
+
13
+ from urllib.parse import urljoin
14
+
15
+ from bs4 import BeautifulSoup
16
+
17
+ from getdocs.urlnorm import normalize
18
+
19
# CSS selectors tried in order by harvest_nav; the first one that matches any
# element supplies the header tabs.
_TAB_SELECTORS = [
    ".md-tabs a.md-tabs__link",  # MkDocs Material tabs
    "nav.navbar a.navbar__item[href]",  # Docusaurus navbar
]
# Sidebar containers tried in order by harvest_nav; the first one whose <ul>
# parses to a non-empty tree wins.
_SIDEBAR_SELECTORS = [
    "nav.md-nav--primary",  # MkDocs Material
    "nav.menu",  # Docusaurus
    "div.sphinxsidebarwrapper",  # Sphinx
    "aside nav",
    "aside",
    '[class*="sidebar"] nav',
]
# Previous/next page links; the first selector yielding an element with a
# non-empty href is used.
_PREV_SELECTORS = ['a[rel="prev"]', "a.md-footer__link--prev", "a.pagination-nav__link--prev"]
_NEXT_SELECTORS = ['a[rel="next"]', "a.md-footer__link--next", "a.pagination-nav__link--next"]
33
+
34
+
35
+ def _node(title: str, url: str | None, children: list) -> dict:
36
+ return {"title": title, "url": url, "children": children}
37
+
38
+
39
+ def _own_link(li, nested_ul):
40
+ for a in li.find_all("a", href=True):
41
+ if nested_ul is None or nested_ul not in a.parents:
42
+ return a
43
+ return None
44
+
45
+
46
def _parse_list(ul, page_url: str) -> list[dict]:
    """Turn a ``<ul>`` into nav nodes, recursing into nested lists.

    An item's own link gives its title and absolute URL. Items without one
    become label-only nodes titled by their ``<label>`` text, or else by the
    item's first direct string. Items with neither title nor children are
    left out.
    """
    parsed = []
    for item in ul.find_all("li", recursive=False):
        sub_list = item.find("ul")
        anchor = _own_link(item, sub_list)
        if anchor is None:
            label = item.find("label")
            text = (label or item).find(string=True, recursive=bool(label))
            heading = (text or "").strip()
            href = None
        else:
            heading = anchor.get_text(strip=True)
            href = urljoin(page_url, anchor["href"])
        kids = _parse_list(sub_list, page_url) if sub_list else []
        if heading or kids:
            parsed.append(_node(heading, href, kids))
    return parsed
63
+
64
+
65
+ def _first_href(soup, selectors: list[str], page_url: str) -> str | None:
66
+ for selector in selectors:
67
+ el = soup.select_one(selector)
68
+ if el is not None and el.get("href"):
69
+ return urljoin(page_url, el["href"])
70
+ return None
71
+
72
+
73
def harvest_nav(html: str, page_url: str) -> dict:
    """Harvest tabs, sidebar tree and prev/next links from one page's HTML.

    Returns a dict with keys "tree", "tabs", "prev" and "next"; all URLs are
    resolved against ``page_url``.
    """
    soup = BeautifulSoup(html, "html.parser")

    # The first tab selector that matches anything decides the tabs, even if
    # every match is then dropped for lacking an href or a label.
    tabs = []
    for css in _TAB_SELECTORS:
        anchors = soup.select(css)
        if not anchors:
            continue
        for anchor in anchors:
            if not anchor.get("href"):
                continue
            label = anchor.get_text(strip=True)
            if label:
                tabs.append(_node(label, urljoin(page_url, anchor["href"]), []))
        break

    # The first sidebar container whose list yields any node decides the tree.
    tree: list[dict] = []
    for css in _SIDEBAR_SELECTORS:
        container = soup.select_one(css)
        if container is None:
            continue
        ul = container if container.name == "ul" else container.find("ul")
        if ul is None:
            continue
        tree = _parse_list(ul, page_url)
        if tree:
            break

    prev_url = _first_href(soup, _PREV_SELECTORS, page_url)
    next_url = _first_href(soup, _NEXT_SELECTORS, page_url)
    return {"tree": tree, "tabs": tabs, "prev": prev_url, "next": next_url}
103
+
104
+
105
+ # -- merging ----------------------------------------------------------------
106
+
107
+
108
+ def _count_nodes(nodes: list[dict]) -> int:
109
+ return sum(1 + _count_nodes(n["children"]) for n in nodes)
110
+
111
+
112
def _index_tree(nodes: list[dict], index: dict) -> None:
    """Map normalized URL -> node for the forest, in place, pre-order.

    The first node seen for a URL keeps its slot; later duplicates are
    ignored. Nodes without a URL are not indexed but are still descended.
    """
    for entry in nodes:
        link = entry["url"]
        if link:
            key = normalize(link)
            if key not in index:
                index[key] = entry
        _index_tree(entry["children"], index)
117
+
118
+
119
def _first_seen_merge(skeleton: list[dict], index: dict, other: list[dict]) -> None:
    """Attach nodes unseen by the skeleton under their (known) parent.

    ``other`` is walked top-down. A node whose URL is already indexed only
    redirects the walk into the indexed node's children. An unseen URL is
    copied (without children) next to its siblings and indexed. A node
    without a URL is skipped, its children being offered to the same parent.
    """

    def graft(candidates: list[dict], siblings: list[dict]) -> None:
        for candidate in candidates:
            link = candidate["url"]
            key = normalize(link) if link else None
            if not key:
                graft(candidate["children"], siblings)
                continue
            if key in index:
                graft(candidate["children"], index[key]["children"])
                continue
            added = _node(candidate["title"], link, [])
            siblings.append(added)
            index[key] = added
            graft(candidate["children"], added["children"])

    graft(other, skeleton)
136
+
137
+
138
def _attach_tabs(tabs: list[dict], roots: list[dict]) -> list[dict]:
    """Tabs become the top level; existing roots nest under the tab whose
    URL path is their prefix. A root that IS a tab's page merges into it.

    When several tabs prefix a root, the longest (most specific) tab path
    wins. Taking the first match instead would let a "Home" tab at the site
    root capture every root and leave the real section tabs empty. Tabs with
    equally long paths keep their original relative order.

    Returns the tab nodes followed by the roots that matched no tab.
    """
    tab_nodes = [_node(t["title"], t["url"], []) for t in tabs]
    # sorted() is stable, also with reverse=True, so ties preserve tab order.
    candidates = sorted(
        ((tab, (tab["url"] or "").rstrip("/")) for tab in tab_nodes),
        key=lambda pair: len(pair[1]),
        reverse=True,
    )
    leftovers = []
    for root in roots:
        target = None
        if root["url"]:
            # The trailing slash makes the prefix test segment-aware:
            # ".../api" must not claim ".../api-v2".
            root_path = root["url"].rstrip("/") + "/"
            for tab, tab_path in candidates:
                if tab_path and root_path.startswith(tab_path + "/"):
                    target = tab
                    break
        if target is None:
            leftovers.append(root)
        elif normalize(root["url"]) == normalize(target["url"]):
            target["children"].extend(root["children"])
        else:
            target["children"].append(root)
    return tab_nodes + leftovers
158
+
159
+
160
def _prune(nodes: list[dict], written: set[str]) -> list[dict]:
    """Un-crawled nodes keep their label but lose the link; label-only nodes
    without children are dropped.

    Builds new nodes; ``written`` holds normalized URLs.
    """
    kept = []
    for node in nodes:
        surviving = _prune(node["children"], written)
        link = node["url"]
        if not (link and normalize(link) in written):
            link = None
        if link is not None or surviving:
            kept.append(_node(node["title"], link, surviving))
    return kept
171
+
172
+
173
+ def _traversal(nodes: list[dict]) -> list[str]:
174
+ urls = []
175
+ for node in nodes:
176
+ if node["url"]:
177
+ urls.append(node["url"])
178
+ urls.extend(_traversal(node["children"]))
179
+ return urls
180
+
181
+
182
def _chain_sequences(harvests: list[dict], written: set[str]) -> list[str]:
    """Assemble prev/next links into ordered chains of normalized URLs.

    Each harvest contributes "page -> next" and "prev -> page" edges; the
    first edge recorded for a source wins. Chains are walked from every URL
    that has an outgoing edge but no incoming one, and only URLs in
    ``written`` are emitted.
    """
    successor: dict[str, str] = {}
    pointed_at: set[str] = set()
    for harvest in harvests:
        here = normalize(harvest["page"])
        following = harvest.get("next")
        if following:
            after = normalize(following)
            successor.setdefault(here, after)
            pointed_at.add(after)
        preceding = harvest.get("prev")
        if preceding:
            before = normalize(preceding)
            successor.setdefault(before, here)
            pointed_at.add(here)

    chain: list[str] = []
    walked: set[str] = set()
    starts = [page for page in successor if page not in pointed_at]
    for start in starts:
        cursor: str | None = start
        # The walked set stops both loops and chains that run into one another.
        while cursor and cursor not in walked:
            walked.add(cursor)
            if cursor in written:
                chain.append(cursor)
            cursor = successor.get(cursor)
    return chain
207
+
208
+
209
def _copy_nodes(nodes: list[dict]) -> list[dict]:
    """Return a deep copy of a nav forest (fresh dicts and child lists)."""
    return [{**node, "children": _copy_nodes(node["children"])} for node in nodes]


def merge_harvests(
    harvests: list[dict], written_urls: list[str]
) -> tuple[list[dict], list[str]]:
    """Merge per-page harvests into (nav tree, reading order).

    harvests: [{"page": url, "tree": [...], "tabs": [...], "prev", "next"}]
    written_urls: Pages actually written, in crawl order (original URLs).

    The largest sidebar tree is the skeleton; nodes first seen in other
    trees are grafted onto it, the first non-empty tab set becomes the top
    level, and the result is pruned to written Pages. The reading order is
    the prev/next chains, then remaining Pages in nav order, then crawl order.

    The harvests are not modified: the skeleton is a copy, so merging never
    grafts nodes into a caller-owned tree.
    """
    written = {normalize(u) for u in written_urls}
    original = {normalize(u): u for u in written_urls}

    trees = [h for h in harvests if h["tree"]]
    skeleton: list[dict] = []
    if trees:
        largest = max(trees, key=lambda h: _count_nodes(h["tree"]))
        skeleton = _copy_nodes(largest["tree"])
        index: dict = {}
        _index_tree(skeleton, index)
        for harvest in trees:
            if harvest is not largest:
                _first_seen_merge(skeleton, index, harvest["tree"])

    tabs = next((h["tabs"] for h in harvests if h["tabs"]), [])
    if tabs:
        skeleton = _attach_tabs(tabs, skeleton)

    nav = _prune(skeleton, written)

    ordered = _chain_sequences(harvests, written)
    # Set-based membership keeps this linear in the number of Pages.
    placed = set(ordered)
    for url in _traversal(nav) + written_urls:
        norm = normalize(url)
        if norm in written and norm not in placed:
            placed.add(norm)
            ordered.append(norm)
    reading_order = [original[norm] for norm in dict.fromkeys(ordered)]
    return nav, reading_order
getdocs/output.py ADDED
@@ -0,0 +1,191 @@
1
+ """Output: Page records to a .md tree with YAML frontmatter, plus the Manifest."""
2
+
3
+ import json
4
+ import posixpath
5
+ import re
6
+ from dataclasses import asdict, dataclass
7
+ from pathlib import Path
8
+ from urllib.parse import unquote, urlsplit
9
+
10
+ import yaml
11
+
12
+ from getdocs.urlnorm import normalize
13
+
14
+
15
@dataclass(frozen=True)
class PageRecord:
    """One crawled Page as handed to the writers in this module."""

    url: str  # decides the output path in FileTreeWriter
    title: str
    markdown: str  # page body, written after the frontmatter
    status: int  # presumably the HTTP status code; confirm against the producer
    crawled_at: str  # presumably a timestamp string; format not visible here
    canonical: str | None = None  # left out of the output when None
    html: str | None = None  # raw HTML; FileTreeWriter stores it next to the .md
24
+
25
+
26
class FileTreeWriter:
    """Write Pages as ``.md`` files mirroring their URL paths, plus crawl.json.

    All text is written as UTF-8 regardless of the platform's locale
    encoding, so pages with non-ASCII content are stored identically (and
    without encode errors) everywhere.
    """

    def __init__(self, output_dir: Path):
        self.output_dir = Path(output_dir)
        self.page_count = 0

    @staticmethod
    def _safe_segment(segment: str) -> str:
        """Decode one URL path segment into a single, harmless file name.

        Path separators and NUL are replaced with "_", and ".." becomes
        "__" so a decoded segment can never climb out of the output tree.
        """
        name = unquote(segment)
        for unsafe in ("/", "\\", "\x00"):
            name = name.replace(unsafe, "_")
        if name == "..":
            name = "__"
        return name

    def path_for(self, url: str) -> Path:
        """Return the ``.md`` path for a Page URL, always inside output_dir.

        URLs come from crawled pages and are untrusted: a path such as
        ``/%2e%2e/%2e%2e/x`` must not resolve above the output directory.
        """
        # Decode percent-escapes per segment so %20 doesn't end up in file
        # names (and %2F can't smuggle in extra directory levels).
        segments = []
        for raw in urlsplit(url).path.split("/"):
            if not raw:
                continue
            name = self._safe_segment(raw)
            if name != ".":  # "." adds nothing to a path; pathlib drops it too
                segments.append(name)
        path = "/".join(segments) or "index"
        return self.output_dir / f"{path}.md"

    def write_page(self, record: PageRecord) -> Path:
        """Write one Page (YAML frontmatter + markdown) and return its path.

        Frontmatter holds every non-None field except ``markdown`` and
        ``html``; raw HTML, when present, goes to a sibling ``.html`` file.
        """
        target = self.path_for(record.url)
        target.parent.mkdir(parents=True, exist_ok=True)

        frontmatter = {
            k: v
            for k, v in asdict(record).items()
            if k not in ("markdown", "html") and v is not None
        }
        target.write_text(
            "---\n"
            + yaml.safe_dump(frontmatter, sort_keys=False)
            + "---\n\n"
            + record.markdown
            + "\n",
            encoding="utf-8",
        )
        if record.html is not None:
            target.with_suffix(".html").write_text(record.html, encoding="utf-8")
        self.page_count += 1
        return target

    def write_manifest(
        self,
        seeds: list[str],
        errors: list[dict] | None = None,
        truncated: bool = False,
        skipped: list[dict] | None = None,
        shells: list[str] | None = None,
        nav: list[dict] | None = None,
        reading_order: list[str] | None = None,
        media_skipped: list[dict] | None = None,
    ) -> Path:
        """Write the Manifest to ``crawl.json`` and return its path.

        Omitted list arguments are recorded as empty lists.
        """
        target = self.output_dir / "crawl.json"
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(
            json.dumps(
                {
                    "seeds": seeds,
                    "page_count": self.page_count,
                    "errors": errors or [],
                    "skipped": skipped or [],
                    "shells": shells or [],
                    "truncated": truncated,
                    "nav": nav or [],
                    "reading_order": reading_order or [],
                    "media_skipped": media_skipped or [],
                },
                indent=2,
            )
            + "\n",
            encoding="utf-8",
        )
        return target


# A parenthesised absolute http(s) URL, i.e. the destination of a markdown link.
_MD_LINK_RE = re.compile(r"\((https?://[^)\s]+)\)")


def relink_pages(writer: FileTreeWriter, written_urls: list[str]) -> None:
    """Rewrite links between crawled Pages into relative .md paths.

    Runs at Crawl end, when the full set of written Pages is known. Links
    to anything else — external sites, un-crawled pages, hotlinked media —
    keep their absolute URLs.

    Files are read and written as UTF-8, matching FileTreeWriter.write_page.
    """
    targets = {
        normalize(url): writer.path_for(url).relative_to(writer.output_dir).as_posix()
        for url in written_urls
    }

    for url in written_urls:
        page_path = writer.path_for(url)
        page_dir = page_path.relative_to(writer.output_dir).parent.as_posix()

        def rewrite(match, page_dir=page_dir):
            # The fragment is split off before lookup and re-attached after.
            link, _, fragment = match.group(1).partition("#")
            target = targets.get(normalize(link))
            if target is None:
                return match.group(0)
            relative = posixpath.relpath(target, start=page_dir)
            return f"({relative}{'#' + fragment if fragment else ''})"

        text = page_path.read_text(encoding="utf-8")
        rewritten = _MD_LINK_RE.sub(rewrite, text)
        if rewritten != text:
            page_path.write_text(rewritten, encoding="utf-8")
127
+
128
+
129
class AssetStore:
    """Downloaded Assets land under _media/<host>/<decoded path>."""

    def __init__(self, output_dir: Path):
        self.output_dir = Path(output_dir)

    @staticmethod
    def _safe_segment(segment: str) -> str:
        """Decode one URL component into a single, harmless path component.

        Path separators and NUL are replaced with "_", and ".." becomes
        "__" so a decoded segment can never climb out of the media tree.
        """
        name = unquote(segment)
        for unsafe in ("/", "\\", "\x00"):
            name = name.replace(unsafe, "_")
        if name == "..":
            name = "__"
        return name

    def save(self, url: str, body: bytes) -> str:
        """Store an asset and return its path relative to output_dir.

        Asset URLs come from crawled pages and are untrusted, so ".."
        segments (literal or percent-encoded) are neutralised before the
        path is joined. A URL without a path is stored as "asset".
        """
        parts = urlsplit(url)
        segments = [
            name
            for name in (self._safe_segment(s) for s in parts.path.split("/") if s)
            if name != "."  # "." adds nothing to a path
        ] or ["asset"]
        relpath = "/".join(["_media", self._safe_segment(parts.netloc), *segments])
        target = self.output_dir / relpath
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(body)
        return relpath
145
+
146
+
147
class JsonlWriter:
    """One typed JSON record per line; the Manifest is the final record.

    This stream is the process-boundary protocol the future API service
    consumes (ADR-0002) — record shape changes are contract changes.
    """

    def __init__(self, stream):
        self.stream = stream
        self.page_count = 0

    def _emit(self, record: dict) -> None:
        """Serialize one record onto its own line and flush immediately."""
        line = json.dumps(record, ensure_ascii=False)
        self.stream.write(line + "\n")
        self.stream.flush()

    def write_page(self, record: PageRecord) -> None:
        """Emit a "page" record holding the Page's non-None fields."""
        payload: dict = {"type": "page"}
        for name, value in asdict(record).items():
            if value is not None:
                payload[name] = value
        self._emit(payload)
        self.page_count += 1

    def write_manifest(
        self,
        seeds: list[str],
        errors: list[dict] | None = None,
        truncated: bool = False,
        skipped: list[dict] | None = None,
        shells: list[str] | None = None,
        nav: list[dict] | None = None,
        reading_order: list[str] | None = None,
        media_skipped: list[dict] | None = None,
    ) -> None:
        """Emit the "manifest" record; omitted lists are sent as empty lists."""
        manifest = {
            "type": "manifest",
            "seeds": seeds,
            "page_count": self.page_count,
            "errors": errors or [],
            "skipped": skipped or [],
            "shells": shells or [],
            "truncated": truncated,
            "nav": nav or [],
            "reading_order": reading_order or [],
            "media_skipped": media_skipped or [],
        }
        self._emit(manifest)