offcourse 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. package/dist/cli/commands/config.js.map +1 -1
  2. package/dist/cli/commands/inspect.js +1 -1
  3. package/dist/cli/commands/inspect.js.map +1 -1
  4. package/dist/cli/commands/sync.d.ts +1 -2
  5. package/dist/cli/commands/sync.d.ts.map +1 -1
  6. package/dist/cli/commands/sync.js +13 -14
  7. package/dist/cli/commands/sync.js.map +1 -1
  8. package/dist/cli/commands/syncHighLevel.d.ts +1 -2
  9. package/dist/cli/commands/syncHighLevel.d.ts.map +1 -1
  10. package/dist/cli/commands/syncHighLevel.js +4 -8
  11. package/dist/cli/commands/syncHighLevel.js.map +1 -1
  12. package/dist/cli/index.js +1 -1
  13. package/dist/cli/index.js.map +1 -1
  14. package/dist/config/configManager.d.ts.map +1 -1
  15. package/dist/config/configManager.js +4 -0
  16. package/dist/config/configManager.js.map +1 -1
  17. package/dist/downloader/hlsDownloader.d.ts.map +1 -1
  18. package/dist/downloader/hlsDownloader.js +23 -14
  19. package/dist/downloader/hlsDownloader.js.map +1 -1
  20. package/dist/downloader/hlsValidator.d.ts.map +1 -1
  21. package/dist/downloader/hlsValidator.js +6 -2
  22. package/dist/downloader/hlsValidator.js.map +1 -1
  23. package/dist/downloader/index.d.ts +3 -0
  24. package/dist/downloader/index.d.ts.map +1 -1
  25. package/dist/downloader/index.js +3 -0
  26. package/dist/downloader/index.js.map +1 -1
  27. package/dist/downloader/loomDownloader.d.ts.map +1 -1
  28. package/dist/downloader/loomDownloader.js +23 -20
  29. package/dist/downloader/loomDownloader.js.map +1 -1
  30. package/dist/downloader/queue.d.ts +4 -4
  31. package/dist/downloader/queue.d.ts.map +1 -1
  32. package/dist/downloader/queue.js.map +1 -1
  33. package/dist/downloader/vimeoDownloader.d.ts.map +1 -1
  34. package/dist/downloader/vimeoDownloader.js +7 -3
  35. package/dist/downloader/vimeoDownloader.js.map +1 -1
  36. package/dist/scraper/extractor.d.ts +4 -0
  37. package/dist/scraper/extractor.d.ts.map +1 -1
  38. package/dist/scraper/extractor.js +79 -79
  39. package/dist/scraper/extractor.js.map +1 -1
  40. package/dist/scraper/highlevel/extractor.d.ts +11 -19
  41. package/dist/scraper/highlevel/extractor.d.ts.map +1 -1
  42. package/dist/scraper/highlevel/extractor.js +72 -85
  43. package/dist/scraper/highlevel/extractor.js.map +1 -1
  44. package/dist/scraper/highlevel/navigator.d.ts +3 -10
  45. package/dist/scraper/highlevel/navigator.d.ts.map +1 -1
  46. package/dist/scraper/highlevel/navigator.js +140 -127
  47. package/dist/scraper/highlevel/navigator.js.map +1 -1
  48. package/dist/scraper/highlevel/schemas.d.ts +188 -0
  49. package/dist/scraper/highlevel/schemas.d.ts.map +1 -0
  50. package/dist/scraper/highlevel/schemas.js +139 -0
  51. package/dist/scraper/highlevel/schemas.js.map +1 -0
  52. package/dist/scraper/navigator.d.ts +14 -11
  53. package/dist/scraper/navigator.d.ts.map +1 -1
  54. package/dist/scraper/navigator.js +61 -104
  55. package/dist/scraper/navigator.js.map +1 -1
  56. package/dist/scraper/schemas.d.ts +57 -0
  57. package/dist/scraper/schemas.d.ts.map +1 -0
  58. package/dist/scraper/schemas.js +135 -0
  59. package/dist/scraper/schemas.js.map +1 -0
  60. package/dist/scraper/videoInterceptor.d.ts +4 -0
  61. package/dist/scraper/videoInterceptor.d.ts.map +1 -1
  62. package/dist/scraper/videoInterceptor.js +66 -51
  63. package/dist/scraper/videoInterceptor.js.map +1 -1
  64. package/dist/shared/auth.d.ts +9 -9
  65. package/dist/shared/auth.d.ts.map +1 -1
  66. package/dist/shared/auth.js +24 -38
  67. package/dist/shared/auth.js.map +1 -1
  68. package/dist/shared/firebase.d.ts +60 -0
  69. package/dist/shared/firebase.d.ts.map +1 -0
  70. package/dist/shared/firebase.js +102 -0
  71. package/dist/shared/firebase.js.map +1 -0
  72. package/dist/shared/fs.d.ts.map +1 -1
  73. package/dist/shared/fs.js +4 -0
  74. package/dist/shared/fs.js.map +1 -1
  75. package/dist/shared/index.d.ts +3 -0
  76. package/dist/shared/index.d.ts.map +1 -1
  77. package/dist/shared/index.js +3 -0
  78. package/dist/shared/index.js.map +1 -1
  79. package/dist/shared/slug.d.ts +11 -0
  80. package/dist/shared/slug.d.ts.map +1 -0
  81. package/{src/shared/slug.ts → dist/shared/slug.js} +10 -11
  82. package/dist/shared/slug.js.map +1 -0
  83. package/dist/shared/url.d.ts +43 -0
  84. package/dist/shared/url.d.ts.map +1 -0
  85. package/{src/shared/url.ts → dist/shared/url.js} +12 -15
  86. package/dist/shared/url.js.map +1 -0
  87. package/dist/state/database.d.ts +1 -0
  88. package/dist/state/database.d.ts.map +1 -1
  89. package/dist/state/database.js +3 -0
  90. package/dist/state/database.js.map +1 -1
  91. package/dist/storage/fileSystem.d.ts +17 -17
  92. package/dist/storage/fileSystem.d.ts.map +1 -1
  93. package/dist/storage/fileSystem.js +39 -31
  94. package/dist/storage/fileSystem.js.map +1 -1
  95. package/package.json +5 -2
  96. package/.github/workflows/ci.yml +0 -50
  97. package/.husky/commit-msg +0 -2
  98. package/.husky/pre-commit +0 -1
  99. package/.husky/pre-push +0 -3
  100. package/.prettierrc +0 -8
  101. package/.release-it.json +0 -23
  102. package/ARCHITECTURE.md +0 -233
  103. package/CHANGELOG.md +0 -78
  104. package/commitlint.config.js +0 -4
  105. package/dist/ai/openRouter.d.ts +0 -47
  106. package/dist/ai/openRouter.d.ts.map +0 -1
  107. package/dist/ai/openRouter.js +0 -116
  108. package/dist/ai/openRouter.js.map +0 -1
  109. package/dist/ai/transcriptPolisher.d.ts +0 -24
  110. package/dist/ai/transcriptPolisher.d.ts.map +0 -1
  111. package/dist/ai/transcriptPolisher.js +0 -89
  112. package/dist/ai/transcriptPolisher.js.map +0 -1
  113. package/dist/cli/commands/enrich.d.ts +0 -14
  114. package/dist/cli/commands/enrich.d.ts.map +0 -1
  115. package/dist/cli/commands/enrich.js +0 -271
  116. package/dist/cli/commands/enrich.js.map +0 -1
  117. package/dist/cli/commands/syncGhl.d.ts +0 -20
  118. package/dist/cli/commands/syncGhl.d.ts.map +0 -1
  119. package/dist/cli/commands/syncGhl.js +0 -483
  120. package/dist/cli/commands/syncGhl.js.map +0 -1
  121. package/dist/cli/commands/syncHighLevel.test.d.ts +0 -2
  122. package/dist/cli/commands/syncHighLevel.test.d.ts.map +0 -1
  123. package/dist/cli/commands/syncHighLevel.test.js +0 -102
  124. package/dist/cli/commands/syncHighLevel.test.js.map +0 -1
  125. package/dist/config/paths.test.d.ts +0 -2
  126. package/dist/config/paths.test.d.ts.map +0 -1
  127. package/dist/config/paths.test.js +0 -70
  128. package/dist/config/paths.test.js.map +0 -1
  129. package/dist/config/schema.test.d.ts +0 -2
  130. package/dist/config/schema.test.d.ts.map +0 -1
  131. package/dist/config/schema.test.js +0 -151
  132. package/dist/config/schema.test.js.map +0 -1
  133. package/dist/downloader/hlsDownloader.test.d.ts +0 -2
  134. package/dist/downloader/hlsDownloader.test.d.ts.map +0 -1
  135. package/dist/downloader/hlsDownloader.test.js +0 -116
  136. package/dist/downloader/hlsDownloader.test.js.map +0 -1
  137. package/dist/downloader/loomDownloader.test.d.ts +0 -2
  138. package/dist/downloader/loomDownloader.test.d.ts.map +0 -1
  139. package/dist/downloader/loomDownloader.test.js +0 -36
  140. package/dist/downloader/loomDownloader.test.js.map +0 -1
  141. package/dist/downloader/queue.test.d.ts +0 -2
  142. package/dist/downloader/queue.test.d.ts.map +0 -1
  143. package/dist/downloader/queue.test.js +0 -158
  144. package/dist/downloader/queue.test.js.map +0 -1
  145. package/dist/downloader/videoDownloader.d.ts +0 -32
  146. package/dist/downloader/videoDownloader.d.ts.map +0 -1
  147. package/dist/downloader/videoDownloader.js +0 -173
  148. package/dist/downloader/videoDownloader.js.map +0 -1
  149. package/dist/downloader/vimeoDownloader.test.d.ts +0 -2
  150. package/dist/downloader/vimeoDownloader.test.d.ts.map +0 -1
  151. package/dist/downloader/vimeoDownloader.test.js +0 -51
  152. package/dist/downloader/vimeoDownloader.test.js.map +0 -1
  153. package/dist/scraper/auth.d.ts +0 -29
  154. package/dist/scraper/auth.d.ts.map +0 -1
  155. package/dist/scraper/auth.js +0 -115
  156. package/dist/scraper/auth.js.map +0 -1
  157. package/dist/scraper/extractor.test.d.ts +0 -2
  158. package/dist/scraper/extractor.test.d.ts.map +0 -1
  159. package/dist/scraper/extractor.test.js +0 -65
  160. package/dist/scraper/extractor.test.js.map +0 -1
  161. package/dist/scraper/ghl/auth.d.ts +0 -25
  162. package/dist/scraper/ghl/auth.d.ts.map +0 -1
  163. package/dist/scraper/ghl/auth.js +0 -187
  164. package/dist/scraper/ghl/auth.js.map +0 -1
  165. package/dist/scraper/ghl/extractor.d.ts +0 -96
  166. package/dist/scraper/ghl/extractor.d.ts.map +0 -1
  167. package/dist/scraper/ghl/extractor.js +0 -345
  168. package/dist/scraper/ghl/extractor.js.map +0 -1
  169. package/dist/scraper/ghl/index.d.ts +0 -4
  170. package/dist/scraper/ghl/index.d.ts.map +0 -1
  171. package/dist/scraper/ghl/index.js +0 -4
  172. package/dist/scraper/ghl/index.js.map +0 -1
  173. package/dist/scraper/ghl/navigator.d.ts +0 -93
  174. package/dist/scraper/ghl/navigator.d.ts.map +0 -1
  175. package/dist/scraper/ghl/navigator.js +0 -447
  176. package/dist/scraper/ghl/navigator.js.map +0 -1
  177. package/dist/scraper/highlevel/auth.d.ts +0 -25
  178. package/dist/scraper/highlevel/auth.d.ts.map +0 -1
  179. package/dist/scraper/highlevel/auth.js +0 -189
  180. package/dist/scraper/highlevel/auth.js.map +0 -1
  181. package/dist/scraper/highlevel/extractor.test.d.ts +0 -2
  182. package/dist/scraper/highlevel/extractor.test.d.ts.map +0 -1
  183. package/dist/scraper/highlevel/extractor.test.js +0 -101
  184. package/dist/scraper/highlevel/extractor.test.js.map +0 -1
  185. package/dist/scraper/highlevel/navigator.test.d.ts +0 -2
  186. package/dist/scraper/highlevel/navigator.test.d.ts.map +0 -1
  187. package/dist/scraper/highlevel/navigator.test.js +0 -78
  188. package/dist/scraper/highlevel/navigator.test.js.map +0 -1
  189. package/dist/scraper/navigator.test.d.ts +0 -2
  190. package/dist/scraper/navigator.test.d.ts.map +0 -1
  191. package/dist/scraper/navigator.test.js +0 -63
  192. package/dist/scraper/navigator.test.js.map +0 -1
  193. package/dist/scraper/skoolApi.d.ts +0 -17
  194. package/dist/scraper/skoolApi.d.ts.map +0 -1
  195. package/dist/scraper/skoolApi.js +0 -72
  196. package/dist/scraper/skoolApi.js.map +0 -1
  197. package/dist/state/database.test.d.ts +0 -2
  198. package/dist/state/database.test.d.ts.map +0 -1
  199. package/dist/state/database.test.js +0 -34
  200. package/dist/state/database.test.js.map +0 -1
  201. package/dist/transcription/whisperService.d.ts +0 -27
  202. package/dist/transcription/whisperService.d.ts.map +0 -1
  203. package/dist/transcription/whisperService.js +0 -102
  204. package/dist/transcription/whisperService.js.map +0 -1
  205. package/eslint.config.js +0 -55
  206. package/src/__fixtures__/highlevel-post-response.json +0 -68
  207. package/src/__fixtures__/hls-master-playlist.m3u8 +0 -24
  208. package/src/cli/commands/__snapshots__/syncHighLevel.test.ts.snap +0 -38
  209. package/src/cli/commands/config.ts +0 -74
  210. package/src/cli/commands/inspect.ts +0 -441
  211. package/src/cli/commands/login.ts +0 -68
  212. package/src/cli/commands/status.ts +0 -147
  213. package/src/cli/commands/sync.ts +0 -1235
  214. package/src/cli/commands/syncHighLevel.test.ts +0 -144
  215. package/src/cli/commands/syncHighLevel.ts +0 -639
  216. package/src/cli/index.ts +0 -121
  217. package/src/config/configManager.ts +0 -75
  218. package/src/config/paths.test.ts +0 -83
  219. package/src/config/paths.ts +0 -36
  220. package/src/config/schema.test.ts +0 -173
  221. package/src/config/schema.ts +0 -65
  222. package/src/downloader/hlsDownloader.test.ts +0 -148
  223. package/src/downloader/hlsDownloader.ts +0 -327
  224. package/src/downloader/hlsValidator.ts +0 -196
  225. package/src/downloader/index.ts +0 -122
  226. package/src/downloader/loomDownloader.test.ts +0 -43
  227. package/src/downloader/loomDownloader.ts +0 -742
  228. package/src/downloader/queue.test.ts +0 -199
  229. package/src/downloader/queue.ts +0 -118
  230. package/src/downloader/vimeoDownloader.test.ts +0 -62
  231. package/src/downloader/vimeoDownloader.ts +0 -722
  232. package/src/scraper/extractor.test.ts +0 -124
  233. package/src/scraper/extractor.ts +0 -757
  234. package/src/scraper/highlevel/__snapshots__/extractor.test.ts.snap +0 -41
  235. package/src/scraper/highlevel/extractor.test.ts +0 -134
  236. package/src/scraper/highlevel/extractor.ts +0 -537
  237. package/src/scraper/highlevel/index.ts +0 -2
  238. package/src/scraper/highlevel/navigator.test.ts +0 -110
  239. package/src/scraper/highlevel/navigator.ts +0 -668
  240. package/src/scraper/highlevel/schemas.ts +0 -183
  241. package/src/scraper/navigator.test.ts +0 -122
  242. package/src/scraper/navigator.ts +0 -355
  243. package/src/scraper/schemas.ts +0 -177
  244. package/src/scraper/videoInterceptor.ts +0 -435
  245. package/src/shared/auth.test.ts +0 -58
  246. package/src/shared/auth.ts +0 -251
  247. package/src/shared/firebase.ts +0 -151
  248. package/src/shared/fs.ts +0 -80
  249. package/src/shared/http.ts +0 -34
  250. package/src/shared/index.ts +0 -6
  251. package/src/shared/url.test.ts +0 -122
  252. package/src/state/database.test.ts +0 -49
  253. package/src/state/database.ts +0 -919
  254. package/src/state/index.ts +0 -14
  255. package/src/storage/fileSystem.test.ts +0 -64
  256. package/src/storage/fileSystem.ts +0 -175
  257. package/tsconfig.json +0 -28
  258. package/vitest.config.ts +0 -29
@@ -1,757 +0,0 @@
1
- import type { Page } from "playwright";
2
- import TurndownService from "turndown";
3
- import {
4
- parseNextData,
5
- extractVideoFromNextData as extractVideoFromNextDataSchema,
6
- } from "./schemas.js";
7
-
8
- export interface DownloadableFile {
9
- url: string;
10
- filename: string;
11
- type: "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx" | "zip" | "other";
12
- }
13
-
14
- export interface LessonContent {
15
- title: string;
16
- videoUrl: string | null;
17
- videoType: "loom" | "vimeo" | "youtube" | "wistia" | "native" | "unknown" | null;
18
- htmlContent: string;
19
- markdownContent: string;
20
- downloadableFiles: DownloadableFile[];
21
- }
22
-
23
- // Initialize Turndown for HTML to Markdown conversion
24
- const turndown = new TurndownService({
25
- headingStyle: "atx",
26
- codeBlockStyle: "fenced",
27
- bulletListMarker: "-",
28
- });
29
-
30
- // Custom rule to handle images
31
- turndown.addRule("images", {
32
- filter: "img",
33
- replacement: (_content, node) => {
34
- const img = node as HTMLImageElement;
35
- const alt = img.alt || "";
36
- const src = img.src || "";
37
- return `![${alt}](${src})`;
38
- },
39
- });
40
-
41
- // Custom rule to preserve links
42
- turndown.addRule("links", {
43
- filter: "a",
44
- replacement: (content, node) => {
45
- const anchor = node as HTMLAnchorElement;
46
- const href = anchor.href || "";
47
- if (!href || href === content) {
48
- return content;
49
- }
50
- return `[${content}](${href})`;
51
- },
52
- });
53
-
54
- // Browser automation functions - require Playwright
55
- /* v8 ignore start */
56
-
57
- /**
58
- * Checks if there's a video preview/thumbnail that needs to be clicked to load the video.
59
- */
60
- async function tryClickVideoPreview(page: Page): Promise<boolean> {
61
- // First, try Skool's specific styled-components pattern for video players
62
- // These have classes like "styled__VideoPlayerWrapper-sc-xxx" and "styled__PlaybackButton-sc-xxx"
63
-
64
- // Strategy 0: Direct click on Skool's VideoPlayerWrapper or PlaybackButton
65
- const skoolClicked = await page.evaluate(() => {
66
- // Find the playback button directly
67
- const playbackButton = document.querySelector('[class*="PlaybackButton"]');
68
- if (playbackButton && playbackButton instanceof HTMLElement) {
69
- const rect = playbackButton.getBoundingClientRect();
70
- if (rect.width > 0 && rect.height > 0) {
71
- playbackButton.click();
72
- return { clicked: true, selector: "PlaybackButton" };
73
- }
74
- }
75
-
76
- // Or try clicking the video player wrapper
77
- const videoWrapper = document.querySelector('[class*="VideoPlayerWrapper"]');
78
- if (videoWrapper && videoWrapper instanceof HTMLElement) {
79
- const rect = videoWrapper.getBoundingClientRect();
80
- if (rect.width > 200 && rect.height > 100) {
81
- videoWrapper.click();
82
- return { clicked: true, selector: "VideoPlayerWrapper" };
83
- }
84
- }
85
-
86
- // Try styled-components video patterns
87
- const styledVideo = document.querySelector('[class*="styled__Video"]');
88
- if (styledVideo && styledVideo instanceof HTMLElement) {
89
- styledVideo.click();
90
- return { clicked: true, selector: "styled__Video" };
91
- }
92
-
93
- return { clicked: false };
94
- });
95
-
96
- if (skoolClicked.clicked) {
97
- // Wait for iframe to appear after click
98
- try {
99
- await page.waitForSelector(
100
- 'iframe[src*="loom.com"], iframe[src*="vimeo"], iframe[src*="youtube"], video',
101
- {
102
- timeout: 5000,
103
- }
104
- );
105
- } catch {
106
- // Timeout is fine, we'll check for video anyway
107
- }
108
- // Extra wait for iframe content to load
109
- await page.waitForTimeout(1000);
110
- return true;
111
- }
112
-
113
- // Strategy 1: Look for elements that contain "loom" in href or data attributes
114
- const loomClicked = await page.evaluate(() => {
115
- // Find any anchor or element with loom URL
116
- const loomLink = document.querySelector('a[href*="loom.com"]');
117
- if (loomLink && loomLink instanceof HTMLElement) {
118
- // Try to find a parent container that might be the video preview
119
- let container = loomLink.parentElement;
120
- for (let i = 0; i < 5 && container; i++) {
121
- const rect = container.getBoundingClientRect();
122
- if (rect.width > 300 && rect.height > 150) {
123
- container.click();
124
- return true;
125
- }
126
- container = container.parentElement;
127
- }
128
- loomLink.click();
129
- return true;
130
- }
131
-
132
- // Look for elements with loom-related classes or data
133
- const loomElements = Array.from(
134
- document.querySelectorAll('[class*="loom"], [data-loom], [data-video-provider="loom"]')
135
- );
136
- for (const el of loomElements) {
137
- if (el instanceof HTMLElement) {
138
- const rect = el.getBoundingClientRect();
139
- if (rect.width > 100 && rect.height > 50) {
140
- el.click();
141
- return true;
142
- }
143
- }
144
- }
145
-
146
- return false;
147
- });
148
-
149
- if (loomClicked) {
150
- await page.waitForTimeout(2500);
151
- return true;
152
- }
153
-
154
- // Strategy 2: Look for play button overlays on large elements
155
- const playClicked = await page.evaluate(() => {
156
- // Find all elements that look like play buttons (including styled-components patterns)
157
- const playButtons = Array.from(
158
- document.querySelectorAll(
159
- '[class*="play" i], [class*="Play"], [class*="Playback"], svg[class*="play" i], [aria-label*="play" i]'
160
- )
161
- );
162
-
163
- for (const btn of playButtons) {
164
- if (btn instanceof HTMLElement || btn instanceof SVGElement) {
165
- const rect = btn.getBoundingClientRect();
166
- // Play buttons are usually visible and reasonably sized
167
- if (rect.width > 20 && rect.height > 20 && rect.top > 0 && rect.left > 0) {
168
- // Check if this is inside a large container (video preview)
169
- let parent = btn.parentElement;
170
- for (let i = 0; i < 5 && parent; i++) {
171
- const parentRect = parent.getBoundingClientRect();
172
- if (parentRect.width > 300 && parentRect.height > 150) {
173
- // This looks like a video container - click the play button
174
- if (btn instanceof HTMLElement) {
175
- btn.click();
176
- } else {
177
- // For SVG, try to click the parent
178
- parent.click();
179
- }
180
- return true;
181
- }
182
- parent = parent.parentElement;
183
- }
184
- }
185
- }
186
- }
187
- return false;
188
- });
189
-
190
- if (playClicked) {
191
- await page.waitForTimeout(2500);
192
- return true;
193
- }
194
-
195
- // Strategy 3: Look for large clickable images/thumbnails
196
- const thumbnailClicked = await page.evaluate(() => {
197
- // Find large images that might be video thumbnails
198
- const images = Array.from(document.querySelectorAll("img"));
199
-
200
- for (const img of images) {
201
- const rect = img.getBoundingClientRect();
202
- // Video thumbnails are typically 16:9 or similar aspect ratio
203
- if (rect.width > 400 && rect.height > 200) {
204
- // Check if image has a sibling or parent with play icon
205
- const parent = img.parentElement;
206
- if (parent) {
207
- const hasPLay = parent.querySelector('[class*="play" i], svg');
208
- if (hasPLay || parent.className.toLowerCase().includes("video")) {
209
- parent.click();
210
- return true;
211
- }
212
- }
213
- }
214
- }
215
-
216
- return false;
217
- });
218
-
219
- if (thumbnailClicked) {
220
- await page.waitForTimeout(2500);
221
- return true;
222
- }
223
-
224
- // Strategy 4: Common patterns for video preview overlays
225
- const previewSelectors = [
226
- // Skool-specific patterns
227
- '[class*="VideoWrapper"]',
228
- '[class*="video-wrapper"]',
229
- '[class*="video-container"]',
230
- '[class*="VideoContainer"]',
231
- '[class*="player-wrapper"]',
232
- '[class*="embed-wrapper"]',
233
- // Generic patterns
234
- '[class*="video-preview"]',
235
- '[class*="VideoPreview"]',
236
- '[class*="video-thumbnail"]',
237
- '[class*="poster"]',
238
- // Data attribute patterns
239
- "[data-video-id]",
240
- "[data-video-url]",
241
- "[data-embed]",
242
- ];
243
-
244
- for (const selector of previewSelectors) {
245
- try {
246
- const element = await page.$(selector);
247
- if (element) {
248
- const isVisible = await element.isVisible();
249
- if (isVisible) {
250
- const box = await element.boundingBox();
251
- if (box && box.width > 200 && box.height > 100) {
252
- await element.click();
253
- await page.waitForTimeout(2500);
254
- return true;
255
- }
256
- }
257
- }
258
- } catch {
259
- // Selector not found or not clickable, try next
260
- }
261
- }
262
-
263
- return false;
264
- }
265
-
266
- /**
267
- * Extracts the video URL from the current lesson page.
268
- * Supports Loom, Vimeo, YouTube, Wistia, and native video.
269
- *
270
- * For Vimeo: Prefers iframe src (has auth params) over __NEXT_DATA__ URL.
271
- * For others: Uses __NEXT_DATA__ first, then falls back to DOM inspection.
272
- */
273
- export async function extractVideoUrl(
274
- page: Page
275
- ): Promise<{ url: string | null; type: LessonContent["videoType"] }> {
276
- // First: Check for iframe with full URL (includes auth params for Vimeo)
277
- const iframeVideo = await extractVideoFromIframe(page);
278
-
279
- // If it's Vimeo, prefer iframe URL as it has the auth hash
280
- if (iframeVideo.url && iframeVideo.type === "vimeo") {
281
- return iframeVideo;
282
- }
283
-
284
- // Try extracting from __NEXT_DATA__ (Skool embeds video URLs here)
285
- const nextDataVideo = await extractVideoFromNextData(page);
286
- if (nextDataVideo.url) {
287
- // For Vimeo, check if iframe has additional params we're missing
288
- if (nextDataVideo.type === "vimeo" && iframeVideo.url) {
289
- return iframeVideo; // iframe has the full URL with hash
290
- }
291
- return nextDataVideo;
292
- }
293
-
294
- // Check for already loaded video in DOM
295
- let videoInfo = await findVideoInPage(page);
296
-
297
- // If no video found, try clicking preview to trigger lazy load
298
- if (!videoInfo.url) {
299
- const clicked = await tryClickVideoPreview(page);
300
- if (clicked) {
301
- // Re-check for video after clicking
302
- videoInfo = await findVideoInPage(page);
303
- }
304
- }
305
-
306
- return videoInfo;
307
- }
308
-
309
- /**
310
- * Extracts video URL directly from iframe src attribute.
311
- * This captures the full URL including auth parameters.
312
- */
313
- async function extractVideoFromIframe(
314
- page: Page
315
- ): Promise<{ url: string | null; type: LessonContent["videoType"] }> {
316
- return page.evaluate(() => {
317
- // Check for Vimeo iframe (prioritize this for auth params)
318
- const vimeoIframe = document.querySelector('iframe[src*="vimeo.com"]');
319
- if (vimeoIframe) {
320
- const src = (vimeoIframe as HTMLIFrameElement).src;
321
- if (src) {
322
- return { url: src, type: "vimeo" as const };
323
- }
324
- }
325
-
326
- // Check for Loom iframe
327
- const loomIframe = document.querySelector('iframe[src*="loom.com"]');
328
- if (loomIframe) {
329
- const src = (loomIframe as HTMLIFrameElement).src;
330
- if (src) {
331
- return { url: src, type: "loom" as const };
332
- }
333
- }
334
-
335
- // Check for YouTube iframe
336
- const ytIframe = document.querySelector('iframe[src*="youtube.com"], iframe[src*="youtu.be"]');
337
- if (ytIframe) {
338
- const src = (ytIframe as HTMLIFrameElement).src;
339
- if (src) {
340
- return { url: src, type: "youtube" as const };
341
- }
342
- }
343
-
344
- return { url: null, type: null };
345
- });
346
- }
347
-
348
- /**
349
- * Extracts video URL from Skool's __NEXT_DATA__ JSON.
350
- * This is the most reliable method as Skool stores video metadata here.
351
- *
352
- * IMPORTANT: __NEXT_DATA__ is only updated on full page loads, not SPA navigation.
353
- * Our sync uses page.goto() which triggers full loads, so this is fine.
354
- * We also verify the module ID matches the current URL to detect stale data.
355
- */
356
- async function extractVideoFromNextData(
357
- page: Page
358
- ): Promise<{ url: string | null; type: LessonContent["videoType"] }> {
359
- // Get raw data from browser
360
- const { nextDataJson, urlModuleId } = await page.evaluate(() => {
361
- const nextDataScript = document.querySelector("#__NEXT_DATA__");
362
- const json = nextDataScript?.textContent ?? null;
363
-
364
- // Get URL module ID to verify freshness
365
- const urlParams = new URLSearchParams(window.location.search);
366
- const urlMd = urlParams.get("md");
367
-
368
- return {
369
- nextDataJson: json,
370
- urlModuleId: urlMd,
371
- };
372
- });
373
-
374
- if (!nextDataJson) {
375
- return { url: null, type: null };
376
- }
377
-
378
- // Parse and validate in Node context
379
- const parsed = parseNextData(nextDataJson);
380
- if (!parsed) {
381
- return { url: null, type: null };
382
- }
383
-
384
- const selectedModuleId = parsed.props?.pageProps?.selectedModule;
385
- if (!selectedModuleId) {
386
- return { url: null, type: null };
387
- }
388
-
389
- // Verify this matches the current URL (detect stale __NEXT_DATA__ from SPA navigation)
390
- if (urlModuleId && urlModuleId !== selectedModuleId) {
391
- // __NEXT_DATA__ is stale (SPA navigation happened), don't trust it
392
- return { url: null, type: null };
393
- }
394
-
395
- // Extract video using validated data
396
- const videoInfo = extractVideoFromNextDataSchema(parsed, selectedModuleId);
397
- if (videoInfo) {
398
- return { url: videoInfo.url, type: videoInfo.type };
399
- }
400
-
401
- return { url: null, type: null };
402
- }
403
-
404
- /**
405
- * Finds video URL in the current page state.
406
- */
407
- async function findVideoInPage(
408
- page: Page
409
- ): Promise<{ url: string | null; type: LessonContent["videoType"] }> {
410
- const videoInfo = await page.evaluate(() => {
411
- // Check for Loom iframe (most common on Skool)
412
- const loomIframe = document.querySelector('iframe[src*="loom.com"]');
413
- if (loomIframe) {
414
- return { url: (loomIframe as HTMLIFrameElement).src, type: "loom" as const };
415
- }
416
-
417
- // Check for Loom embed link (sometimes used instead of iframe)
418
- const loomLinks = Array.from(document.querySelectorAll('a[href*="loom.com"]'));
419
- for (const link of loomLinks) {
420
- const href = (link as HTMLAnchorElement).href;
421
- if (href.includes("/share/") || href.includes("/embed/")) {
422
- // Convert share URL to embed URL
423
- const embedUrl = href.replace("/share/", "/embed/");
424
- return { url: embedUrl, type: "loom" as const };
425
- }
426
- }
427
-
428
- // Check for Loom data attributes
429
- const loomEmbed = document.querySelector("[data-loom-id]");
430
- if (loomEmbed) {
431
- const loomId = loomEmbed.getAttribute("data-loom-id");
432
- if (loomId) {
433
- return { url: `https://www.loom.com/embed/${loomId}`, type: "loom" as const };
434
- }
435
- }
436
-
437
- // Check for Loom URLs in any element's attributes or content
438
- const allElements = Array.from(document.querySelectorAll("*"));
439
- for (const el of allElements) {
440
- // Check data attributes
441
- const attrs = Array.from(el.attributes);
442
- for (const attr of attrs) {
443
- if (attr.value.includes("loom.com/share/") || attr.value.includes("loom.com/embed/")) {
444
- const match = /loom\.com\/(share|embed)\/([a-f0-9]+)/.exec(attr.value);
445
- if (match?.[2]) {
446
- return { url: `https://www.loom.com/embed/${match[2]}`, type: "loom" as const };
447
- }
448
- }
449
- }
450
- }
451
-
452
- // Check for Vimeo iframe
453
- const vimeoIframe = document.querySelector('iframe[src*="vimeo.com"]');
454
- if (vimeoIframe) {
455
- return { url: (vimeoIframe as HTMLIFrameElement).src, type: "vimeo" as const };
456
- }
457
-
458
- // Check for YouTube iframe
459
- const youtubeIframe = document.querySelector(
460
- 'iframe[src*="youtube.com"], iframe[src*="youtu.be"]'
461
- );
462
- if (youtubeIframe) {
463
- return { url: (youtubeIframe as HTMLIFrameElement).src, type: "youtube" as const };
464
- }
465
-
466
- // Check for Wistia
467
- const wistiaVideo = document.querySelector('[class*="wistia"]');
468
- if (wistiaVideo) {
469
- const wistiaId = /wistia_embed wistia_async_(\w+)/.exec(wistiaVideo.className);
470
- if (wistiaId?.[1]) {
471
- return {
472
- url: `https://fast.wistia.net/embed/medias/${wistiaId[1]}`,
473
- type: "wistia" as const,
474
- };
475
- }
476
- }
477
-
478
- // Check for HTML5 video
479
- const videoElement = document.querySelector("video");
480
- if (videoElement) {
481
- const source = videoElement.querySelector("source");
482
- const src = source?.src ?? videoElement.src;
483
- if (src) {
484
- return { url: src, type: "native" as const };
485
- }
486
- }
487
-
488
- // Check for any iframe that might be a video player
489
- const iframes = Array.from(document.querySelectorAll("iframe"));
490
- for (const iframe of iframes) {
491
- const src = iframe.src;
492
- if (
493
- src &&
494
- !src.includes("stripe.com") &&
495
- (src.includes("embed") || src.includes("player") || src.includes("video"))
496
- ) {
497
- return { url: src, type: "unknown" as const };
498
- }
499
- }
500
-
501
- // Last resort: search page HTML for loom URLs
502
- const pageHtml = document.documentElement.outerHTML;
503
- const loomMatch = /loom\.com\/(share|embed)\/([a-f0-9]{32})/.exec(pageHtml);
504
- if (loomMatch?.[2]) {
505
- return { url: `https://www.loom.com/embed/${loomMatch[2]}`, type: "loom" as const };
506
- }
507
-
508
- // Try to find explicit Loom video IDs in script tags
509
- const scripts = Array.from(document.querySelectorAll("script"));
510
- for (const script of scripts) {
511
- const content = script.textContent ?? "";
512
-
513
- // Look for explicit loom URL patterns in scripts
514
- const loomUrlMatch = /loom\.com\/(share|embed)\/([a-f0-9]{32})/.exec(content);
515
- if (loomUrlMatch?.[2]) {
516
- return { url: `https://www.loom.com/embed/${loomUrlMatch[2]}`, type: "loom" as const };
517
- }
518
-
519
- // Look for loom video ID near "loom" keyword
520
- const loomContextMatch = /["']loom["'][^}]*["']([a-f0-9]{32})["']/i.exec(content);
521
- if (loomContextMatch?.[1]) {
522
- return { url: `https://www.loom.com/embed/${loomContextMatch[1]}`, type: "loom" as const };
523
- }
524
- }
525
-
526
- return { url: null, type: null };
527
- });
528
-
529
- return videoInfo;
530
- }
531
-
532
- /**
533
- * Extracts the text content from the lesson page.
534
- */
535
- export async function extractTextContent(page: Page): Promise<{ html: string; markdown: string }> {
536
- const html = await page.evaluate(() => {
537
- // Skool lesson content is typically in a styled div below the video
538
- // Look for common content container patterns
539
- const contentSelectors = [
540
- '[class*="LessonContent"]',
541
- '[class*="PostContent"]',
542
- '[class*="ContentWrapper"]',
543
- '[class*="content-body"]',
544
- "article",
545
- '[class*="prose"]',
546
- ];
547
-
548
- for (const selector of contentSelectors) {
549
- const element = document.querySelector(selector);
550
- if (element?.textContent && element.textContent.trim().length > 50) {
551
- // Clone to avoid modifying the actual page
552
- const clone = element.cloneNode(true) as HTMLElement;
553
-
554
- // Remove unwanted elements
555
- const unwanted = clone.querySelectorAll(
556
- "script, style, nav, [class*='video'], [class*='Video'], iframe, [class*='player'], [class*='Player']"
557
- );
558
- unwanted.forEach((el) => {
559
- el.remove();
560
- });
561
-
562
- return clone.innerHTML;
563
- }
564
- }
565
-
566
- // Fallback: Try to find the main content area by structure
567
- // Skool typically has: Header -> Video -> Content
568
- const mainContent = document.querySelector("main, [class*='Main']");
569
- if (mainContent) {
570
- const clone = mainContent.cloneNode(true) as HTMLElement;
571
-
572
- // Remove video player and navigation
573
- const unwanted = clone.querySelectorAll(
574
- "script, style, nav, header, [class*='video'], [class*='Video'], iframe, [class*='Sidebar'], [class*='sidebar']"
575
- );
576
- unwanted.forEach((el) => {
577
- el.remove();
578
- });
579
-
580
- // Get remaining text content
581
- const textContent = clone.innerHTML;
582
- if (textContent.trim().length > 100) {
583
- return textContent;
584
- }
585
- }
586
-
587
- return "";
588
- });
589
-
590
- const markdown = html ? turndown.turndown(html) : "";
591
-
592
- return { html, markdown };
593
- }
594
-
595
- /**
596
- * File extensions we want to download.
597
- */
598
- const DOWNLOADABLE_EXTENSIONS = [
599
- "pdf",
600
- "doc",
601
- "docx",
602
- "xls",
603
- "xlsx",
604
- "ppt",
605
- "pptx",
606
- "zip",
607
- "rar",
608
- "7z",
609
- "txt",
610
- "csv",
611
- "epub",
612
- "mobi",
613
- ] as const;
614
- /* v8 ignore stop */
615
-
616
- /**
617
- * Gets the file type from extension.
618
- */
619
- export function getFileType(ext: string): DownloadableFile["type"] {
620
- const lowerExt = ext.toLowerCase();
621
- if (lowerExt === "pdf") return "pdf";
622
- if (lowerExt === "doc") return "doc";
623
- if (lowerExt === "docx") return "docx";
624
- if (lowerExt === "xls") return "xls";
625
- if (lowerExt === "xlsx") return "xlsx";
626
- if (lowerExt === "ppt") return "ppt";
627
- if (lowerExt === "pptx") return "pptx";
628
- if (["zip", "rar", "7z"].includes(lowerExt)) return "zip";
629
- return "other";
630
- }
631
-
632
- /* v8 ignore start */
633
- /**
634
- * Extracts downloadable file links from the page content.
635
- */
636
- export async function extractDownloadableFiles(page: Page): Promise<DownloadableFile[]> {
637
- const files = await page.evaluate(
638
- (extensions) => {
639
- const results: { url: string; filename: string; ext: string }[] = [];
640
- const seen = new Set<string>();
641
-
642
- // Find all links in the page
643
- const links = document.querySelectorAll("a[href]");
644
-
645
- for (const link of Array.from(links)) {
646
- const href = (link as HTMLAnchorElement).href;
647
- if (!href || seen.has(href)) continue;
648
-
649
- // Check if the URL ends with a downloadable extension
650
- const urlWithoutQuery = href.split("?")[0] ?? href;
651
- const ext = urlWithoutQuery.split(".").pop()?.toLowerCase() ?? "";
652
-
653
- if (extensions.includes(ext)) {
654
- seen.add(href);
655
-
656
- // Try to get filename from link text, download attribute, or URL
657
- let filename = (link as HTMLAnchorElement).download;
658
- if (!filename) {
659
- filename = link.textContent?.trim() ?? "";
660
- }
661
- if (!filename || filename.length > 100) {
662
- // Extract from URL
663
- const urlParts = urlWithoutQuery.split("/");
664
- filename = urlParts[urlParts.length - 1] ?? `file.${ext}`;
665
- }
666
-
667
- // Ensure filename has the correct extension
668
- if (!filename.toLowerCase().endsWith(`.${ext}`)) {
669
- filename = `${filename}.${ext}`;
670
- }
671
-
672
- // Sanitize filename
673
- filename = filename.replace(/[<>:"/\\|?*]/g, "_").trim();
674
-
675
- results.push({ url: href, filename, ext });
676
- }
677
- }
678
-
679
- return results;
680
- },
681
- DOWNLOADABLE_EXTENSIONS as unknown as string[]
682
- );
683
-
684
- return files.map((f) => ({
685
- url: f.url,
686
- filename: f.filename,
687
- type: getFileType(f.ext),
688
- }));
689
- }
690
-
691
- /**
692
- * Extracts all content from a lesson page.
693
- */
694
- export async function extractLessonContent(page: Page, lessonUrl: string): Promise<LessonContent> {
695
- const currentUrl = page.url();
696
-
697
- if (currentUrl !== lessonUrl) {
698
- await page.goto(lessonUrl, { timeout: 30000 });
699
- await page.waitForLoadState("domcontentloaded");
700
- await page.waitForTimeout(2000);
701
- }
702
-
703
- const title = await page.title();
704
- const { url: videoUrl, type: videoType } = await extractVideoUrl(page);
705
- const { html: htmlContent, markdown: markdownContent } = await extractTextContent(page);
706
- const downloadableFiles = await extractDownloadableFiles(page);
707
-
708
- // Clean up title: "1. Lesson Name - Module Name · Course Name" -> "1. Lesson Name"
709
- const cleanTitle = title.split(" - ")[0]?.trim() ?? title;
710
-
711
- return {
712
- title: cleanTitle,
713
- videoUrl,
714
- videoType,
715
- htmlContent,
716
- markdownContent,
717
- downloadableFiles,
718
- };
719
- }
720
- /* v8 ignore stop */
721
-
722
- /**
723
- * Extracts the Loom video ID from an embed URL.
724
- */
725
- export function extractLoomVideoId(embedUrl: string): string | null {
726
- const match = /loom\.com\/embed\/([a-f0-9]+)/.exec(embedUrl);
727
- return match?.[1] ?? null;
728
- }
729
-
730
- /**
731
- * Cleans and formats the markdown content.
732
- */
733
- export function formatMarkdown(
734
- title: string,
735
- content: string,
736
- videoUrl: string | null,
737
- videoType: string | null
738
- ): string {
739
- const lines = [`# ${title}`, ""];
740
-
741
- if (videoUrl) {
742
- const videoLabel = videoType
743
- ? `${videoType.charAt(0).toUpperCase()}${videoType.slice(1)}`
744
- : "Video";
745
- lines.push(`> 📺 ${videoLabel}: ${videoUrl}`, "");
746
- }
747
-
748
- if (content.trim()) {
749
- lines.push(content);
750
- }
751
-
752
- // Clean up excessive newlines
753
- return lines
754
- .join("\n")
755
- .replace(/\n{3,}/g, "\n\n")
756
- .trim();
757
- }