@nshipster/sosumi 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +19 -0
- package/README.md +304 -0
- package/bin/sosumi.mjs +76 -0
- package/package.json +53 -0
- package/public/_headers +2 -0
- package/public/favicon.ico +0 -0
- package/public/favicon.svg +7 -0
- package/public/icons/square.and.pencil.svg +15 -0
- package/public/index.html +898 -0
- package/public/llms.txt +184 -0
- package/public/sosumi.m4a +0 -0
- package/src/cli.ts +214 -0
- package/src/index.ts +507 -0
- package/src/lib/cli-endpoints.ts +106 -0
- package/src/lib/external/fetch.ts +133 -0
- package/src/lib/external/index.ts +8 -0
- package/src/lib/external/policy.ts +308 -0
- package/src/lib/external/types.ts +10 -0
- package/src/lib/fetch.ts +43 -0
- package/src/lib/hig/fetch.ts +186 -0
- package/src/lib/hig/index.ts +9 -0
- package/src/lib/hig/render.ts +514 -0
- package/src/lib/hig/types.ts +206 -0
- package/src/lib/hig/util.ts +30 -0
- package/src/lib/mcp.ts +315 -0
- package/src/lib/reference/fetch.ts +53 -0
- package/src/lib/reference/index.ts +8 -0
- package/src/lib/reference/render.ts +739 -0
- package/src/lib/reference/types.ts +31 -0
- package/src/lib/search.ts +221 -0
- package/src/lib/types.ts +334 -0
- package/src/lib/url.ts +55 -0
- package/src/lib/video/index.ts +179 -0
- package/wrangler.jsonc +27 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import { getRandomUserAgent } from "../fetch"
|
|
2
|
+
|
|
3
|
+
/**
 * Raised when a video page, or the transcript inside it, cannot be found.
 *
 * `name` is set explicitly so that logs and `error.name` checks report
 * "TranscriptNotFoundError" instead of the inherited "Error".
 */
export class TranscriptNotFoundError extends Error {
  override name = "TranscriptNotFoundError"
}
|
|
4
|
+
|
|
5
|
+
// Trailing text removed from a page <title> when present (see extractVideoTitleFromHtml).
const APPLE_VIDEO_SUFFIX = " - Videos - Apple Developer"
|
|
6
|
+
|
|
7
|
+
/** A single timed entry of a video transcript. */
interface TranscriptLine {
  /** Start offset parsed from the span's `data-start` attribute. */
  startSeconds: number
  /** Caption text with tags removed, entities decoded and whitespace collapsed. */
  text: string
}
|
|
11
|
+
|
|
12
|
+
/**
 * Downloads a video page and renders its transcript as a Markdown document.
 *
 * @param sourceUrl - URL of the video page to fetch.
 * @param collection - Collection label echoed into the rendered document.
 * @param videoId - Video identifier; also used to build the fallback title.
 * @returns The rendered Markdown.
 * @throws TranscriptNotFoundError when the page yields no transcript lines.
 */
export async function fetchVideoTranscriptMarkdown(
  sourceUrl: string,
  collection: string,
  videoId: string,
): Promise<string> {
  const pageHtml = await fetchVideoTranscriptHtml(sourceUrl)

  // Fall back to a generic title when the page has no usable <title>.
  const extractedTitle = extractVideoTitleFromHtml(pageHtml)
  const title = extractedTitle ?? `Video ${videoId}`

  const transcriptLines = extractTranscriptLinesFromHtml(pageHtml)
  if (!transcriptLines.length) {
    throw new TranscriptNotFoundError("Transcript not found for this video.")
  }

  return renderVideoTranscriptMarkdown({ title, sourceUrl, collection, videoId, transcriptLines })
}
|
|
33
|
+
|
|
34
|
+
/**
 * Fetches the raw HTML of a video page.
 *
 * @param sourceUrl - URL of the video page.
 * @returns The response body as text.
 * @throws TranscriptNotFoundError when the server answers 404.
 * @throws Error for any other non-OK response; the message carries the status.
 */
export async function fetchVideoTranscriptHtml(sourceUrl: string): Promise<string> {
  const requestHeaders = {
    "User-Agent": getRandomUserAgent(),
    Accept: "text/html,application/xhtml+xml",
    "Cache-Control": "no-cache",
  }
  const response = await fetch(sourceUrl, { headers: requestHeaders })

  if (response.ok) {
    return response.text()
  }

  // A missing page is reported with the dedicated error type; everything else is generic.
  if (response.status === 404) {
    throw new TranscriptNotFoundError("Video not found.")
  }
  throw new Error(`Failed to fetch video page: ${response.status} ${response.statusText}`)
}
|
|
52
|
+
|
|
53
|
+
/**
 * Pulls a human-readable title out of the page's `<title>` element.
 *
 * @param html - Full page markup.
 * @returns The cleaned title with the trailing site suffix removed, or `null`
 *   when there is no `<title>` element or it is empty after cleaning.
 */
function extractVideoTitleFromHtml(html: string): string | null {
  const rawTitle = /<title>([\s\S]*?)<\/title>/i.exec(html)?.[1]
  if (rawTitle === undefined) {
    return null
  }

  // Tags are stripped before entities are decoded, so an escaped "&lt;" survives as text.
  const cleaned = decodeHtmlEntities(stripHtml(rawTitle)).trim()
  if (cleaned === "") {
    return null
  }

  return cleaned.endsWith(APPLE_VIDEO_SUFFIX)
    ? cleaned.slice(0, cleaned.length - APPLE_VIDEO_SUFFIX.length).trim()
    : cleaned
}
|
|
70
|
+
|
|
71
|
+
/**
 * Collects timed transcript lines from a video page.
 *
 * Only the first `<section id="transcript-content">` is inspected. Inside it,
 * every `<span data-start="…">` contributes one line.
 *
 * @param html - Full page markup.
 * @returns Lines in document order; empty when the section is missing or
 *   contains no usable spans.
 */
export function extractTranscriptLinesFromHtml(html: string): TranscriptLine[] {
  const sectionBody =
    /<section[^>]*id=["']transcript-content["'][^>]*>([\s\S]*?)<\/section>/i.exec(html)?.[1]
  if (sectionBody === undefined) {
    return []
  }

  const collected: TranscriptLine[] = []
  const timedSpans = sectionBody.matchAll(
    /<span[^>]*data-start=["']([\d.]+)["'][^>]*>([\s\S]*?)<\/span>/gi,
  )

  for (const span of timedSpans) {
    const startSeconds = Number.parseFloat(span[1])
    const text = decodeHtmlEntities(stripHtml(span[2])).replace(/\s+/g, " ").trim()

    // Skip spans with an unparsable start time or no visible text.
    if (!Number.isFinite(startSeconds) || !text) {
      continue
    }
    collected.push({ startSeconds, text })
  }

  return collected
}
|
|
97
|
+
|
|
98
|
+
/**
 * Builds the Markdown document for a transcript: a front-matter header, a
 * heading with collection and video labels, one bullet per transcript line,
 * and an attribution footer.
 *
 * The `timestamp` field is the time of rendering, so output differs between calls.
 */
function renderVideoTranscriptMarkdown({
  title,
  sourceUrl,
  collection,
  videoId,
  transcriptLines,
}: {
  title: string
  sourceUrl: string
  collection: string
  videoId: string
  transcriptLines: TranscriptLine[]
}): string {
  const frontMatter = [
    "---",
    `title: ${title}`,
    `source: ${sourceUrl}`,
    `timestamp: ${new Date().toISOString()}`,
    "---",
  ]

  const heading = [`# ${title}`, "", `**Collection:** ${collection}`, "", `**Video:** ${videoId}`]

  // Kept as one pre-joined entry so an empty transcript still yields a blank line.
  const bullets = transcriptLines
    .map(({ startSeconds, text }) => `- [${formatTimestamp(startSeconds)}] ${text}`)
    .join("\n")

  const footer = [
    "---",
    "",
    "*Extracted by [sosumi.ai](https://sosumi.ai) - Making Apple docs AI-readable.*",
    "*This is unofficial content. All transcripts belong to Apple Inc.*",
    "",
  ]

  return [...frontMatter, "", ...heading, "", "## Transcript", "", bullets, "", ...footer].join("\n")
}
|
|
139
|
+
|
|
140
|
+
/**
 * Formats an offset in seconds as zero-padded `MM:SS`.
 *
 * Fractions are truncated. Negative and non-finite values (NaN, ±Infinity)
 * are rendered as `00:00` rather than `NaN:NaN`. Minutes are not wrapped
 * into hours, so 3725 seconds becomes `62:05`.
 *
 * @param seconds - Offset from the start of the video.
 * @returns The formatted timestamp.
 */
function formatTimestamp(seconds: number): string {
  // Math.max(0, NaN) is NaN, so non-finite input needs its own guard.
  const wholeSeconds = Number.isFinite(seconds) ? Math.max(0, Math.floor(seconds)) : 0
  const minutes = Math.floor(wholeSeconds / 60)
  const remainingSeconds = wholeSeconds % 60
  return `${String(minutes).padStart(2, "0")}:${String(remainingSeconds).padStart(2, "0")}`
}
|
|
146
|
+
|
|
147
|
+
/**
 * Removes every `<…>` tag from the input and keeps the text between tags.
 *
 * A `<` with no matching `>` and an empty `<>` are left in place.
 */
function stripHtml(input: string): string {
  // Splitting on tags and re-joining equals replacing each tag with "".
  const textPieces = input.split(/<[^>]+>/)
  return textPieces.join("")
}
|
|
150
|
+
|
|
151
|
+
/** Named entities this module decodes; any other name is left untouched. */
const NAMED_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
])

/**
 * Decodes a small set of HTML character references in a single pass.
 *
 * Handles decimal (`&#65;`) and hexadecimal (`&#x41;`, `&#X41;`) numeric
 * references plus the named entities listed in `NAMED_HTML_ENTITIES`.
 * Anything that cannot be decoded is returned unchanged: unknown names,
 * malformed numbers such as `&#12ab;`, and code points above U+10FFFF.
 * The function never throws and never silently drops text.
 *
 * Because decoding is one pass, `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * @param input - Text that may contain character references.
 * @returns The text with recognised references replaced.
 */
function decodeHtmlEntities(input: string): string {
  return input.replace(
    /&(#\d+|#[xX][0-9a-fA-F]+|[a-zA-Z]+);/g,
    (match: string, entity: string) => {
      if (!entity.startsWith("#")) {
        return NAMED_HTML_ENTITIES.get(entity) ?? match
      }

      const isHex = entity[1] === "x" || entity[1] === "X"
      const codePoint = Number.parseInt(entity.slice(isHex ? 2 : 1), isHex ? 16 : 10)

      // String.fromCodePoint throws RangeError outside 0..0x10FFFF, so check first.
      if (!Number.isInteger(codePoint) || codePoint > 0x10ffff) {
        return match
      }
      return String.fromCodePoint(codePoint)
    },
  )
}
|
package/wrangler.jsonc
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* For more details on how to configure Wrangler, refer to:
|
|
3
|
+
* https://developers.cloudflare.com/workers/wrangler/configuration/
|
|
4
|
+
*/
|
|
5
|
+
{
|
|
6
|
+
"$schema": "node_modules/wrangler/config-schema.json",
|
|
7
|
+
"name": "sosumi-ai",
|
|
8
|
+
"routes": [
|
|
9
|
+
{
|
|
10
|
+
"pattern": "sosumi.ai",
|
|
11
|
+
"custom_domain": true
|
|
12
|
+
}
|
|
13
|
+
],
|
|
14
|
+
"main": "src/index.ts",
|
|
15
|
+
"compatibility_date": "2025-08-23",
|
|
16
|
+
"compatibility_flags": ["nodejs_compat"],
|
|
17
|
+
"assets": {
|
|
18
|
+
"binding": "ASSETS",
|
|
19
|
+
"directory": "./public"
|
|
20
|
+
},
|
|
21
|
+
"observability": {
|
|
22
|
+
"enabled": true
|
|
23
|
+
},
|
|
24
|
+
"vars": {
|
|
25
|
+
"NODE_ENV": "production"
|
|
26
|
+
}
|
|
27
|
+
}
|