@nshipster/sosumi 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +19 -0
- package/README.md +304 -0
- package/bin/sosumi.mjs +76 -0
- package/package.json +53 -0
- package/public/_headers +2 -0
- package/public/favicon.ico +0 -0
- package/public/favicon.svg +7 -0
- package/public/icons/square.and.pencil.svg +15 -0
- package/public/index.html +898 -0
- package/public/llms.txt +184 -0
- package/public/sosumi.m4a +0 -0
- package/src/cli.ts +214 -0
- package/src/index.ts +507 -0
- package/src/lib/cli-endpoints.ts +106 -0
- package/src/lib/external/fetch.ts +133 -0
- package/src/lib/external/index.ts +8 -0
- package/src/lib/external/policy.ts +308 -0
- package/src/lib/external/types.ts +10 -0
- package/src/lib/fetch.ts +43 -0
- package/src/lib/hig/fetch.ts +186 -0
- package/src/lib/hig/index.ts +9 -0
- package/src/lib/hig/render.ts +514 -0
- package/src/lib/hig/types.ts +206 -0
- package/src/lib/hig/util.ts +30 -0
- package/src/lib/mcp.ts +315 -0
- package/src/lib/reference/fetch.ts +53 -0
- package/src/lib/reference/index.ts +8 -0
- package/src/lib/reference/render.ts +739 -0
- package/src/lib/reference/types.ts +31 -0
- package/src/lib/search.ts +221 -0
- package/src/lib/types.ts +334 -0
- package/src/lib/url.ts +55 -0
- package/src/lib/video/index.ts +179 -0
- package/wrangler.jsonc +27 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { renderFromJSON } from "../reference"
|
|
2
|
+
import type { AppleDocJSON } from "../types"
|
|
3
|
+
import {
|
|
4
|
+
assertExternalDocumentationAccess,
|
|
5
|
+
ExternalAccessError,
|
|
6
|
+
validateExternalDocumentationUrl,
|
|
7
|
+
} from "./policy"
|
|
8
|
+
import type { ExternalPolicyEnv, RobotsPolicyResult } from "./types"
|
|
9
|
+
|
|
10
|
+
// X-Robots-Tag directives that, when present on a response, cause the fetched
// document to be rejected (see containsRestrictiveXRobotsTag).
const RESTRICTIVE_X_ROBOTS_TAGS = ["none", "noindex", "noai", "noimageai"] as const
|
|
11
|
+
|
|
12
|
+
/**
 * Returns the part of a URL's path that comes before its `/documentation` segment.
 *
 * Trailing slashes are ignored. The prefix group is lazy, so when the path
 * contains more than one `/documentation` segment the prefix ends at the first.
 *
 * @param sourceUrl - URL whose pathname is inspected
 * @returns The path prefix; an empty string when `/documentation` sits at the root
 * @throws ExternalAccessError with status 400 when the path has no `/documentation` segment
 */
export function extractExternalDocumentationBasePath(sourceUrl: URL): string {
  const trimmedPath = sourceUrl.pathname.replace(/\/+$/, "")
  const parts = /^(.*?)(\/documentation(?:\/.*)?)$/.exec(trimmedPath)

  if (parts !== null) {
    return parts[1]
  }

  throw new ExternalAccessError("External URL must point to a Swift-DocC documentation path.", 400)
}
|
|
24
|
+
|
|
25
|
+
/**
 * Maps a documentation page URL to the URL of its JSON payload.
 *
 * The result has the shape `<origin><basePath>/data<documentationPath>.json`;
 * a `.json` suffix is appended only when the path does not already end in one.
 * Query string and fragment of `sourceUrl` are not carried over.
 *
 * @param sourceUrl - URL of the documentation page
 * @returns URL of the JSON document on the same origin
 * @throws ExternalAccessError when the path has no `/documentation` segment
 */
export function buildExternalDocCJsonUrl(sourceUrl: URL): URL {
  const basePath = extractExternalDocumentationBasePath(sourceUrl)
  const trimmedPath = sourceUrl.pathname.replace(/\/+$/, "")

  let docPath = trimmedPath.slice(basePath.length)
  if (!docPath.endsWith(".json")) {
    docPath = `${docPath}.json`
  }

  return new URL(`${basePath}/data${docPath}`, sourceUrl.origin)
}
|
|
33
|
+
|
|
34
|
+
/**
 * Fetches the JSON payload behind an external documentation page.
 *
 * The URL is validated and checked against the host/robots policy before any
 * request for the document itself is made. A response carrying a restrictive
 * `X-Robots-Tag` header is rejected regardless of its status code.
 *
 * NOTE(review): `fetch` is called with default options, so redirects are
 * presumably followed without re-running the host policy on the final URL —
 * confirm whether that is acceptable for the deployment runtime.
 *
 * @param sourceUrl - URL of the documentation page
 * @param externalPolicyEnv - Allowlist/blocklist configuration; defaults to none
 * @returns The parsed response body, cast to AppleDocJSON without validation
 * @throws ExternalAccessError for policy violations (400/403) and for a 404 response
 * @throws Error for any other non-OK response
 */
export async function fetchExternalDocCJSON(
  sourceUrl: URL,
  externalPolicyEnv: ExternalPolicyEnv = {},
): Promise<AppleDocJSON> {
  const checkedUrl = validateExternalDocumentationUrl(sourceUrl.toString())
  await assertExternalDocumentationAccess(checkedUrl, externalPolicyEnv)

  const endpoint = buildExternalDocCJsonUrl(checkedUrl).toString()
  const requestHeaders = {
    "User-Agent": EXTERNAL_DOC_USER_AGENT,
    Accept: "application/json",
  }
  const response = await fetch(endpoint, { headers: requestHeaders })

  // The header check comes first so an opt-out is honored even on error responses.
  if (containsRestrictiveXRobotsTag(response.headers.get("x-robots-tag"))) {
    throw new ExternalAccessError(
      "External host denied AI/doc access via X-Robots-Tag response header.",
      403,
    )
  }

  if (response.ok) {
    return (await response.json()) as AppleDocJSON
  }

  if (response.status === 404) {
    throw new ExternalAccessError(`External documentation page not found at ${endpoint}`, 404)
  }

  throw new Error(`Failed to fetch external DocC JSON: ${response.status} ${response.statusText}`)
}
|
|
69
|
+
|
|
70
|
+
/**
 * Fetches an external documentation page and renders it via `renderFromJSON`.
 *
 * @param url - Raw URL string of the documentation page
 * @param externalPolicyEnv - Allowlist/blocklist configuration; defaults to none
 * @returns The rendered document
 * @throws ExternalAccessError when the URL is invalid or access is denied
 */
export async function fetchExternalDocumentationMarkdown(
  url: string,
  externalPolicyEnv: ExternalPolicyEnv = {},
): Promise<string> {
  const pageUrl = validateExternalDocumentationUrl(url)
  const docJson = await fetchExternalDocCJSON(pageUrl, externalPolicyEnv)

  // Origin plus any path prefix in front of `/documentation`, handed to the renderer.
  const externalOrigin = `${pageUrl.origin}${extractExternalDocumentationBasePath(pageUrl)}`

  return renderFromJSON(docJson, pageUrl.toString(), { externalOrigin })
}
|
|
81
|
+
|
|
82
|
+
/**
 * Downloads `/robots.txt` from an origin and classifies the outcome.
 *
 * Status mapping:
 * - 404, 410, 403 → `not-found` (the caller decides on a fallback)
 * - 401 → `deny-all`
 * - any other non-OK status → `allow-all` (fail open)
 * - OK → `rules`, carrying the response body text
 *
 * A rejected `fetch` (for example a network failure) is not caught here and
 * propagates to the caller.
 *
 * @param origin - Origin whose robots.txt is requested
 * @param userAgent - Value sent in the `User-Agent` request header
 * @returns The classified robots policy
 */
export async function fetchRobotsPolicy(
  origin: string,
  userAgent: string,
): Promise<RobotsPolicyResult> {
  const target = new URL("/robots.txt", origin).toString()
  const requestHeaders = {
    "User-Agent": userAgent,
    Accept: "text/plain, text/*;q=0.9, */*;q=0.1",
  }
  const response = await fetch(target, { headers: requestHeaders })

  switch (response.status) {
    case 404:
    case 410:
    case 403:
      // Missing or inaccessible robots.txt.
      return { kind: "not-found" }
    case 401:
      // robots.txt sits behind authentication: treated as an explicit denial.
      return { kind: "deny-all" }
  }

  if (!response.ok) {
    // Remaining error statuses (server errors and the like) fail open.
    return { kind: "allow-all" }
  }

  return { kind: "rules", robotsText: await response.text() }
}
|
|
112
|
+
|
|
113
|
+
/**
 * Reports whether an `X-Robots-Tag` header value contains a restrictive directive.
 *
 * The value is lower-cased and split on commas; each trimmed piece must equal
 * one of RESTRICTIVE_X_ROBOTS_TAGS exactly, so a piece carrying a prefix such
 * as `googlebot: noindex` does not match.
 *
 * @param headerValue - Raw header value, or null when the header is absent
 * @returns true when at least one restrictive directive is present
 */
function containsRestrictiveXRobotsTag(headerValue: string | null): boolean {
  if (!headerValue) {
    return false
  }

  const directives = headerValue
    .toLowerCase()
    .split(",")
    .map((directive) => directive.trim())

  return RESTRICTIVE_X_ROBOTS_TAGS.some((tag) => directives.includes(tag))
}
|
|
133
|
+
// User-Agent string sent with external documentation requests and used when
// evaluating robots.txt rules.
export const EXTERNAL_DOC_USER_AGENT = "sosumi-ai/1.0 (+https://sosumi.ai/#bot)"
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
import robotsParser from "robots-parser"
|
|
2
|
+
|
|
3
|
+
import { EXTERNAL_DOC_USER_AGENT, fetchRobotsPolicy } from "./fetch"
|
|
4
|
+
import type { ExternalPolicyEnv, RobotsPolicyResult } from "./types"
|
|
5
|
+
|
|
6
|
+
// Hostnames that are always treated as local (see isLocalOrPrivateHost).
const LOCAL_HOSTNAMES = new Set(["localhost", "127.0.0.1", "::1"])
|
|
7
|
+
// Path prefix that decodeExternalTargetPath requires and strips before decoding.
const EXTERNAL_PATH_PREFIX = "/external/"
|
|
8
|
+
// Lifetime of a cached robots policy, in milliseconds (5 minutes).
const ROBOTS_CACHE_TTL_MS = 5 * 60 * 1000
|
|
9
|
+
// Upper bound on the number of entries kept in robotsPolicyCache.
const ROBOTS_CACHE_MAX_ENTRIES = 1000
|
|
10
|
+
// Upper bound on the number of entries kept in robotsPolicyInFlight.
const ROBOTS_INFLIGHT_MAX_ENTRIES = 1000
|
|
11
|
+
// Resolved robots policies keyed by origin, each with an absolute expiry timestamp (ms).
const robotsPolicyCache = new Map<string, { expiresAt: number; policy: RobotsPolicyResult }>()
|
|
12
|
+
// Pending robots lookups keyed by origin, so concurrent callers share one request.
const robotsPolicyInFlight = new Map<string, Promise<RobotsPolicyResult>>()
|
|
13
|
+
|
|
14
|
+
/**
 * Error raised when an external documentation URL is invalid or access to it
 * is refused. Carries a numeric status alongside the message.
 */
export class ExternalAccessError extends Error {
  /** Status code associated with the failure; 403 unless specified. */
  status: number

  /**
   * @param message - Description of the failure
   * @param status - Status code to attach; defaults to 403
   */
  constructor(message: string, status: number = 403) {
    super(message)
    this.status = status
    this.name = "ExternalAccessError"
  }
}
|
|
23
|
+
|
|
24
|
+
/**
 * Parses a raw URL string and enforces the basic rules for external targets.
 *
 * Rejected with status 400: empty input, input containing control characters
 * or whitespace, unparseable input, any scheme other than https, embedded
 * credentials, and a non-empty fragment.
 *
 * @param rawUrl - URL string to validate
 * @returns The parsed URL
 * @throws ExternalAccessError with status 400 when any rule is violated
 */
export function validateExternalDocumentationUrl(rawUrl: string): URL {
  const invalid = (): ExternalAccessError => new ExternalAccessError("Invalid external URL.", 400)

  if (!rawUrl || hasControlOrWhitespace(rawUrl)) {
    throw invalid()
  }

  let candidate: URL
  try {
    candidate = new URL(rawUrl)
  } catch {
    throw invalid()
  }

  const { protocol, username, password, hash } = candidate

  if (protocol !== "https:") {
    throw new ExternalAccessError("Only https:// external URLs are supported.", 400)
  }
  if (username !== "" || password !== "") {
    throw new ExternalAccessError("Credentialed URLs are not supported.", 400)
  }
  if (hash !== "") {
    throw new ExternalAccessError("URL fragments are not supported.", 400)
  }

  return candidate
}
|
|
51
|
+
|
|
52
|
+
/**
 * Extracts and percent-decodes the target that follows the `/external/` prefix.
 *
 * Every failure mode produces the same error: wrong prefix, nothing after the
 * prefix, malformed percent-encoding, or a decoded value containing control
 * characters or whitespace.
 *
 * @param path - Request path expected to start with `/external/`
 * @returns The decoded target string
 * @throws ExternalAccessError with status 400 on any failure
 */
export function decodeExternalTargetPath(path: string): string {
  const invalid = (): ExternalAccessError => new ExternalAccessError("Invalid external URL.", 400)

  if (!path.startsWith(EXTERNAL_PATH_PREFIX)) {
    throw invalid()
  }

  const encoded = path.slice(EXTERNAL_PATH_PREFIX.length)
  if (encoded === "") {
    throw invalid()
  }

  let decoded: string
  try {
    decoded = decodeURIComponent(encoded)
  } catch {
    // decodeURIComponent throws on malformed escape sequences.
    throw invalid()
  }

  if (decoded === "" || hasControlOrWhitespace(decoded)) {
    throw invalid()
  }
  return decoded
}
|
|
72
|
+
|
|
73
|
+
/**
 * Verifies that a target URL may be fetched: the host policy is applied
 * first, then the robots.txt check.
 *
 * @param targetUrl - URL about to be fetched
 * @param env - Allowlist/blocklist configuration
 * @throws ExternalAccessError with status 403 when either check refuses access
 */
export async function assertExternalDocumentationAccess(
  targetUrl: URL,
  env: ExternalPolicyEnv,
): Promise<void> {
  assertHostPolicy(targetUrl, env)

  if (await isAllowedByRobotsTxt(targetUrl)) {
    return
  }

  throw new ExternalAccessError("External host denied access for this path via robots.txt.", 403)
}
|
|
83
|
+
|
|
84
|
+
/**
 * Applies the configured host rules to a target URL.
 *
 * Order of evaluation:
 * 1. A blocklisted host is always rejected.
 * 2. An allowlisted host is accepted (this also permits local/private hosts).
 * 3. When an allowlist is configured, every other host is rejected.
 * 4. Otherwise local or private hosts are rejected.
 *
 * @param targetUrl - URL whose hostname is checked
 * @param env - Source of the allowlist and blocklist strings
 * @throws ExternalAccessError with status 403 when the host is refused
 */
function assertHostPolicy(targetUrl: URL, env: ExternalPolicyEnv): void {
  const host = targetUrl.hostname.toLowerCase()
  const deny = (message: string): ExternalAccessError => new ExternalAccessError(message, 403)

  if (isHostListed(host, parseHostList(env.EXTERNAL_DOC_HOST_BLOCKLIST))) {
    throw deny("External host is blocked by configuration.")
  }

  const allowedHosts = parseHostList(env.EXTERNAL_DOC_HOST_ALLOWLIST)
  if (isHostListed(host, allowedHosts)) {
    return
  }

  if (allowedHosts.size > 0) {
    throw deny("External host is not allowlisted.")
  }

  if (isLocalOrPrivateHost(host)) {
    // Only obvious local/private hostnames are caught here. DNS rebinding on
    // public hostnames still needs an explicit allowlist for strict SSRF
    // protection in runtimes without DNS resolution APIs.
    throw deny("External URL points to a local or private host and is not allowlisted.")
  }
}
|
|
107
|
+
|
|
108
|
+
/**
 * Resolves the robots policy for the target's origin and applies it.
 *
 * @param targetUrl - URL to be checked
 * @returns false for a `deny-all` policy or when the rules disallow the URL;
 *   true for every other policy kind
 */
async function isAllowedByRobotsTxt(targetUrl: URL): Promise<boolean> {
  const policy = await getRobotsPolicy(targetUrl.origin)

  switch (policy.kind) {
    case "deny-all":
      return false
    case "rules":
      return evaluateRobotsPolicy(policy.robotsText, targetUrl, EXTERNAL_DOC_USER_AGENT)
    default:
      // "allow-all", plus any kind not handled above, permits access.
      return true
  }
}
|
|
121
|
+
|
|
122
|
+
/**
 * Evaluates robots.txt text against a target URL for a given user agent.
 *
 * The parser is always given the robots.txt URL of the target's own origin,
 * whichever origin the text was actually downloaded from.
 *
 * @param robotsText - Contents of a robots.txt file
 * @param targetUrl - URL being checked
 * @param userAgent - User agent whose rules apply
 * @returns false only when the parser reports an explicit `false`; any other
 *   result is treated as allowed
 */
function evaluateRobotsPolicy(robotsText: string, targetUrl: URL, userAgent: string): boolean {
  const robotsTxtUrl = new URL("/robots.txt", targetUrl.origin).toString()
  const verdict = robotsParser(robotsTxtUrl, robotsText).isAllowed(targetUrl.toString(), userAgent)
  return verdict !== false
}
|
|
127
|
+
|
|
128
|
+
/**
 * Parses a host list separated by commas and/or newlines.
 *
 * Entries are trimmed and lower-cased; empty entries are dropped.
 *
 * @param rawList - Raw configuration string, possibly undefined or empty
 * @returns The set of normalized entries (empty when there is no input)
 */
function parseHostList(rawList: string | undefined): Set<string> {
  const hosts = new Set<string>()
  if (!rawList) {
    return hosts
  }

  for (const piece of rawList.split(/\r?\n|,/)) {
    const host = piece.trim().toLowerCase()
    if (host) {
      hosts.add(host)
    }
  }
  return hosts
}
|
|
140
|
+
|
|
141
|
+
/**
 * Derives the origin of the parent ("root") domain for a subdomain origin,
 * built from the last two labels of the hostname.
 *
 * Returns null when there is no parent to fall back to:
 * - the input cannot be parsed as a URL,
 * - the host is an IP literal (taking the last two labels of `8.8.8.8` would
 *   yield `8.8`, which URL parsing treats as a different address),
 * - the hostname has fewer than three labels.
 *
 * A single trailing dot (fully-qualified form) is ignored when splitting.
 * The result contains only the scheme and host, so any port on the input is
 * dropped. The last-two-labels rule is a heuristic: for multi-label public
 * suffixes such as `co.uk` it returns the suffix itself.
 *
 * @param origin - Origin such as `https://docs.example.com`
 * @returns The root origin such as `https://example.com`, or null
 */
function getRootOrigin(origin: string): string | null {
  let url: URL
  try {
    url = new URL(origin)
  } catch {
    return null
  }

  const hostname = url.hostname.toLowerCase().replace(/\.$/, "")

  // IPv6 literals are bracketed; IPv4 literals are serialized as dotted decimal.
  const isIpLiteral = hostname.startsWith("[") || /^\d+(?:\.\d+){3}$/.test(hostname)
  if (isIpLiteral) {
    return null
  }

  const labels = hostname.split(".")
  if (labels.length < 3) {
    return null
  }

  return `${url.protocol}//${labels.slice(-2).join(".")}`
}
|
|
154
|
+
|
|
155
|
+
/**
 * Returns the robots policy for an origin, using a TTL cache and sharing a
 * single request between concurrent callers.
 *
 * Steps:
 * 1. Expired cache entries are pruned.
 * 2. A fresh cache entry, or failing that a pending lookup, is returned.
 * 3. Otherwise a lookup starts. When the origin has no robots.txt
 *    (`not-found`), the root domain's robots.txt is tried; when that is also
 *    missing, or there is no distinct root domain, the result is `allow-all`.
 * 4. The result is cached and the pending entry is removed once the lookup
 *    settles, whether it succeeded or failed.
 *
 * A failed lookup rejects the returned promise and nothing is cached.
 *
 * @param origin - Origin whose robots policy is needed
 * @returns The resolved policy; never of kind `not-found`
 */
async function getRobotsPolicy(origin: string): Promise<RobotsPolicyResult> {
  const startedAt = Date.now()
  pruneExpiredRobotsPolicyEntries(startedAt)

  const hit = robotsPolicyCache.get(origin)
  if (hit !== undefined && hit.expiresAt > startedAt) {
    return hit.policy
  }

  const pending = robotsPolicyInFlight.get(origin)
  if (pending !== undefined) {
    return pending
  }

  // Fetches the origin's policy, falling back to the root domain and then to allow-all.
  const resolvePolicy = async (): Promise<RobotsPolicyResult> => {
    const direct = await fetchRobotsPolicy(origin, EXTERNAL_DOC_USER_AGENT)
    if (direct.kind !== "not-found") {
      return direct
    }

    const rootOrigin = getRootOrigin(origin)
    if (!rootOrigin || rootOrigin === origin) {
      return { kind: "allow-all" }
    }

    const inherited = await fetchRobotsPolicy(rootOrigin, EXTERNAL_DOC_USER_AGENT)
    return inherited.kind === "not-found" ? { kind: "allow-all" } : inherited
  }

  const request = resolvePolicy()
    .then((policy) => {
      const expiresAt = Date.now() + ROBOTS_CACHE_TTL_MS
      robotsPolicyCache.set(origin, { expiresAt, policy })
      enforceMaxMapEntries(robotsPolicyCache, ROBOTS_CACHE_MAX_ENTRIES)
      return policy
    })
    .finally(() => {
      robotsPolicyInFlight.delete(origin)
    })

  // Make room before registering so the in-flight map stays within its bound.
  enforceMaxMapEntries(robotsPolicyInFlight, ROBOTS_INFLIGHT_MAX_ENTRIES, origin)
  robotsPolicyInFlight.set(origin, request)
  return request
}
|
|
202
|
+
|
|
203
|
+
/**
 * Reports whether a hostname matches any entry of a host list.
 *
 * - An entry starting with "." is a suffix pattern: `.example.com` matches
 *   `a.example.com` but not `example.com`.
 * - Any other entry matches the host itself and all of its subdomains.
 *
 * @param hostname - Hostname to test
 * @param list - Entries as produced by parseHostList
 * @returns true when at least one entry matches
 */
function isHostListed(hostname: string, list: Set<string>): boolean {
  if (list.has(hostname)) {
    return true
  }

  return Array.from(list).some((entry) =>
    entry.startsWith(".")
      ? hostname.endsWith(entry)
      : hostname === entry || hostname.endsWith(`.${entry}`),
  )
}
|
|
223
|
+
|
|
224
|
+
/**
 * Removes every entry from robotsPolicyCache whose expiry is at or before `now`.
 *
 * @param now - Current time in milliseconds
 */
function pruneExpiredRobotsPolicyEntries(now: number): void {
  robotsPolicyCache.forEach((entry, origin) => {
    if (entry.expiresAt <= now) {
      robotsPolicyCache.delete(origin)
    }
  })
}
|
|
231
|
+
|
|
232
|
+
/**
 * Evicts entries in insertion order until the map respects its size bound.
 *
 * Without `incomingKey` the map is trimmed to at most `maxEntries` entries.
 * With an `incomingKey` that is not yet present, one extra slot is freed so
 * the caller can insert that key without exceeding the bound.
 *
 * @param map - Map to trim in place
 * @param maxEntries - Maximum number of entries allowed
 * @param incomingKey - Key the caller is about to insert, if any
 */
function enforceMaxMapEntries<K, V>(map: Map<K, V>, maxEntries: number, incomingKey?: K): void {
  const mustEvict = (): boolean => {
    if (map.size > maxEntries) {
      return true
    }
    return incomingKey !== undefined && map.size >= maxEntries && !map.has(incomingKey)
  }

  while (mustEvict()) {
    // Maps iterate in insertion order, so the first key is the oldest.
    const victim = map.keys().next().value
    if (victim === undefined) {
      return
    }
    map.delete(victim)
  }
}
|
|
244
|
+
|
|
245
|
+
/**
 * Reports whether a hostname obviously refers to the local machine or a
 * private network.
 *
 * Matches:
 * - the names in LOCAL_HOSTNAMES,
 * - any name ending in `.local` or `.localhost`,
 * - IPv4 and IPv6 literals accepted by isPrivateIPv4 / isPrivateIPv6.
 *
 * A single trailing dot is removed first, so the fully-qualified spelling
 * `localhost.` is treated the same as `localhost`.
 *
 * This is a purely lexical check: a public name that resolves to a private
 * address is not detected.
 *
 * @param hostname - Lower-cased hostname, as provided by the caller
 * @returns true when the host is considered local or private
 */
function isLocalOrPrivateHost(hostname: string): boolean {
  const host = hostname.replace(/\.$/, "")

  if (LOCAL_HOSTNAMES.has(host)) {
    return true
  }

  // Names under ".localhost" are reserved for loopback use, like "localhost" itself.
  if (host.endsWith(".local") || host.endsWith(".localhost")) {
    return true
  }

  return isPrivateIPv4(host) || isPrivateIPv6(host)
}
|
|
264
|
+
|
|
265
|
+
/**
 * Reports whether a hostname is a dotted-decimal IPv4 literal in a range that
 * is not globally reachable.
 *
 * Ranges treated as private:
 * - 0.0.0.0/8 ("this network")
 * - 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 (private use)
 * - 100.64.0.0/10 (shared address space / carrier-grade NAT)
 * - 127.0.0.0/8 (loopback)
 * - 169.254.0.0/16 (link-local)
 * - 198.18.0.0/15 (benchmarking)
 * - 224.0.0.0 and above (multicast, reserved, broadcast)
 *
 * @param hostname - Hostname to test
 * @returns true for an IPv4 literal in one of the ranges above; false for
 *   public addresses and for anything that is not a valid dotted-decimal IPv4
 */
function isPrivateIPv4(hostname: string): boolean {
  const octets = hostname.split(".")
  if (octets.length !== 4 || octets.some((octet) => !/^\d{1,3}$/.test(octet))) {
    return false
  }

  const values = octets.map((octet) => Number.parseInt(octet, 10))
  if (values.some((value) => value > 255)) {
    return false
  }

  // Every range below is identified by the first two octets alone.
  const [a, b] = values

  return (
    a === 0 ||
    a === 10 ||
    a === 127 ||
    (a === 100 && b >= 64 && b <= 127) ||
    (a === 169 && b === 254) ||
    (a === 172 && b >= 16 && b <= 31) ||
    (a === 192 && b === 168) ||
    (a === 198 && (b === 18 || b === 19)) ||
    a >= 224
  )
}
|
|
286
|
+
|
|
287
|
+
/**
 * Reports whether a hostname is an IPv6 literal that must not be treated as a
 * public address.
 *
 * Surrounding brackets are stripped first. The value must contain a colon to
 * be considered IPv6 at all; without this guard, ordinary DNS names that
 * merely begin with "fc", "fd" or "fe8".."feb" (for example `fdic.gov`) would
 * be classified as private.
 *
 * Matches:
 * - `::1` (loopback) and `::` (unspecified)
 * - `::ffff:…` IPv4-mapped literals, rejected wholesale because the embedded
 *   IPv4 address is not inspected here
 * - fc00::/7 (unique local) and fe80::/10 (link-local), recognized by a
 *   four-digit first hextet
 *
 * @param hostname - Hostname to test, with or without brackets
 * @returns true when the literal is loopback, unspecified, IPv4-mapped,
 *   unique-local or link-local
 */
function isPrivateIPv6(hostname: string): boolean {
  const address = hostname.toLowerCase().replace(/^\[|\]$/g, "")

  if (!address.includes(":")) {
    return false
  }

  if (address === "::1" || address === "::") {
    return true
  }

  if (address.startsWith("::ffff:")) {
    return true
  }

  return /^(?:f[cd][0-9a-f]{2}|fe[89ab][0-9a-f]):/.test(address)
}
|
|
299
|
+
|
|
300
|
+
/**
 * Reports whether a string contains a space, an ASCII control character
 * (U+0000 through U+001F) or DEL (U+007F).
 *
 * Non-ASCII whitespace such as U+00A0 is not matched.
 *
 * @param value - String to inspect
 * @returns true when at least one such character is present
 */
function hasControlOrWhitespace(value: string): boolean {
  return /[\x00-\x20\x7f]/.test(value)
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * Outcome of a robots.txt lookup, discriminated by `kind`:
 * - `allow-all`: access is permitted without consulting any rules
 * - `deny-all`: access is refused outright
 * - `not-found`: no usable robots.txt at the queried origin
 * - `rules`: robots.txt was retrieved; `robotsText` holds its contents
 */
export type RobotsPolicyResult =
  | { kind: "allow-all" }
  | { kind: "deny-all" }
  | { kind: "not-found" }
  | { kind: "rules"; robotsText: string }
|
|
6
|
+
|
|
7
|
+
/**
 * Optional configuration strings for the external-host policy.
 */
export interface ExternalPolicyEnv {
  // Host allowlist; presumably a comma/newline separated list — verify against the parser.
  EXTERNAL_DOC_HOST_ALLOWLIST?: string
  // Host blocklist, in the same format as the allowlist.
  EXTERNAL_DOC_HOST_BLOCKLIST?: string
}
|
package/src/lib/fetch.ts
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared fetching utilities for Apple Developer documentation
|
|
3
|
+
* Contains common utilities used by both HIG and reference documentation
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Error thrown when a requested documentation resource does not exist.
 *
 * Sets `name` to "NotFoundError" so the error is identifiable in logs and
 * serialized output. Accepts the same constructor arguments as `Error`.
 */
export class NotFoundError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args)
    this.name = "NotFoundError"
  }
}
|
|
7
|
+
|
|
8
|
+
// Pool of browser User-Agent strings from which getRandomUserAgent picks one
// at random.
const USER_AGENTS = [
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.2.20",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3.1 Safari/605.7.24",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1.2 Safari/605.1.15 Reeder/5.4",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3.1 Safari/605.1.1",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_3_9; en) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1.6 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.7.24",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.4.24",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.7.23",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.2 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_16) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/17618.1.15.111.8",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.6.24",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/618.2.7 (KHTML, like Gecko) Version/17.5 Safari/618.2.7",
  "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3 like Mac OS X; de-de) AppleWebKit/533.17.9 (KHTML, like Gecko) Mobile/8F190",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1",
  "Mozilla/5.0 (iPad; CPU OS 8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H321 Safari/600.1.4",
  "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_3 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/10.1 Mobile/15A432 Safari/602.1",
] as const
|
|
36
|
+
|
|
37
|
+
/**
 * Pick one entry of USER_AGENTS uniformly at random.
 *
 * @returns A User-Agent string from the pool
 */
export function getRandomUserAgent(): string {
  return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)]
}
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Human Interface Guidelines (HIG) fetching functionality
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { getRandomUserAgent, NotFoundError } from "../fetch"
|
|
6
|
+
import type { HIGPageJSON, HIGTableOfContents } from "./types"
|
|
7
|
+
|
|
8
|
+
// ============================================================================
|
|
9
|
+
// CONSTANTS
|
|
10
|
+
// ============================================================================
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Base URL for HIG JSON API
|
|
14
|
+
*/
|
|
15
|
+
const HIG_BASE_URL = "https://developer.apple.com/tutorials/data"
|
|
16
|
+
|
|
17
|
+
// ============================================================================
|
|
18
|
+
// FETCHING FUNCTIONS
|
|
19
|
+
// ============================================================================
|
|
20
|
+
|
|
21
|
+
/**
 * Download the HIG table of contents from the HIG JSON endpoint.
 *
 * A randomly chosen User-Agent is sent with the request. Any non-OK response
 * is logged with `console.error` before an error is thrown.
 *
 * @returns The parsed table of contents, cast without validation
 * @throws NotFoundError when the endpoint answers 404
 * @throws Error for any other non-OK response
 */
export async function fetchHIGTableOfContents(): Promise<HIGTableOfContents> {
  const tocUrl = `${HIG_BASE_URL}/index/design--human-interface-guidelines`
  const headers = {
    "User-Agent": getRandomUserAgent(),
    Accept: "application/json",
    "Cache-Control": "no-cache",
  }

  const response = await fetch(tocUrl, { headers })
  if (response.ok) {
    return (await response.json()) as HIGTableOfContents
  }

  const failure = `Failed to fetch HIG ToC: ${response.status} ${response.statusText}`
  console.error(failure)

  if (response.status === 404) {
    throw new NotFoundError(`HIG table of contents not found at ${tocUrl}`)
  }
  throw new Error(failure)
}
|
|
48
|
+
|
|
49
|
+
/**
 * Download the JSON for a single HIG page.
 *
 * Leading and trailing slashes are stripped from `path`, which is then
 * inserted into the request URL as-is (no encoding) and suffixed with `.json`.
 *
 * @param path - The HIG path (e.g., "getting-started", "foundations/color")
 * @returns The parsed page data, cast without validation
 * @throws NotFoundError when the endpoint answers 404
 * @throws Error for any other non-OK response
 */
export async function fetchHIGPageData(path: string): Promise<HIGPageJSON> {
  const pagePath = path.replace(/^\/+|\/+$/g, "")
  const jsonUrl = `${HIG_BASE_URL}/design/human-interface-guidelines/${pagePath}.json`
  const headers = {
    "User-Agent": getRandomUserAgent(),
    Accept: "application/json",
    "Cache-Control": "no-cache",
  }

  const response = await fetch(jsonUrl, { headers })
  if (response.ok) {
    return (await response.json()) as HIGPageJSON
  }

  const failure = `Failed to fetch HIG page: ${response.status} ${response.statusText}`
  console.error(failure)

  if (response.status === 404) {
    throw new NotFoundError(`HIG page not found at ${jsonUrl}`)
  }
  throw new Error(failure)
}
|
|
83
|
+
|
|
84
|
+
// ============================================================================
|
|
85
|
+
// UTILITY FUNCTIONS
|
|
86
|
+
// ============================================================================
|
|
87
|
+
|
|
88
|
+
/**
 * Collect every page path listed in the table of contents.
 *
 * The tree under `interfaceLanguages.swift` is walked depth-first, each parent
 * before its children. The leading "/design/human-interface-guidelines/"
 * prefix is removed from each path; items without a path, or whose path is
 * empty after stripping, contribute nothing themselves.
 *
 * @param toc - The HIG table of contents
 * @returns Paths in document order
 */
export function extractHIGPaths(toc: HIGTableOfContents): string[] {
  const collect = (items: typeof toc.interfaceLanguages.swift): string[] =>
    items.flatMap((item) => {
      const own = item.path ? item.path.replace(/^\/design\/human-interface-guidelines\//, "") : ""
      const nested = item.children ? collect(item.children) : []
      return own ? [own, ...nested] : nested
    })

  return collect(toc.interfaceLanguages.swift)
}
|
|
116
|
+
|
|
117
|
+
/**
 * Locate the table-of-contents item whose path equals `targetPath`.
 *
 * Both sides are compared after stripping leading/trailing slashes; item paths
 * additionally lose the "/design/human-interface-guidelines/" prefix. The
 * search is depth-first and returns the first match.
 *
 * NOTE(review): `item.path` is used here without the presence check that
 * extractHIGPaths performs — confirm that every item carries a path.
 *
 * @param toc - The HIG table of contents
 * @param targetPath - The path to search for
 * @returns The matching item, or undefined when there is none
 */
export function findHIGItemByPath(
  toc: HIGTableOfContents,
  targetPath: string,
): (typeof toc.interfaceLanguages.swift)[0] | undefined {
  const trimSlashes = (value: string): string => value.replace(/^\/+|\/+$/g, "")
  const wanted = trimSlashes(targetPath)

  const search = (
    items: typeof toc.interfaceLanguages.swift,
  ): (typeof items)[0] | undefined => {
    for (const item of items) {
      const itemPath = trimSlashes(item.path.replace(/^\/design\/human-interface-guidelines\//, ""))
      if (itemPath === wanted) {
        return item
      }

      const nested = item.children ? search(item.children) : undefined
      if (nested) {
        return nested
      }
    }
    return undefined
  }

  return search(toc.interfaceLanguages.swift)
}
|
|
152
|
+
|
|
153
|
+
/**
 * Build the chain of titles leading from the top of the table of contents
 * down to the item at `targetPath`.
 *
 * Paths are compared the same way as in findHIGItemByPath: slashes trimmed
 * and the "/design/human-interface-guidelines/" prefix removed from item paths.
 *
 * NOTE(review): `item.path` is used without a presence check — confirm that
 * every item carries a path.
 *
 * @param toc - The HIG table of contents
 * @param targetPath - The path to get breadcrumbs for
 * @returns Titles from outermost ancestor to the target; empty when not found
 */
export function getHIGBreadcrumbs(toc: HIGTableOfContents, targetPath: string): string[] {
  const trimSlashes = (value: string): string => value.replace(/^\/+|\/+$/g, "")
  const wanted = trimSlashes(targetPath)

  const walk = (
    items: typeof toc.interfaceLanguages.swift,
    ancestors: string[],
  ): string[] | null => {
    for (const item of items) {
      const itemPath = trimSlashes(item.path.replace(/^\/design\/human-interface-guidelines\//, ""))
      const trail = ancestors.concat(item.title)

      if (itemPath === wanted) {
        return trail
      }

      const nested = item.children ? walk(item.children, trail) : null
      if (nested) {
        return nested
      }
    }
    return null
  }

  return walk(toc.interfaceLanguages.swift, []) || []
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Human Interface Guidelines (HIG) functionality
|
|
3
|
+
* Re-exports all HIG-related functions and types
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
export * from "./fetch"
|
|
7
|
+
export * from "./render"
|
|
8
|
+
export type * from "./types"
|
|
9
|
+
export { hasChildren, isHIGImageReference, isHIGTopicReference } from "./util"
|