@nshipster/sosumi 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,133 @@
1
+ import { renderFromJSON } from "../reference"
2
+ import type { AppleDocJSON } from "../types"
3
+ import {
4
+ assertExternalDocumentationAccess,
5
+ ExternalAccessError,
6
+ validateExternalDocumentationUrl,
7
+ } from "./policy"
8
+ import type { ExternalPolicyEnv, RobotsPolicyResult } from "./types"
9
+
10
/**
 * Lower-case `X-Robots-Tag` directives that, when present on a fetched
 * response, cause the external documentation fetch to be rejected.
 */
const RESTRICTIVE_X_ROBOTS_TAGS = [
  "none",
  "noindex",
  "noai",
  "noimageai",
] as const
11
+
12
/**
 * Returns the part of the URL path that precedes the first `/documentation`
 * path segment, i.e. the prefix under which the Swift-DocC site is hosted.
 *
 * Trailing slashes on the path are ignored. A site served from the origin
 * root yields an empty string.
 *
 * @param sourceUrl - URL of a Swift-DocC documentation page.
 * @returns The hosting base path (for example `"/my-package"`), or `""`.
 * @throws ExternalAccessError with status 400 when the path contains no
 *   `/documentation` segment.
 */
export function extractExternalDocumentationBasePath(sourceUrl: URL): string {
  const trimmedPath = sourceUrl.pathname.replace(/\/+$/, "")
  // Lazy first group so the *earliest* "/documentation" segment wins.
  const docPathPattern = /^(.*?)(\/documentation(?:\/.*)?)$/
  const captured = trimmedPath.match(docPathPattern)
  if (captured === null) {
    throw new ExternalAccessError(
      "External URL must point to a Swift-DocC documentation path.",
      400,
    )
  }

  const basePath = captured[1]
  return basePath
}
24
+
25
/**
 * Maps a Swift-DocC page URL to the URL of its backing render JSON:
 * `<base>/documentation/...` becomes `<base>/data/documentation/....json`.
 *
 * Only the origin and path of `sourceUrl` are used; any query string is not
 * carried over. A path that already ends in `.json` is not suffixed again.
 *
 * @param sourceUrl - URL of a Swift-DocC documentation page.
 * @returns The URL of the corresponding DocC JSON document.
 * @throws ExternalAccessError (400) when the path is not a documentation path.
 */
export function buildExternalDocCJsonUrl(sourceUrl: URL): URL {
  const basePath = extractExternalDocumentationBasePath(sourceUrl)
  const trimmedPath = sourceUrl.pathname.replace(/\/+$/, "")

  let jsonPath = trimmedPath.slice(basePath.length)
  if (!jsonPath.endsWith(".json")) {
    jsonPath = `${jsonPath}.json`
  }

  return new URL(`${basePath}/data${jsonPath}`, sourceUrl.origin)
}
33
+
34
/**
 * Fetches the DocC render JSON that backs an external documentation page.
 *
 * Steps, in order: re-validate the URL, enforce host policy and robots.txt,
 * derive the JSON URL, fetch it with the bot User-Agent, then honour the
 * `X-Robots-Tag` response header before looking at the status code.
 *
 * NOTE(review): the robots.txt check runs against the page URL, not the
 * derived `/data/...json` URL that is actually requested — confirm intended.
 * NOTE(review): no `redirect` option is passed to `fetch`, so the runtime's
 * default redirect handling applies and the final host is not re-checked.
 *
 * @param sourceUrl - URL of the Swift-DocC page.
 * @param externalPolicyEnv - Optional allow/block list configuration.
 * @returns The parsed response body, cast (not validated) to `AppleDocJSON`.
 * @throws ExternalAccessError for invalid URLs (400), policy/robots denials
 *   (403) and missing pages (404).
 * @throws Error for any other non-OK response status.
 */
export async function fetchExternalDocCJSON(
  sourceUrl: URL,
  externalPolicyEnv: ExternalPolicyEnv = {},
): Promise<AppleDocJSON> {
  const pageUrl = validateExternalDocumentationUrl(sourceUrl.toString())
  await assertExternalDocumentationAccess(pageUrl, externalPolicyEnv)

  const jsonHref = buildExternalDocCJsonUrl(pageUrl).toString()
  const requestHeaders = {
    "User-Agent": EXTERNAL_DOC_USER_AGENT,
    Accept: "application/json",
  }
  const response = await fetch(jsonHref, { headers: requestHeaders })

  // The header check deliberately precedes the status check, so a restrictive
  // header wins even on error responses.
  if (containsRestrictiveXRobotsTag(response.headers.get("x-robots-tag"))) {
    throw new ExternalAccessError(
      "External host denied AI/doc access via X-Robots-Tag response header.",
      403,
    )
  }

  if (response.status === 404) {
    throw new ExternalAccessError(`External documentation page not found at ${jsonHref}`, 404)
  }

  if (!response.ok) {
    throw new Error(`Failed to fetch external DocC JSON: ${response.status} ${response.statusText}`)
  }

  return (await response.json()) as AppleDocJSON
}
69
+
70
/**
 * Fetches an external Swift-DocC page and renders it to Markdown.
 *
 * @param url - Raw URL string of the documentation page.
 * @param externalPolicyEnv - Optional allow/block list configuration.
 * @returns The rendered output of `renderFromJSON` for the page.
 * @throws ExternalAccessError when the URL is invalid or access is denied;
 *   other errors from the underlying fetch propagate unchanged.
 */
export async function fetchExternalDocumentationMarkdown(
  url: string,
  externalPolicyEnv: ExternalPolicyEnv = {},
): Promise<string> {
  const pageUrl = validateExternalDocumentationUrl(url)
  const docJson = await fetchExternalDocCJSON(pageUrl, externalPolicyEnv)

  // Origin plus hosting base path, passed to the renderer as `externalOrigin`.
  const basePath = extractExternalDocumentationBasePath(pageUrl)
  const externalOrigin = `${pageUrl.origin}${basePath}`

  return renderFromJSON(docJson, pageUrl.toString(), { externalOrigin })
}
81
+
82
/**
 * Downloads `/robots.txt` for an origin and classifies the outcome.
 *
 * Status mapping:
 * - 404, 410, 403 → `not-found` (the caller decides on a fallback)
 * - 401           → `deny-all`
 * - other non-2xx → `allow-all` (fail open on server-side errors)
 * - 2xx           → `rules`, carrying the response body text
 *
 * A rejected `fetch` (for example a network failure) is not caught here and
 * propagates to the caller.
 *
 * @param origin - Origin (scheme + host) whose robots.txt is requested.
 * @param userAgent - Value sent in the `User-Agent` request header.
 * @returns The classified robots policy.
 */
export async function fetchRobotsPolicy(
  origin: string,
  userAgent: string,
): Promise<RobotsPolicyResult> {
  const robotsHref = new URL("/robots.txt", origin).toString()
  const response = await fetch(robotsHref, {
    headers: {
      "User-Agent": userAgent,
      Accept: "text/plain, text/*;q=0.9, */*;q=0.1",
    },
  })

  switch (response.status) {
    // Missing or inaccessible robots.txt.
    case 404:
    case 410:
    case 403:
      return { kind: "not-found" }
    // robots.txt is behind authentication: treat as an explicit denial.
    case 401:
      return { kind: "deny-all" }
  }

  if (!response.ok) {
    return { kind: "allow-all" }
  }

  return { kind: "rules", robotsText: await response.text() }
}
112
+
113
/**
 * Reports whether an `X-Robots-Tag` header value contains any directive
 * listed in `RESTRICTIVE_X_ROBOTS_TAGS`.
 *
 * The value is lower-cased and split on commas; each piece is trimmed and
 * must equal a restrictive directive exactly. Pieces carrying a prefix such
 * as `somebot: noindex` therefore do not match.
 *
 * @param headerValue - Raw header value, or `null` when the header is absent.
 * @returns `true` when at least one restrictive directive is present.
 */
function containsRestrictiveXRobotsTag(headerValue: string | null): boolean {
  if (!headerValue) {
    return false
  }

  const directives = headerValue
    .toLowerCase()
    .split(",")
    .map((directive) => directive.trim())
    .filter((directive) => directive.length > 0)

  return RESTRICTIVE_X_ROBOTS_TAGS.some((restricted) => directives.includes(restricted))
}
133
/**
 * `User-Agent` sent with external documentation and robots.txt requests, and
 * the agent name matched against robots.txt rules.
 */
export const EXTERNAL_DOC_USER_AGENT = "sosumi-ai/1.0 (+https://sosumi.ai/#bot)"
@@ -0,0 +1,8 @@
1
+ /**
2
+ * External documentation functionality
3
+ * Re-exports all external-doc related functions and types
4
+ */
5
+
6
+ export * from "./fetch"
7
+ export * from "./policy"
8
+ export type * from "./types"
@@ -0,0 +1,308 @@
1
+ import robotsParser from "robots-parser"
2
+
3
+ import { EXTERNAL_DOC_USER_AGENT, fetchRobotsPolicy } from "./fetch"
4
+ import type { ExternalPolicyEnv, RobotsPolicyResult } from "./types"
5
+
6
/** Hostnames that always count as local. */
const LOCAL_HOSTNAMES = new Set(["localhost", "127.0.0.1", "::1"])

/** Route prefix that precedes the URI-encoded external target. */
const EXTERNAL_PATH_PREFIX = "/external/"

/** How long a resolved robots policy stays cached (five minutes). */
const ROBOTS_CACHE_TTL_MS = 5 * 60 * 1000

/** Upper bounds for the two maps below; the oldest entries are evicted first. */
const ROBOTS_CACHE_MAX_ENTRIES = 1000
const ROBOTS_INFLIGHT_MAX_ENTRIES = 1000

/** Resolved robots policies keyed by origin, each with an expiry timestamp. */
const robotsPolicyCache = new Map<string, { expiresAt: number; policy: RobotsPolicyResult }>()

/** Pending robots lookups keyed by origin, shared by concurrent callers. */
const robotsPolicyInFlight = new Map<string, Promise<RobotsPolicyResult>>()
13
+
14
/**
 * Error raised when an external documentation request is rejected.
 *
 * `status` carries the HTTP status code associated with the rejection and
 * defaults to 403.
 */
export class ExternalAccessError extends Error {
  constructor(
    message: string,
    public status: number = 403,
  ) {
    super(message)
    this.name = "ExternalAccessError"
  }
}
23
+
24
/**
 * Parses and validates a raw external documentation URL.
 *
 * Accepted URLs are non-empty, free of control characters and whitespace,
 * parseable, use `https:`, and carry neither credentials nor a non-empty
 * fragment.
 *
 * @param rawUrl - URL string supplied by the caller.
 * @returns The parsed `URL`.
 * @throws ExternalAccessError with status 400 describing the failed check.
 */
export function validateExternalDocumentationUrl(rawUrl: string): URL {
  let parsedUrl: URL | null = null
  if (rawUrl && !hasControlOrWhitespace(rawUrl)) {
    try {
      parsedUrl = new URL(rawUrl)
    } catch {
      parsedUrl = null
    }
  }

  if (parsedUrl === null) {
    throw new ExternalAccessError("Invalid external URL.", 400)
  }

  const { protocol, username, password, hash } = parsedUrl

  if (protocol !== "https:") {
    throw new ExternalAccessError("Only https:// external URLs are supported.", 400)
  }

  if (username || password) {
    throw new ExternalAccessError("Credentialed URLs are not supported.", 400)
  }

  if (hash) {
    throw new ExternalAccessError("URL fragments are not supported.", 400)
  }

  return parsedUrl
}
51
+
52
/**
 * Extracts and URI-decodes the target that follows the `/external/` prefix
 * of a request path.
 *
 * @param path - Request path expected to start with `/external/`.
 * @returns The decoded target string.
 * @throws ExternalAccessError with status 400 when the prefix is missing, the
 *   target is empty, decoding fails, or the decoded value contains control
 *   characters or whitespace.
 */
export function decodeExternalTargetPath(path: string): string {
  const invalidUrlError = (): ExternalAccessError =>
    new ExternalAccessError("Invalid external URL.", 400)

  if (!path.startsWith(EXTERNAL_PATH_PREFIX)) {
    throw invalidUrlError()
  }

  const encodedTarget = path.slice(EXTERNAL_PATH_PREFIX.length)
  if (!encodedTarget) {
    throw invalidUrlError()
  }

  let decodedTarget: string
  try {
    decodedTarget = decodeURIComponent(encodedTarget)
  } catch {
    // Malformed percent-encoding.
    throw invalidUrlError()
  }

  if (!decodedTarget || hasControlOrWhitespace(decodedTarget)) {
    throw invalidUrlError()
  }
  return decodedTarget
}
72
+
73
/**
 * Ensures a target URL may be fetched: first the configured host policy,
 * then the host's robots.txt.
 *
 * @param targetUrl - Validated URL of the external documentation page.
 * @param env - Allow/block list configuration.
 * @throws ExternalAccessError with status 403 when either check denies access.
 */
export async function assertExternalDocumentationAccess(
  targetUrl: URL,
  env: ExternalPolicyEnv,
): Promise<void> {
  assertHostPolicy(targetUrl, env)

  if (await isAllowedByRobotsTxt(targetUrl)) {
    return
  }

  throw new ExternalAccessError("External host denied access for this path via robots.txt.", 403)
}
83
+
84
/**
 * Applies the configured host policy to a target URL.
 *
 * Precedence: a blocklist match always rejects; an allowlist match accepts
 * (even for local/private hosts); otherwise a non-empty allowlist rejects
 * everything else, and local/private hosts are rejected.
 *
 * @param targetUrl - URL whose hostname is checked.
 * @param env - Allow/block list configuration.
 * @throws ExternalAccessError with status 403 when the host is not permitted.
 */
function assertHostPolicy(targetUrl: URL, env: ExternalPolicyEnv): void {
  const host = targetUrl.hostname.toLowerCase()

  const blockedHosts = parseHostList(env.EXTERNAL_DOC_HOST_BLOCKLIST)
  if (isHostListed(host, blockedHosts)) {
    throw new ExternalAccessError("External host is blocked by configuration.", 403)
  }

  const allowedHosts = parseHostList(env.EXTERNAL_DOC_HOST_ALLOWLIST)
  if (isHostListed(host, allowedHosts)) {
    return
  }

  if (allowedHosts.size > 0) {
    throw new ExternalAccessError("External host is not allowlisted.", 403)
  }

  if (isLocalOrPrivateHost(host)) {
    // This blocks obvious local/private hostnames, but DNS rebinding on public hostnames
    // still requires explicit allowlists for strict SSRF protection in runtimes without DNS resolution APIs.
    throw new ExternalAccessError(
      "External URL points to a local or private host and is not allowlisted.",
      403,
    )
  }
}
107
+
108
/**
 * Decides whether robots.txt permits fetching the target URL.
 *
 * @param targetUrl - URL to evaluate.
 * @returns `false` for a `deny-all` policy or when the rules disallow the
 *   URL; `true` in every other case, including `not-found`.
 */
async function isAllowedByRobotsTxt(targetUrl: URL): Promise<boolean> {
  const policy = await getRobotsPolicy(targetUrl.origin)

  switch (policy.kind) {
    case "deny-all":
      return false
    case "rules":
      return evaluateRobotsPolicy(policy.robotsText, targetUrl, EXTERNAL_DOC_USER_AGENT)
    default:
      // "allow-all" and "not-found" both permit access.
      return true
  }
}
121
+
122
/**
 * Evaluates robots.txt rules for one URL and user agent via `robots-parser`.
 *
 * @param robotsText - Raw robots.txt contents.
 * @param targetUrl - URL being checked.
 * @param userAgent - Agent string matched against the rules.
 * @returns `false` only when the parser reports an explicit `false`; any
 *   other result counts as allowed.
 */
function evaluateRobotsPolicy(robotsText: string, targetUrl: URL, userAgent: string): boolean {
  const robotsTxtUrl = new URL("/robots.txt", targetUrl.origin).toString()
  const parsedRules = robotsParser(robotsTxtUrl, robotsText)
  const verdict = parsedRules.isAllowed(targetUrl.toString(), userAgent)
  return verdict !== false
}
127
+
128
/**
 * Parses a comma- and/or newline-separated host list into a set.
 *
 * Entries are trimmed and lower-cased; empty entries are dropped.
 *
 * @param rawList - Raw configuration value, possibly undefined or empty.
 * @returns The set of normalized host entries (empty when nothing is given).
 */
function parseHostList(rawList: string | undefined): Set<string> {
  const hosts = new Set<string>()
  if (!rawList) {
    return hosts
  }

  for (const rawEntry of rawList.split(/\r?\n|,/)) {
    const host = rawEntry.trim().toLowerCase()
    if (host) {
      hosts.add(host)
    }
  }
  return hosts
}
140
+
141
/**
 * Derives the origin of the parent ("root") domain for a subdomain origin,
 * used as a fallback location for robots.txt.
 *
 * The root is taken as the last two DNS labels, so `https://docs.example.com`
 * yields `https://example.com`. Any port on the input is not carried over.
 *
 * Returns `null` when there is no meaningful parent:
 * - the origin cannot be parsed,
 * - the host is an IP literal (previously `192.168.1.10` produced the
 *   unrelated host `1.10`),
 * - the host has fewer than three labels.
 *
 * A trailing root dot (`docs.example.com.`) is ignored when counting labels.
 *
 * NOTE(review): multi-label public suffixes (e.g. `co.uk`) are not
 * recognised; `a.example.co.uk` maps to `co.uk`. Fixing that needs a public
 * suffix list.
 *
 * @param origin - Origin string such as `https://docs.example.com`.
 * @returns The root-domain origin, or `null`.
 */
function getRootOrigin(origin: string): string | null {
  let url: URL
  try {
    url = new URL(origin)
  } catch {
    return null
  }

  const hostname = url.hostname.toLowerCase().replace(/\.+$/, "")

  // IP literals have no parent domain: bracketed IPv6 or dotted-quad IPv4.
  const isIpLiteral = hostname.startsWith("[") || /^\d{1,3}(?:\.\d{1,3}){3}$/.test(hostname)
  if (isIpLiteral) {
    return null
  }

  const labels = hostname.split(".")
  if (labels.length < 3) {
    return null
  }

  const rootHost = labels.slice(-2).join(".")
  return `${url.protocol}//${rootHost}`
}
154
+
155
/**
 * Resolves the robots policy for an origin, using a TTL cache and sharing
 * one in-flight request among concurrent callers.
 *
 * Resolution: fetch the origin's robots.txt; when that is `not-found`, try
 * the root-domain origin; when neither provides one, resolve to `allow-all`.
 * The returned policy is therefore never `not-found`.
 *
 * Successful results are cached for `ROBOTS_CACHE_TTL_MS`. When the lookup
 * rejects, nothing is cached; the in-flight entry is removed either way.
 *
 * @param origin - Origin whose policy is requested.
 * @returns The resolved robots policy.
 */
async function getRobotsPolicy(origin: string): Promise<RobotsPolicyResult> {
  const now = Date.now()
  pruneExpiredRobotsPolicyEntries(now)

  const cachedEntry = robotsPolicyCache.get(origin)
  if (cachedEntry && cachedEntry.expiresAt > now) {
    return cachedEntry.policy
  }

  const pending = robotsPolicyInFlight.get(origin)
  if (pending) {
    return pending
  }

  const resolvePolicy = async (): Promise<RobotsPolicyResult> => {
    const ownPolicy = await fetchRobotsPolicy(origin, EXTERNAL_DOC_USER_AGENT)
    if (ownPolicy.kind !== "not-found") {
      return ownPolicy
    }

    // No robots.txt on this host: fall back to the root domain, if any.
    const rootOrigin = getRootOrigin(origin)
    if (!rootOrigin || rootOrigin === origin) {
      return { kind: "allow-all" }
    }

    const rootPolicy = await fetchRobotsPolicy(rootOrigin, EXTERNAL_DOC_USER_AGENT)
    if (rootPolicy.kind === "not-found") {
      return { kind: "allow-all" }
    }
    return rootPolicy
  }

  const request = resolvePolicy()
    .then((policy) => {
      robotsPolicyCache.set(origin, {
        expiresAt: Date.now() + ROBOTS_CACHE_TTL_MS,
        policy,
      })
      enforceMaxMapEntries(robotsPolicyCache, ROBOTS_CACHE_MAX_ENTRIES)
      return policy
    })
    .finally(() => {
      robotsPolicyInFlight.delete(origin)
    })

  // Make room first so the in-flight map stays within its bound after insertion.
  enforceMaxMapEntries(robotsPolicyInFlight, ROBOTS_INFLIGHT_MAX_ENTRIES, origin)
  robotsPolicyInFlight.set(origin, request)
  return request
}
202
+
203
/**
 * Checks whether a hostname matches any entry of a host list.
 *
 * Matching rules:
 * - an exact match with any entry,
 * - an entry starting with `.` matches hostnames ending in that suffix
 *   (subdomains only, not the bare domain),
 * - any other entry also matches its subdomains.
 *
 * @param hostname - Lower-cased hostname to test.
 * @param list - Normalized host entries.
 * @returns `true` when the hostname is covered by the list.
 */
function isHostListed(hostname: string, list: Set<string>): boolean {
  if (list.has(hostname)) {
    return true
  }

  // Exact equality is handled above, so only suffix matches remain.
  return [...list].some((entry) => {
    const suffix = entry.startsWith(".") ? entry : `.${entry}`
    return hostname.endsWith(suffix)
  })
}
223
+
224
/**
 * Removes every cache entry whose expiry time is at or before `now`.
 *
 * @param now - Current time in milliseconds since the epoch.
 */
function pruneExpiredRobotsPolicyEntries(now: number): void {
  robotsPolicyCache.forEach((entry, origin) => {
    if (entry.expiresAt <= now) {
      robotsPolicyCache.delete(origin)
    }
  })
}
231
+
232
/**
 * Evicts the oldest entries (in insertion order) until the map respects its
 * size bound.
 *
 * Without `incomingKey` the map is trimmed to at most `maxEntries`. With an
 * `incomingKey` that is not yet present, one additional slot is freed so that
 * inserting it afterwards keeps the map within `maxEntries`.
 *
 * @param map - Map to trim in place.
 * @param maxEntries - Maximum number of entries allowed.
 * @param incomingKey - Key about to be inserted, if any.
 */
function enforceMaxMapEntries<K, V>(map: Map<K, V>, maxEntries: number, incomingKey?: K): void {
  const needsEviction = (): boolean => {
    if (map.size > maxEntries) {
      return true
    }
    return incomingKey !== undefined && map.size >= maxEntries && !map.has(incomingKey)
  }

  while (needsEviction()) {
    const oldestKey = map.keys().next().value
    if (oldestKey === undefined) {
      return
    }
    map.delete(oldestKey)
  }
}
244
+
245
/**
 * Heuristically decides whether a hostname refers to the local machine or a
 * private network, based on the name alone (no DNS resolution).
 *
 * Recognised as local/private:
 * - the names in `LOCAL_HOSTNAMES`,
 * - names ending in `.local` or `.localhost`,
 * - private, loopback and link-local IP literals.
 *
 * Trailing root dots are stripped before matching, so `localhost.` is
 * treated like `localhost` instead of slipping past the check.
 *
 * @param hostname - Hostname as reported by `URL.hostname`.
 * @returns `true` when the host looks local or private.
 */
function isLocalOrPrivateHost(hostname: string): boolean {
  // "localhost." is the fully-qualified spelling of "localhost".
  const host = hostname.toLowerCase().replace(/\.+$/, "")

  if (LOCAL_HOSTNAMES.has(host)) {
    return true
  }

  // Names under ".localhost" are reserved for loopback, like "localhost" itself.
  if (host.endsWith(".local") || host.endsWith(".localhost")) {
    return true
  }

  return isPrivateIPv4(host) || isPrivateIPv6(host)
}
264
+
265
/**
 * Reports whether a hostname is a dotted-quad IPv4 literal in a non-public
 * range: `0.0.0.0/8`, `10.0.0.0/8`, `127.0.0.0/8`, `169.254.0.0/16`,
 * `172.16.0.0/12` or `192.168.0.0/16`.
 *
 * Anything that is not four decimal octets in the range 0–255 yields `false`.
 *
 * @param hostname - Candidate hostname.
 * @returns `true` for an IPv4 literal in one of the ranges above.
 */
function isPrivateIPv4(hostname: string): boolean {
  if (!/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(hostname)) {
    return false
  }

  const octets = hostname.split(".").map((part) => Number.parseInt(part, 10))
  if (!octets.every((value) => value <= 255)) {
    return false
  }

  const [first, second] = octets
  switch (first) {
    case 0:
    case 10:
    case 127:
      return true
    case 169:
      return second === 254
    case 172:
      return second >= 16 && second <= 31
    case 192:
      return second === 168
    default:
      return false
  }
}
286
+
287
/**
 * Reports whether a hostname is an IPv6 literal that is loopback (`::1`),
 * unspecified (`::`), unique-local (`fc00::/7`) or link-local (`fe80::/10`).
 *
 * Surrounding square brackets, as produced by `URL.hostname`, are ignored.
 *
 * Only strings containing a colon are considered. The previous prefix-only
 * test misclassified ordinary DNS names such as `fcc.gov`, `fd.io` or
 * `feature.example.com` as private IPv6 addresses.
 *
 * @param hostname - Candidate hostname, optionally bracketed.
 * @returns `true` for a non-public IPv6 literal in one of the ranges above.
 */
function isPrivateIPv6(hostname: string): boolean {
  const address = hostname.toLowerCase().replace(/^\[|\]$/g, "")

  // Every IPv6 literal contains ":"; hostnames never do.
  if (!address.includes(":")) {
    return false
  }

  if (address === "::1" || address === "::") {
    return true
  }

  // The first hextet must be a full four digits: fc00–fdff (unique-local) or
  // fe80–febf (link-local). A shorter group such as "fc" means 0x00fc.
  return /^(?:f[cd][0-9a-f]{2}|fe[89ab][0-9a-f]):/.test(address)
}
299
+
300
/**
 * Reports whether a string contains an ASCII control character, a space
 * (any code unit up to U+0020), or DEL (U+007F).
 *
 * @param value - String to inspect.
 * @returns `true` when at least one such character is present.
 */
function hasControlOrWhitespace(value: string): boolean {
  return /[\x00-\x20\x7f]/.test(value)
}
@@ -0,0 +1,10 @@
1
/**
 * Outcome of looking up a host's robots.txt.
 *
 * - `allow-all`: no restrictions apply.
 * - `deny-all`: all access is denied.
 * - `not-found`: no robots.txt was available at the requested origin.
 * - `rules`: a robots.txt was retrieved; `robotsText` holds its raw contents.
 */
export type RobotsPolicyResult =
  | { kind: "allow-all" }
  | { kind: "deny-all" }
  | { kind: "not-found" }
  | { kind: "rules"; robotsText: string }
6
+
7
/**
 * Environment-provided configuration for external documentation access.
 * Both values are host lists separated by commas and/or newlines.
 */
export interface ExternalPolicyEnv {
  /** Hosts that are explicitly permitted. */
  EXTERNAL_DOC_HOST_ALLOWLIST?: string
  /** Hosts that are always rejected. */
  EXTERNAL_DOC_HOST_BLOCKLIST?: string
}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Shared fetching utilities for Apple Developer documentation
3
+ * Contains common utilities used by both HIG and reference documentation
4
+ */
5
+
6
/**
 * Error signalling that a requested documentation resource does not exist.
 *
 * Sets `name` to `"NotFoundError"` so logs and serialized errors identify the
 * error type instead of showing the generic `"Error"`.
 */
export class NotFoundError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args)
    this.name = "NotFoundError"
  }
}
7
+
8
/**
 * Pool of WebKit-based `User-Agent` strings (mostly desktop Safari) from
 * which one is picked per request.
 */
const USER_AGENTS = [
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.2.20",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3.1 Safari/605.7.24",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1.2 Safari/605.1.15 Reeder/5.4",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3.1 Safari/605.1.1",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_3_9; en) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1.6 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.7.24",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.4.24",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.7.23",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.2 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_16) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/17618.1.15.111.8",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.6.24",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/618.2.7 (KHTML, like Gecko) Version/17.5 Safari/618.2.7",
  "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3 like Mac OS X; de-de) AppleWebKit/533.17.9 (KHTML, like Gecko) Mobile/8F190",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1",
  "Mozilla/5.0 (iPad; CPU OS 8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H321 Safari/600.1.4",
  "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_3 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/10.1 Mobile/15A432 Safari/602.1",
] as const

/**
 * Picks one entry of `USER_AGENTS` uniformly at random.
 *
 * @returns A `User-Agent` header value.
 */
export function getRandomUserAgent(): string {
  const pick = Math.floor(Math.random() * USER_AGENTS.length)
  return USER_AGENTS[pick]
}
@@ -0,0 +1,186 @@
1
+ /**
2
+ * Human Interface Guidelines (HIG) fetching functionality
3
+ */
4
+
5
+ import { getRandomUserAgent, NotFoundError } from "../fetch"
6
+ import type { HIGPageJSON, HIGTableOfContents } from "./types"
7
+
8
+ // ============================================================================
9
+ // CONSTANTS
10
+ // ============================================================================
11
+
12
/**
 * Root of Apple's JSON data endpoints; the HIG table of contents and page
 * documents are addressed relative to it.
 */
const HIG_BASE_URL = "https://developer.apple.com/tutorials/data"
16
+
17
+ // ============================================================================
18
+ // FETCHING FUNCTIONS
19
+ // ============================================================================
20
+
21
/**
 * Downloads the table of contents for the Human Interface Guidelines.
 *
 * The request uses a randomly chosen User-Agent and asks for uncached JSON.
 * Failures are logged with `console.error` before being thrown.
 *
 * @returns The response body, cast (not validated) to `HIGTableOfContents`.
 * @throws NotFoundError when the server answers 404.
 * @throws Error for any other non-OK status.
 */
export async function fetchHIGTableOfContents(): Promise<HIGTableOfContents> {
  const tocUrl = `${HIG_BASE_URL}/index/design--human-interface-guidelines`

  const response = await fetch(tocUrl, {
    headers: {
      "User-Agent": getRandomUserAgent(),
      Accept: "application/json",
      "Cache-Control": "no-cache",
    },
  })

  if (response.ok) {
    return (await response.json()) as HIGTableOfContents
  }

  const failure = `Failed to fetch HIG ToC: ${response.status} ${response.statusText}`
  console.error(failure)
  if (response.status === 404) {
    throw new NotFoundError(`HIG table of contents not found at ${tocUrl}`)
  }
  throw new Error(failure)
}
48
+
49
/**
 * Downloads the JSON document for one HIG page.
 *
 * Leading and trailing slashes are stripped from `path`; the remainder is
 * inserted into the URL as-is, without encoding.
 * Failures are logged with `console.error` before being thrown.
 *
 * @param path - The HIG path (e.g., "getting-started", "foundations/color")
 * @returns The response body, cast (not validated) to `HIGPageJSON`.
 * @throws NotFoundError when the server answers 404.
 * @throws Error for any other non-OK status.
 */
export async function fetchHIGPageData(path: string): Promise<HIGPageJSON> {
  const pagePath = path.replace(/^\/+|\/+$/g, "")
  const jsonUrl = `${HIG_BASE_URL}/design/human-interface-guidelines/${pagePath}.json`

  const response = await fetch(jsonUrl, {
    headers: {
      "User-Agent": getRandomUserAgent(),
      Accept: "application/json",
      "Cache-Control": "no-cache",
    },
  })

  if (response.ok) {
    return (await response.json()) as HIGPageJSON
  }

  const failure = `Failed to fetch HIG page: ${response.status} ${response.statusText}`
  console.error(failure)
  if (response.status === 404) {
    throw new NotFoundError(`HIG page not found at ${jsonUrl}`)
  }
  throw new Error(failure)
}
83
+
84
+ // ============================================================================
85
+ // UTILITY FUNCTIONS
86
+ // ============================================================================
87
+
88
/**
 * Collects the paths of all entries in the HIG table of contents.
 *
 * Entries are visited depth-first, parents before their children. The
 * `/design/human-interface-guidelines/` prefix is removed from each path;
 * entries without a path, or whose path is empty after stripping, are skipped.
 *
 * @param toc - The HIG table of contents
 * @returns Array of all available paths, in traversal order
 */
export function extractHIGPaths(toc: HIGTableOfContents): string[] {
  const higPrefix = /^\/design\/human-interface-guidelines\//

  function* walk(items: typeof toc.interfaceLanguages.swift): Generator<string> {
    for (const entry of items) {
      if (entry.path) {
        const relativePath = entry.path.replace(higPrefix, "")
        if (relativePath) {
          yield relativePath
        }
      }

      if (entry.children) {
        yield* walk(entry.children)
      }
    }
  }

  return [...walk(toc.interfaceLanguages.swift)]
}
116
+
117
/**
 * Find a specific HIG item in the table of contents by path
 *
 * Both the target and each item's path are compared after removing the
 * `/design/human-interface-guidelines/` prefix (items only) and any leading
 * or trailing slashes. The search is depth-first and returns the first match.
 *
 * Entries without a `path` are never a match themselves, but their children
 * are still searched. `extractHIGPaths` already treats `path` as optional;
 * previously such an entry made this function throw a `TypeError`.
 *
 * @param toc - The HIG table of contents
 * @param targetPath - The path to search for
 * @returns The HIG item if found, undefined otherwise
 */
export function findHIGItemByPath(
  toc: HIGTableOfContents,
  targetPath: string,
): (typeof toc.interfaceLanguages.swift)[0] | undefined {
  const normalizedTarget = targetPath.replace(/^\/+|\/+$/g, "")

  function searchInItems(
    items: typeof toc.interfaceLanguages.swift,
  ): (typeof items)[0] | undefined {
    for (const item of items) {
      // `undefined` for pathless entries, which can never equal the target.
      const normalizedItemPath = item.path
        ?.replace(/^\/design\/human-interface-guidelines\//, "")
        .replace(/^\/+|\/+$/g, "")

      if (normalizedItemPath === normalizedTarget) {
        return item
      }

      if (item.children) {
        const found = searchInItems(item.children)
        if (found) return found
      }
    }
    return undefined
  }

  return searchInItems(toc.interfaceLanguages.swift)
}
152
+
153
/**
 * Get breadcrumb path for a HIG item
 *
 * Walks the table of contents depth-first and returns the titles from the
 * top-level entry down to the first entry whose normalized path equals the
 * normalized target.
 *
 * Entries without a `path` are never the target themselves, but they still
 * contribute their title when the target lies beneath them. Previously such
 * an entry made this function throw a `TypeError`.
 *
 * @param toc - The HIG table of contents
 * @param targetPath - The path to get breadcrumbs for
 * @returns Array of titles representing the breadcrumb path, or an empty
 *   array when the path is not found
 */
export function getHIGBreadcrumbs(toc: HIGTableOfContents, targetPath: string): string[] {
  const normalizedTarget = targetPath.replace(/^\/+|\/+$/g, "")

  function findBreadcrumbs(
    items: typeof toc.interfaceLanguages.swift,
    currentPath: string[] = [],
  ): string[] | null {
    for (const item of items) {
      // `undefined` for pathless entries, which can never equal the target.
      const normalizedItemPath = item.path
        ?.replace(/^\/design\/human-interface-guidelines\//, "")
        .replace(/^\/+|\/+$/g, "")
      const newPath = [...currentPath, item.title]

      if (normalizedItemPath === normalizedTarget) {
        return newPath
      }

      if (item.children) {
        const found = findBreadcrumbs(item.children, newPath)
        if (found) return found
      }
    }
    return null
  }

  return findBreadcrumbs(toc.interfaceLanguages.swift) || []
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Human Interface Guidelines (HIG) functionality
3
+ * Re-exports all HIG-related functions and types
4
+ */
5
+
6
+ export * from "./fetch"
7
+ export * from "./render"
8
+ export type * from "./types"
9
+ export { hasChildren, isHIGImageReference, isHIGTopicReference } from "./util"