@nshipster/sosumi 1.0.0 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lib/search.ts CHANGED
@@ -14,208 +14,225 @@ export interface SearchResponse {
14
14
  results: SearchResult[]
15
15
  }
16
16
 
17
- class SearchResultParser {
18
- private results: SearchResult[] = []
19
- private currentResult: Partial<SearchResult> = {}
20
- private currentBreadcrumbs: string[] = []
21
- private currentTags: string[] = []
22
- private isInResultTitle = false
23
- private isInResultDescription = false
24
- private isInBreadcrumb = false
25
- private isInTag = false
26
-
27
- getResults(): SearchResult[] {
28
- return this.results
17
// Apple's current search backend, discovered from
// https://developer.apple.com/search/scripts/search.js (May 2026)
//
// Historical context:
// - The legacy /search/ HTML scraper broke when Apple switched to a JS-rendered SPA
// - PR #54 upstream (NSHipster/sosumi.ai) targeted /search/services/search.php with
//   NDJSON-style streamed events; that endpoint is now also gone (404)
// - The current backend is a plain JSON POST API on Apple's MSC infrastructure
const APPLE_SEARCH_SERVICE_URL = "https://devintserv.msc.sbz.apple.com/api/v1/search"

/** Value used for `targetResultLocale` when the runtime locale has no entry in the table below. */
const DEFAULT_TARGET_RESULT_LOCALE = "en"

/**
 * Lookup table from a locale name to the `targetResultLocale` value sent to the
 * search service. Most entries map to themselves; both "es-LA" and "es-419"
 * map to "es-lamr".
 */
const TARGET_RESULT_LOCALE_BY_BASE_NAME = new Map<string, string>([
  ["en", "en"],
  ["zh-CN", "zh-CN"],
  ["ja-JP", "ja-JP"],
  ["ko-KR", "ko-KR"],
  ["fr-FR", "fr-FR"],
  ["de-DE", "de-DE"],
  ["pt-BR", "pt-BR"],
  ["es-LA", "es-lamr"],
  ["es-419", "es-lamr"],
  ["it-IT", "it-IT"],
])

/** A parsed JSON object whose property values have not been validated yet. */
type JsonRecord = Record<string, unknown>
41
+
42
/**
 * Searches Apple's developer documentation for `query`.
 *
 * @param query - Free-text search string; echoed back unchanged in the response.
 * @returns The original query paired with the normalized results.
 * @throws Whatever `searchAppleDeveloperDocsViaService` throws.
 */
export async function searchAppleDeveloperDocs(query: string): Promise<SearchResponse> {
  return {
    query,
    results: await searchAppleDeveloperDocsViaService(query),
  }
}
46
+
47
+ async function searchAppleDeveloperDocsViaService(query: string): Promise<SearchResult[]> {
48
+ const response = await fetch(APPLE_SEARCH_SERVICE_URL, {
49
+ method: "POST",
50
+ headers: {
51
+ "Content-Type": "application/json",
52
+ Accept: "application/json",
53
+ // The MSC backend requires a browser-style Origin/Referer pair to accept the request.
54
+ Origin: "https://developer.apple.com",
55
+ Referer: "https://developer.apple.com/search/",
56
+ "User-Agent": getRandomUserAgent(),
57
+ },
58
+ body: JSON.stringify({
59
+ text: query,
60
+ targetResultLocale: resolveTargetResultLocale(),
61
+ }),
62
+ })
63
+
64
+ if (!response.ok) {
65
+ throw new Error(`Search request failed: ${response.status}`)
29
66
  }
30
67
 
31
- private resetCurrentResult() {
32
- this.currentResult = {}
33
- this.currentBreadcrumbs = []
34
- this.currentTags = []
35
- this.isInResultTitle = false
36
- this.isInResultDescription = false
37
- this.isInBreadcrumb = false
38
- this.isInTag = false
68
+ const data = await readSearchResponseJson(response)
69
+ const rawResults = Array.isArray(data.results) ? (data.results as unknown[]) : []
70
+ return extractSearchResults(rawResults)
71
+ }
72
+
73
/**
 * Reads the body of a search response as a JSON object.
 *
 * @param response - The HTTP response whose body should be parsed.
 * @returns The parsed body when it passes `isJsonRecord`, otherwise an empty record.
 * @throws Error when the body cannot be parsed as JSON.
 */
async function readSearchResponseJson(response: Response): Promise<JsonRecord> {
  let payload: unknown
  try {
    payload = await response.json()
  } catch {
    throw new Error("Search response was not valid JSON")
  }

  // A well-formed but non-object payload is treated as "no data" rather than an error.
  if (!isJsonRecord(payload)) {
    return {}
  }
  return payload
}
40
81
 
41
- private finalizeCurrentResult() {
42
- if (this.currentResult.title && this.currentResult.url) {
43
- this.results.push({
44
- title: this.currentResult.title,
45
- url: this.currentResult.url,
46
- description: this.currentResult.description || "",
47
- breadcrumbs: [...this.currentBreadcrumbs],
48
- tags: [...this.currentTags],
49
- type: this.currentResult.type || "unknown",
50
- })
51
- }
52
- this.resetCurrentResult()
82
/**
 * Converts raw result entries into `SearchResult` objects, dropping every
 * entry that `normalizeSearchResult` rejects (returns `null` for).
 *
 * @param items - Unvalidated entries from the service response.
 * @returns The successfully normalized results, in their original order.
 */
function extractSearchResults(items: unknown[]): SearchResult[] {
  const normalized: SearchResult[] = []
  for (const rawItem of items) {
    const entry = normalizeSearchResult(rawItem)
    if (entry) {
      normalized.push(entry)
    }
  }
  return normalized
}
88
+
89
+ function normalizeSearchResult(item: unknown): SearchResult | null {
90
+ if (!isJsonRecord(item)) {
91
+ return null
53
92
  }
54
93
 
55
- element(element: Element) {
56
- // Start of a search result
57
- if (element.tagName === "li" && element.getAttribute("class")?.includes("search-result")) {
58
- this.finalizeCurrentResult() // Finalize previous result if any
59
-
60
- // Extract result type from class
61
- const className = element.getAttribute("class") || ""
62
- if (className.includes("documentation")) {
63
- this.currentResult.type = "documentation"
64
- } else if (className.includes("general")) {
65
- this.currentResult.type = "general"
66
- } else {
67
- this.currentResult.type = "other"
68
- }
94
+ const documentation = extractMetadataRecord(item.documentation)
95
+ if (documentation) {
96
+ const title = stringValue(documentation.title)
97
+ const url = stringValue(documentation.permalink)
98
+ if (!title || !url) {
99
+ return null
69
100
  }
70
101
 
71
- // Result title link
72
- if (
73
- element.tagName === "a" &&
74
- element.getAttribute("class")?.includes("click-analytics-result")
75
- ) {
76
- const href = element.getAttribute("href")
77
- if (href) {
78
- this.currentResult.url = href.startsWith("/") ? `https://developer.apple.com${href}` : href
79
- }
80
- this.isInResultTitle = true
102
+ return {
103
+ title,
104
+ url,
105
+ description: stringValue(documentation.description) ?? "",
106
+ breadcrumbs: splitHierarchy(stringValue(documentation.hierarchy)),
107
+ tags: compactStrings([stringValue(documentation.kind)]),
108
+ type: "documentation",
81
109
  }
110
+ }
82
111
 
83
- // Result description
84
- if (element.tagName === "p" && element.getAttribute("class")?.includes("result-description")) {
85
- this.isInResultDescription = true
112
+ const developer = extractMetadataRecord(item.developer)
113
+ if (developer) {
114
+ const title = firstString(developer.titles)
115
+ const url = firstString(developer.permalinks)
116
+ if (!title || !url) {
117
+ return null
86
118
  }
87
119
 
88
- // Breadcrumb items
89
- if (
90
- element.tagName === "li" &&
91
- element.getAttribute("class")?.includes("breadcrumb-list-item")
92
- ) {
93
- this.isInBreadcrumb = true
120
+ return {
121
+ title,
122
+ url,
123
+ description: firstString(developer.descriptions) ?? "",
124
+ breadcrumbs: compactStrings([firstString(developer.projectNames)]),
125
+ tags: compactStrings([
126
+ firstString(developer.itemTypes),
127
+ firstString(developer.deliveryLanguageCodes),
128
+ ]),
129
+ type: (firstString(developer.itemTypes) ?? "developer").toLowerCase(),
94
130
  }
131
+ }
95
132
 
96
- // Tag spans
97
- if (
98
- element.tagName === "span" &&
99
- element.parentElement?.getAttribute("class")?.includes("result-tag")
100
- ) {
101
- this.isInTag = true
133
+ const devsite = extractMetadataRecord(item.devsite)
134
+ if (devsite) {
135
+ const title = stringValue(devsite.title)
136
+ const url = stringValue(devsite.sourceURL)
137
+ if (!title || !url) {
138
+ return null
102
139
  }
103
140
 
104
- // Tag list items (for languages like "Swift", "Objective-C")
105
- if (
106
- element.tagName === "li" &&
107
- element.getAttribute("class")?.includes("result-tag language")
108
- ) {
109
- this.isInTag = true
141
+ return {
142
+ title,
143
+ url,
144
+ description: stringValue(devsite.description) ?? "",
145
+ breadcrumbs: [],
146
+ tags: [],
147
+ type: "general",
110
148
  }
111
149
  }
112
150
 
113
- text(text: Text) {
114
- const content = text.text.trim()
115
- if (!content) return
116
-
117
- if (this.isInResultTitle && this.currentResult.url) {
118
- this.currentResult.title = content
119
- this.isInResultTitle = false
120
- } else if (this.isInResultDescription) {
121
- this.currentResult.description = content
122
- this.isInResultDescription = false
123
- } else if (this.isInBreadcrumb) {
124
- this.currentBreadcrumbs.push(content)
125
- this.isInBreadcrumb = false
126
- } else if (this.isInTag) {
127
- this.currentTags.push(content)
128
- this.isInTag = false
151
+ const swiftdocs = extractMetadataRecord(item.swiftdocs)
152
+ if (swiftdocs) {
153
+ const title = stringValue(swiftdocs.title)
154
+ const url = stringValue(swiftdocs.sourceURL)
155
+ if (!title || !url) {
156
+ return null
157
+ }
158
+
159
+ return {
160
+ title,
161
+ url,
162
+ description: stringValue(swiftdocs.description) ?? "",
163
+ breadcrumbs: [],
164
+ tags: [],
165
+ type: "general",
129
166
  }
130
167
  }
131
168
 
132
- end() {
133
- this.finalizeCurrentResult() // Finalize the last result
169
+ return null
170
+ }
171
+
172
/**
 * Returns the `metadata` object nested inside `container`.
 *
 * @param container - Unvalidated value expected to hold a `metadata` property.
 * @returns The `metadata` value when both it and `container` pass `isJsonRecord`, otherwise `null`.
 */
function extractMetadataRecord(container: unknown): JsonRecord | null {
  const candidate = isJsonRecord(container) ? container.metadata : null
  return isJsonRecord(candidate) ? candidate : null
}
136
180
 
137
- export async function searchAppleDeveloperDocs(query: string): Promise<SearchResponse> {
138
- const searchUrl = `https://developer.apple.com/search/?q=${encodeURIComponent(query)}`
139
- const response = await fetch(searchUrl, {
140
- headers: {
141
- "User-Agent": getRandomUserAgent(),
142
- },
143
- })
181
/**
 * Type guard for a plain JSON object (a string-keyed record).
 *
 * Arrays are rejected even though `typeof [] === "object"`: treating an array
 * as a record would make the predicate lie about the value's shape.
 *
 * @param value - Any parsed JSON value.
 * @returns `true` only for non-null, non-array objects.
 */
// The predicate type is written out in full; it is structurally identical to
// the file's `JsonRecord` alias (`Record<string, unknown>`).
function isJsonRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value)
}
144
184
 
145
- if (!response.ok) {
146
- throw new Error(`Search request failed: ${response.status}`)
147
- }
185
/**
 * Narrows an unknown value to a non-empty string.
 *
 * @param value - Any value taken from the service response.
 * @returns `value` itself when it is a string with at least one character, otherwise `null`.
 */
function stringValue(value: unknown): string | null {
  if (typeof value !== "string") {
    return null
  }
  return value === "" ? null : value
}
148
188
 
149
- const html = await response.text()
150
- let results: SearchResult[] = []
151
- if (typeof HTMLRewriter !== "undefined") {
152
- const parser = new SearchResultParser()
153
- const rewriter = new HTMLRewriter()
154
- .on("li.search-result", parser)
155
- .on("li.search-result a.click-analytics-result", parser)
156
- .on("li.search-result p.result-description", parser)
157
- .on("li.search-result li.breadcrumb-list-item", parser)
158
- .on("li.search-result li.result-tag", parser)
159
- .on("li.search-result li.result-tag span", parser)
160
-
161
- // We need to consume the transformed response to trigger parsing callbacks.
162
- await rewriter.transform(new Response(html)).text()
163
- parser.end()
164
- results = parser.getResults()
165
- } else {
166
- results = await parseSearchResultsWithCheerio(html)
189
+ function firstString(value: unknown): string | null {
190
+ if (!Array.isArray(value)) {
191
+ return null
167
192
  }
168
193
 
169
- return {
170
- query,
171
- results,
172
- }
194
+ const first = value.find((item) => typeof item === "string" && item.length > 0)
195
+ return typeof first === "string" ? first : null
173
196
  }
174
197
 
175
- async function parseSearchResultsWithCheerio(html: string): Promise<SearchResult[]> {
176
- const { load } = await import("cheerio")
177
- const $ = load(html)
178
- const results: SearchResult[] = []
198
+ function splitHierarchy(hierarchy: string | null): string[] {
199
+ if (!hierarchy) {
200
+ return []
201
+ }
179
202
 
180
- $("li.search-result").each((_, element) => {
181
- const item = $(element)
182
- const link = item.find("a.click-analytics-result").first()
183
- const rawHref = link.attr("href")
184
- const title = link.text().trim()
203
+ return hierarchy
204
+ .split(" > ")
205
+ .map((segment) => segment.trim())
206
+ .filter(Boolean)
207
+ }
185
208
 
186
- if (!rawHref || !title) {
187
- return
188
- }
209
/**
 * Removes `null` and empty-string entries from a list.
 *
 * @param values - Strings that may include `null` placeholders.
 * @returns A new array holding only the truthy strings, in their original order.
 */
function compactStrings(values: Array<string | null>): string[] {
  const kept: string[] = []
  for (const candidate of values) {
    if (candidate) {
      kept.push(candidate)
    }
  }
  return kept
}
189
212
 
190
- const description = item.find("p.result-description").first().text().trim()
191
- const breadcrumbs = item
192
- .find("li.breadcrumb-list-item")
193
- .toArray()
194
- .map((breadcrumb) => $(breadcrumb).text().trim())
195
- .filter(Boolean)
196
-
197
- const tags = item
198
- .find("li.result-tag span, li.result-tag.language")
199
- .toArray()
200
- .map((tag) => $(tag).text().trim())
201
- .filter(Boolean)
202
-
203
- const className = item.attr("class") ?? ""
204
- const type = className.includes("documentation")
205
- ? "documentation"
206
- : className.includes("general")
207
- ? "general"
208
- : "other"
209
-
210
- results.push({
211
- title,
212
- url: rawHref.startsWith("/") ? `https://developer.apple.com${rawHref}` : rawHref,
213
- description,
214
- breadcrumbs,
215
- tags,
216
- type,
217
- })
218
- })
213
+ // Apple's MSC backend uses BCP-47 language tags ("en", "ja-JP", "zh-CN", etc.)
214
+ // instead of POSIX locale codes ("en_US").
215
+ // Mirror the mapping from
216
+ // https://developer.apple.com/search/scripts/helpers.js
217
+ function resolveTargetResultLocale(): string {
218
+ const locale = Intl.DateTimeFormat().resolvedOptions().locale
219
+ if (!locale) {
220
+ return DEFAULT_TARGET_RESULT_LOCALE
221
+ }
219
222
 
220
- return results
223
+ try {
224
+ const normalized = new Intl.Locale(locale)
225
+ const lang = normalized.language
226
+ const region = normalized.region
227
+ const languageRegion = region ? `${lang}-${region}` : lang
228
+
229
+ return (
230
+ TARGET_RESULT_LOCALE_BY_BASE_NAME.get(normalized.baseName) ??
231
+ TARGET_RESULT_LOCALE_BY_BASE_NAME.get(languageRegion) ??
232
+ TARGET_RESULT_LOCALE_BY_BASE_NAME.get(lang) ??
233
+ DEFAULT_TARGET_RESULT_LOCALE
234
+ )
235
+ } catch {
236
+ return DEFAULT_TARGET_RESULT_LOCALE
237
+ }
221
238
  }
@@ -0,0 +1,148 @@
1
+ import { HTTPException } from "hono/http-exception"
2
+
3
/** Name that the SKILL.md frontmatter is required to declare. */
export const SKILL_NAME = "sosumi"

// CORS and cache settings common to both header sets below.
const sharedSkillHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Cache-Control": "public, max-age=300, s-maxage=600",
}

/**
 * Headers with a Markdown content type.
 * NOTE(review): presumably sent when serving SKILL.md itself — confirm against the route handlers.
 */
export const skillHeaders = {
  ...sharedSkillHeaders,
  "Content-Type": "text/markdown; charset=utf-8",
}

/**
 * Headers with a JSON content type.
 * NOTE(review): presumably sent with the skill discovery index — confirm against the route handlers.
 */
export const skillIndexHeaders = {
  ...sharedSkillHeaders,
  "Content-Type": "application/json; charset=utf-8",
}

/** A loaded SKILL.md file together with the fields read from its frontmatter. */
export interface SkillArtifact {
  /** Raw bytes of the SKILL.md response body. */
  bytes: ArrayBuffer
  /** The `description` frontmatter field. */
  description: string
  /** The `name` frontmatter field. */
  name: string
}
22
+
23
/**
 * Fetches SKILL.md through the assets binding and validates its frontmatter.
 *
 * @param assets - Fetcher used to request the file.
 * @param baseUrl - URL against which the `/SKILL.md` path is resolved.
 * @returns The file's raw bytes plus the `name` and `description` from its frontmatter.
 * @throws HTTPException (500) when the fetch response is not OK, or when
 *   frontmatter parsing or validation fails.
 */
export async function loadSkill(assets: Fetcher, baseUrl: string): Promise<SkillArtifact> {
  const request = new Request(skillAssetUrl(baseUrl))
  const assetResponse = await assets.fetch(request)
  if (!assetResponse.ok) {
    throw new HTTPException(500, {
      message: "Failed to load SKILL.md",
    })
  }

  // Keep the raw bytes for the artifact; decode a copy for frontmatter parsing.
  const bytes = await assetResponse.arrayBuffer()
  const markdown = new TextDecoder().decode(bytes)

  const fields = parseSkillFrontmatter(markdown)
  assertValidSkillFrontmatter(fields)

  const { description, name } = fields
  return { bytes, description, name }
}
43
+
44
/**
 * Checks whether SKILL.md is available by issuing a HEAD request for it.
 *
 * @param assets - Fetcher used to issue the request.
 * @param baseUrl - URL against which the `/SKILL.md` path is resolved.
 * @returns The `ok` flag of the HEAD response.
 */
export async function skillExists(assets: Fetcher, baseUrl: string): Promise<boolean> {
  const probe = new Request(skillAssetUrl(baseUrl), { method: "HEAD" })
  const { ok } = await assets.fetch(probe)
  return ok
}
48
+
49
/**
 * Builds the absolute URL of SKILL.md at the root of `baseUrl`'s origin.
 *
 * @param baseUrl - Absolute URL used as the resolution base; its path and query are discarded.
 * @returns The serialized URL ending in `/SKILL.md`.
 * @throws TypeError when `baseUrl` is not a valid absolute URL.
 */
function skillAssetUrl(baseUrl: string): string {
  const assetLocation = new URL("/SKILL.md", baseUrl)
  return assetLocation.href
}
52
+
53
/**
 * Builds the discovery index document describing a single skill.
 *
 * @param skill - The loaded skill whose name, description and bytes populate the entry.
 * @returns An object with a `$schema` URL and a one-element `skills` array; the
 *   entry's `digest` is `sha256:` followed by the hex SHA-256 of `skill.bytes`.
 */
export async function createSkillIndex(skill: SkillArtifact) {
  const { name, description, bytes } = skill
  const contentHash = await sha256Hex(bytes)

  return {
    $schema: "https://schemas.agentskills.io/discovery/0.2.0/schema.json",
    skills: [
      {
        name,
        type: "skill-md",
        description,
        url: `/.well-known/agent-skills/${name}/SKILL.md`,
        digest: `sha256:${contentHash}`,
        files: ["SKILL.md"],
      },
    ],
  }
}
68
+
69
/**
 * Parses the leading `---`-delimited frontmatter of a Markdown document into
 * flat key/value pairs.
 *
 * This is a line-based `key: value` reader, not a YAML parser: each non-blank
 * line is split at its first colon, both sides are trimmed, and the value is
 * passed through `stripQuotes`. A repeated key keeps its last value.
 *
 * @param markdown - Full text of the document.
 * @returns The parsed fields.
 * @throws HTTPException (500) when the document does not start with a
 *   frontmatter block, or when a non-blank frontmatter line has no colon.
 */
function parseSkillFrontmatter(markdown: string): Record<string, string> {
  const frontmatterBlock = /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n|$)/.exec(markdown)
  if (frontmatterBlock === null) {
    throw new HTTPException(500, {
      message: "SKILL.md must start with YAML frontmatter.",
    })
  }

  const fields: Record<string, string> = {}
  const rawLines = frontmatterBlock[1].split(/\r?\n/)

  for (const rawLine of rawLines) {
    // Blank lines carry no data.
    if (rawLine.trim() === "") {
      continue
    }

    // Split on the first colon only, so values may themselves contain colons.
    const colonAt = rawLine.indexOf(":")
    if (colonAt < 0) {
      throw new HTTPException(500, {
        message: `Invalid SKILL.md frontmatter line: ${rawLine}`,
      })
    }

    const fieldName = rawLine.slice(0, colonAt).trim()
    const fieldText = rawLine.slice(colonAt + 1).trim()
    fields[fieldName] = stripQuotes(fieldText)
  }

  return fields
}
101
+
102
/**
 * Removes one pair of matching surrounding quotes (`"…"` or `'…'`) from a value.
 *
 * @param value - Trimmed frontmatter value.
 * @returns The text between the quotes when `value` both starts and ends with
 *   the same quote character; otherwise `value` unchanged.
 */
function stripQuotes(value: string): string {
  // A lone quote character is both the first and the last character; it is not
  // a quoted (empty) string, so it must not be stripped to "".
  if (value.length < 2) {
    return value
  }

  const opening = value.charAt(0)
  const closing = value.charAt(value.length - 1)
  const isQuote = opening === '"' || opening === "'"

  return isQuote && opening === closing ? value.slice(1, -1) : value
}
112
+
113
/**
 * Validates the `name` and `description` fields of parsed SKILL.md frontmatter.
 *
 * Checks run in this order, and the first failure throws:
 * 1. `name` is a non-empty string
 * 2. `name` equals `SKILL_NAME`
 * 3. `name` consists of lowercase alphanumerics with single interior hyphens and is at most 64 characters
 * 4. `description` is a non-empty string
 * 5. `description` is at most 1024 characters
 *
 * @param frontmatter - Key/value pairs parsed from the frontmatter block.
 * @throws HTTPException (500) describing the first check that failed.
 */
function assertValidSkillFrontmatter(frontmatter: Record<string, string>) {
  const invalid = (message: string) => new HTTPException(500, { message })
  const { name, description } = frontmatter

  if (typeof name !== "string" || name.length === 0) {
    throw invalid("Skill name is required.")
  }

  if (name !== SKILL_NAME) {
    throw invalid(`Expected skill name "${SKILL_NAME}", got "${name}".`)
  }

  const hasValidShape = /^[a-z0-9](?:-?[a-z0-9])*$/.test(name)
  if (!hasValidShape || name.length > 64) {
    throw invalid(`Invalid skill name: ${name}`)
  }

  if (typeof description !== "string" || description.length === 0) {
    throw invalid("Skill description is required.")
  }

  if (description.length > 1024) {
    throw invalid("Skill description must be 1024 characters or fewer.")
  }
}
144
+
145
/**
 * Computes the SHA-256 digest of `bytes` with Web Crypto.
 *
 * @param bytes - Data to hash.
 * @returns The digest as lowercase hexadecimal, two characters per byte.
 */
async function sha256Hex(bytes: ArrayBuffer): Promise<string> {
  const hashBuffer = await crypto.subtle.digest("SHA-256", bytes)

  let hex = ""
  for (const octet of new Uint8Array(hashBuffer)) {
    // Pad so values below 0x10 still contribute two characters.
    hex += octet.toString(16).padStart(2, "0")
  }
  return hex
}
package/src/lib/types.ts CHANGED
@@ -32,6 +32,14 @@ export interface Token {
32
32
  text?: string
33
33
  }
34
34
 
35
+ /**
36
+ * One panel in Apple DocC `tabNavigator` JSON (e.g. a language tab with examples).
37
+ */
38
+ export interface TabNavigatorTab {
39
+ title?: string
40
+ content?: ContentItem[]
41
+ }
42
+
35
43
  /**
36
44
  * The main content item type used throughout the documentation structure.
37
45
  * Can represent text, code, lists, headings, and other content elements.
@@ -51,6 +59,9 @@ export interface ContentItem {
51
59
  inlineContent?: ContentItem[]
52
60
  items?: ContentItem[]
53
61
 
62
+ /** Tab panels for DocC `tabNavigator` content (e.g. Swift / Objective-C examples). */
63
+ tabs?: TabNavigatorTab[]
64
+
54
65
  // Code content
55
66
  code?: string | string[]
56
67
  syntax?: string
@@ -113,10 +124,12 @@ export interface SeeAlsoSection {
113
124
  */
114
125
  export interface PrimaryContentSection {
115
126
  kind: string
127
+ title?: string
116
128
  content?: ContentItem[]
117
129
  declarations?: Declaration[]
118
130
  parameters?: Parameter[]
119
131
  items?: PropertyItem[]
132
+ values?: PossibleValueItem[]
120
133
  }
121
134
 
122
135
  /**
@@ -137,6 +150,14 @@ export interface PropertyItem {
137
150
  }>
138
151
  }
139
152
 
153
+ /**
154
+ * Represents a possible value item used in enum/string type pages.
155
+ */
156
+ export interface PossibleValueItem {
157
+ name: string
158
+ content?: ContentItem[]
159
+ }
160
+
140
161
  // ============================================================================
141
162
  // VARIANT TYPES
142
163
  // ============================================================================
package/wrangler.jsonc CHANGED
@@ -16,7 +16,8 @@
16
16
  "compatibility_flags": ["nodejs_compat"],
17
17
  "assets": {
18
18
  "binding": "ASSETS",
19
- "directory": "./public"
19
+ "directory": "./public",
20
+ "run_worker_first": ["/", "/.well-known/agent-skills/*"]
20
21
  },
21
22
  "observability": {
22
23
  "enabled": true