ara-generate 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -0
- package/index.js +304 -0
- package/package.json +28 -0
package/README.md
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# ara-generate
|
|
2
|
+
|
|
3
|
+
Generates a basic ARA manifest from existing website metadata. Scans your site's HTML (meta tags, OpenGraph, JSON-LD) and checks whether robots.txt and sitemap.xml exist, then auto-generates a Level 1 ARA manifest.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Generate to stdout
|
|
9
|
+
npx ara-generate https://example.com
|
|
10
|
+
|
|
11
|
+
# Save to file
|
|
12
|
+
npx ara-generate https://example.com --output .well-known/ara/manifest.json
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## What It Extracts
|
|
16
|
+
|
|
17
|
+
- **Title & description** from `<meta>`, OpenGraph, and `<title>` tags
|
|
18
|
+
- **JSON-LD / Schema.org** structured data
|
|
19
|
+
- **Language** from `<html lang="...">`
|
|
20
|
+
- **Site type** inferred from content and JSON-LD types
|
|
21
|
+
- **robots.txt** and **sitemap.xml** presence (reported during generation; not yet written to the manifest)
|
|
22
|
+
|
|
23
|
+
## Output
|
|
24
|
+
|
|
25
|
+
A Level 1 ARA manifest (`manifest.json`) with:
|
|
26
|
+
- `identity` (name, type, description, locale, contact)
|
|
27
|
+
- `content_map` (basic resource listing)
|
|
28
|
+
- `capabilities` (placeholder for protocols)
|
|
29
|
+
- `policies` (default open access with rate limits)
|
|
30
|
+
- `meta` (generation timestamp, source URL)
|
|
31
|
+
|
|
32
|
+
The generated manifest is a starting point. Enrich it with:
|
|
33
|
+
- **Layer 2**: Add `schemas/*.json` for semantic data structure
|
|
34
|
+
- **Layer 3**: Add `actions.json` for agent interactions
|
|
35
|
+
|
|
36
|
+
## Links
|
|
37
|
+
|
|
38
|
+
- **Spec**: https://ara-standard.org
|
|
39
|
+
- **GitHub**: https://github.com/aka9871/ara-standard
|
|
40
|
+
- **Validator**: `npx ara-validate https://yoursite.com`
|
|
41
|
+
|
|
42
|
+
## License
|
|
43
|
+
|
|
44
|
+
MIT
|
package/index.js
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* ARA Generator — Generates a basic ARA manifest from site metadata.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* npx ara-generate https://example.com
|
|
8
|
+
* npx ara-generate https://example.com --output .well-known/ara/manifest.json
|
|
9
|
+
*
|
|
10
|
+
* This tool:
|
|
11
|
+
* 1. Fetches the site's HTML
|
|
12
|
+
* 2. Extracts metadata (title, description, OpenGraph, Schema.org/JSON-LD)
|
|
13
|
+
* 3. Checks for robots.txt and sitemap.xml
|
|
14
|
+
* 4. Generates a Level 1 ARA manifest
|
|
15
|
+
*
|
|
16
|
+
* The generated manifest is a starting point — you'll want to enrich it
|
|
17
|
+
* with Layer 2 schemas and Layer 3 actions manually.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
const https = require("https");
|
|
21
|
+
const http = require("http");
|
|
22
|
+
const fs = require("fs");
|
|
23
|
+
const path = require("path");
|
|
24
|
+
const { URL } = require("url");
|
|
25
|
+
|
|
26
|
+
// ── Helpers ────────────────────────────────────────────────────────────────
|
|
27
|
+
|
|
28
|
+
/**
 * Fetches a URL over HTTP(S), following redirects, and buffers the body as text.
 *
 * @param {string} url - Absolute http:// or https:// URL to request.
 * @param {number} [maxRedirects=5] - Remaining hop budget; every redirect consumes one.
 * @returns {Promise<{status: number, body: string, url: string}>} Status code and
 *   UTF-8 decoded body of the final response, plus the URL that produced it.
 *   Rejects with "Too many redirects" when the budget is exhausted, or with the
 *   underlying error for invalid URLs and network failures.
 */
function fetchUrl(url, maxRedirects = 5) {
  return new Promise((resolve, reject) => {
    if (maxRedirects <= 0) {
      reject(new Error("Too many redirects"));
      return;
    }

    // Pick the transport from the parsed protocol instead of a string prefix.
    // An unparsable URL throws here, which rejects the promise.
    const client = new URL(url).protocol === "https:" ? https : http;

    client
      .get(url, { headers: { "User-Agent": "ARA-Generator/1.0" } }, (res) => {
        const { statusCode, headers } = res;

        if (statusCode >= 300 && statusCode < 400 && headers.location) {
          // Drain the redirect body so the socket is released.
          res.resume();

          let redirectUrl;
          try {
            // new URL() resolves absolute and relative Location values alike.
            redirectUrl = new URL(headers.location, url).href;
          } catch (err) {
            reject(err);
            return;
          }

          fetchUrl(redirectUrl, maxRedirects - 1).then(resolve, reject);
          return;
        }

        // Decode as a stream so multi-byte characters that straddle two
        // chunks are not corrupted by per-chunk string conversion.
        res.setEncoding("utf8");

        let data = "";
        res.on("data", (chunk) => {
          data += chunk;
        });
        res.on("error", reject);
        res.on("end", () => resolve({ status: statusCode, body: data, url }));
      })
      .on("error", reject);
  });
}
|
|
48
|
+
|
|
49
|
+
/**
 * Returns the `content` of the first `<meta>` tag whose `name` or `property`
 * attribute equals `name` (compared case-insensitively).
 *
 * Attributes may appear in any order, alongside unrelated attributes, and
 * may be double-quoted, single-quoted, or unquoted.
 *
 * @param {string} html - HTML source to scan.
 * @param {string} name - Meta key to look up, e.g. "description" or "og:title".
 * @returns {string|null} The content value, or null when no matching tag
 *   with a `content` attribute exists.
 */
function extractMeta(html, name) {
  const wanted = String(name).toLowerCase();

  // A quoted value may contain ">" or the other quote character, so quoted
  // runs are consumed as units rather than stopping at the first quote.
  const metaTag = /<meta\b((?:"[^"]*"|'[^']*'|[^"'>])*)>/gi;
  const attribute = /([^\s"'=<>\/]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g;

  for (const [, rawAttributes] of html.matchAll(metaTag)) {
    const attributes = new Map();
    for (const [, key, doubleQuoted, singleQuoted, bare] of rawAttributes.matchAll(attribute)) {
      const attributeName = key.toLowerCase();
      // Keep the first occurrence of a repeated attribute.
      if (!attributes.has(attributeName)) {
        attributes.set(attributeName, doubleQuoted ?? singleQuoted ?? bare);
      }
    }

    const isWanted = [attributes.get("name"), attributes.get("property")].some(
      (value) => value !== undefined && value.toLowerCase() === wanted,
    );
    if (isWanted && attributes.has("content")) {
      return attributes.get("content");
    }
  }

  return null;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Extracts the text of the first `<title>` element.
 *
 * Runs of whitespace (including newlines from pretty-printed markup) are
 * collapsed to single spaces so the result is a one-line string.
 *
 * @param {string} html - HTML source to scan.
 * @returns {string|null} The normalised title, or null when there is no
 *   `<title>` element or it contains only whitespace.
 */
function extractTitle(html) {
  const match = html.match(/<title\b[^>]*>([^<]*)<\/title\s*>/i);
  if (!match) return null;

  const title = match[1].replace(/\s+/g, " ").trim();
  return title === "" ? null : title;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Collects JSON-LD nodes from `<script type="application/ld+json">` blocks.
 *
 * The `type` attribute may appear anywhere in the tag. Top-level arrays and
 * `@graph` containers are flattened so every returned entry is a plain
 * object node. Blocks with invalid JSON and non-object values (null,
 * strings, numbers) are skipped.
 *
 * @param {string} html - HTML source to scan.
 * @returns {object[]} JSON-LD nodes in document order; empty when none are found.
 */
function extractJsonLd(html) {
  const results = [];
  const scriptTag = /<script\b([^>]*)>([\s\S]*?)<\/script\s*>/gi;
  // Not global: .test() keeps no state between calls.
  const jsonLdType =
    /(?:^|\s)type\s*=\s*(?:"application\/ld\+json"|'application\/ld\+json'|application\/ld\+json(?=\s|$))/i;

  // Flattens arrays and @graph wrappers down to individual object nodes.
  const collect = (node) => {
    if (Array.isArray(node)) {
      node.forEach(collect);
      return;
    }
    if (node === null || typeof node !== "object") return;
    if (Array.isArray(node["@graph"])) {
      node["@graph"].forEach(collect);
      return;
    }
    results.push(node);
  };

  for (const [, attributes, body] of html.matchAll(scriptTag)) {
    if (!jsonLdType.test(attributes)) continue;

    let parsed;
    try {
      parsed = JSON.parse(body);
    } catch {
      // Invalid JSON-LD, skip
      continue;
    }
    collect(parsed);
  }

  return results;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Reads the `lang` attribute of the `<html>` element.
 *
 * Accepts double-quoted, single-quoted, and unquoted values, with optional
 * whitespace around `=`. Namespaced attributes such as `xml:lang` are not
 * considered.
 *
 * @param {string} html - HTML source to scan.
 * @returns {string|null} The trimmed language tag, or null when the
 *   attribute is absent or empty.
 */
function detectLanguage(html) {
  const match = html.match(
    /<html\b[^>]*?\slang\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/i,
  );
  if (!match) return null;

  // Exactly one of the three capture groups participates in a match.
  const lang = (match[1] ?? match[2] ?? match[3]).trim();
  return lang === "" ? null : lang;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Guesses an ARA site type from JSON-LD types and title/description keywords.
 *
 * JSON-LD is consulted first: each node's `@type` (a string or an array of
 * strings) is checked against the rules in priority order. If nothing
 * matches, keyword rules are applied to the lower-cased title and
 * description. Keywords are matched as word prefixes ("reserv" matches
 * "reservations") and never in the middle of a word, so "happy" does not
 * count as "app".
 *
 * @param {string|null} description - Page description, if any.
 * @param {string|null} title - Page title, if any.
 * @param {Array<object>} jsonLd - Parsed JSON-LD nodes; non-object entries are ignored.
 * @returns {string} One of "ecommerce", "restaurant", "blog", "news_media",
 *   "saas", "real_estate", "portfolio", "documentation", or "website".
 */
function inferSiteType(description, title, jsonLd) {
  const jsonLdRules = [
    [["Store", "Product"], "ecommerce"],
    [["Restaurant"], "restaurant"],
    [["Blog"], "blog"],
    [["NewsArticle", "NewsMediaOrganization"], "news_media"],
    [["SoftwareApplication"], "saas"],
    [["RealEstateAgent"], "real_estate"],
  ];
  const keywordRules = [
    [/\b(?:shop|store|buy|cart|product|e-?commerce)/, "ecommerce"],
    [/\b(?:restaurant|menu|dine|reserv)/, "restaurant"],
    [/\b(?:blog|article|post|writing)/, "blog"],
    [/\b(?:news|media|journal)/, "news_media"],
    [/\b(?:saas|software|platform|app|tool|dashboard)/, "saas"],
    [/\b(?:portfolio|freelanc|design|agency)/, "portfolio"],
    [/\b(?:docs|documentation|api|reference)/, "documentation"],
  ];

  // Check JSON-LD types first
  for (const ld of jsonLd ?? []) {
    if (ld === null || typeof ld !== "object") continue;

    // @type may be a single string or an array of strings.
    const types = [].concat(ld["@type"] ?? []).filter((type) => typeof type === "string");
    for (const [needles, siteType] of jsonLdRules) {
      if (types.some((type) => needles.some((needle) => type.includes(needle)))) {
        return siteType;
      }
    }
  }

  // Keyword-based inference
  const text = `${title || ""} ${description || ""}`.toLowerCase();
  for (const [pattern, siteType] of keywordRules) {
    if (pattern.test(text)) return siteType;
  }

  return "website";
}
|
|
117
|
+
|
|
118
|
+
// ── Generator ──────────────────────────────────────────────────────────────
|
|
119
|
+
|
|
120
|
+
/**
 * Builds a Level 1 ARA manifest for a website from its public metadata.
 *
 * Fetches the page at `siteUrl`, extracts title/description/OpenGraph/JSON-LD
 * metadata, probes robots.txt and sitemap.xml at the site origin (presence is
 * only reported, not stored), and assembles the manifest. Progress messages
 * go to stderr so stdout stays free for the JSON output.
 *
 * @param {string} siteUrl - Absolute http(s) URL of the site to analyse.
 * @returns {Promise<object>} Manifest with `identity`, `content_map`,
 *   `capabilities`, `policies`, and `meta` sections.
 * @throws {TypeError} When `siteUrl` is not a valid absolute URL.
 * @throws {Error} When the main page cannot be fetched. The promise rejects
 *   instead of exiting the process, so library callers can handle it.
 */
async function generate(siteUrl) {
  const baseUrl = siteUrl.replace(/\/$/, "");
  const parsedUrl = new URL(baseUrl);
  const domain = parsedUrl.hostname;

  console.error(`\n ARA Generator v1.0`);
  console.error(` Analyzing ${baseUrl}...\n`);

  // Fetch main page
  let html;
  try {
    const response = await fetchUrl(baseUrl);
    html = response.body;
    console.error(` ✓ Fetched main page (${html.length} bytes)`);
    if (response.status >= 400) {
      // Best effort: keep going, but flag that this is probably an error page.
      console.error(` ! Main page returned HTTP ${response.status}; extracted metadata may be unreliable`);
    }
  } catch (e) {
    throw new Error(`Could not fetch ${baseUrl}: ${e.message}`, { cause: e });
  }

  // Extract metadata
  const title = extractMeta(html, "og:title") || extractTitle(html) || domain;
  const description =
    extractMeta(html, "og:description") ||
    extractMeta(html, "description") ||
    `Website at ${domain}`;
  const locale = detectLanguage(html);
  const image = extractMeta(html, "og:image");
  const jsonLd = extractJsonLd(html);
  const siteType = inferSiteType(description, title, jsonLd);

  console.error(` ✓ Extracted metadata: "${title}"`);
  console.error(` ✓ Detected type: ${siteType}`);
  if (jsonLd.length > 0) {
    console.error(` ✓ Found ${jsonLd.length} JSON-LD block(s)`);
  }

  // robots.txt and sitemap.xml live at the origin root, even when siteUrl
  // points at a sub-path such as https://example.com/blog.
  const reportPresence = async (label, pathname) => {
    try {
      const { status } = await fetchUrl(new URL(pathname, parsedUrl.origin).href);
      const found = status === 200;
      console.error(` ${found ? "✓" : "—"} ${label} ${found ? "found" : "not found"}`);
    } catch {
      console.error(` — Could not check ${label}`);
    }
  };
  await reportPresence("robots.txt", "/robots.txt");
  await reportPresence("sitemap.xml", "/sitemap.xml");

  // Remove taglines ("Acme - Best widgets", "Acme | Home") without cutting
  // hyphenated names such as "Coca-Cola": a dash only counts as a separator
  // when it is surrounded by whitespace.
  const siteName = title.replace(/\s+[-–—]\s[\s\S]*$|\s*\|[\s\S]*$/, "").trim() || title.trim();

  // Build manifest
  const manifest = {
    $ara: "1.0",
    $schema: "https://ara-standard.org/schema/manifest/v1",

    identity: {
      name: siteName,
      type: siteType,
      description: description,
      ...(locale && { locale: [locale] }),
      contact: {
        website: baseUrl,
      },
      ...(image && {
        branding: {
          logo: image,
        },
      }),
    },

    content_map: {
      summary: `Content from ${domain}`,
      resources: [
        {
          id: "pages",
          type: "content",
          label: "Site Pages",
          description: "Pages available on this website",
          access: "public",
          freshness: "weekly",
        },
      ],
      // TODO: Enrich with detected resources from JSON-LD and sitemap
    },

    capabilities: {
      protocols: {},
      // TODO: Add detected APIs, MCP endpoints, etc.
    },

    policies: {
      agent_access: "open",
      rate_limit: {
        requests_per_minute: 30,
        burst: 5,
      },
      data_usage: {
        caching_allowed: true,
        cache_ttl: 3600,
        redistribution: false,
        attribution_required: true,
      },
    },

    meta: {
      generated_at: new Date().toISOString(),
      generator: "ara-generator/1.0",
      human_site: baseUrl,
    },
  };

  // Enrich from JSON-LD
  for (const ld of jsonLd) {
    if (ld === null || typeof ld !== "object") continue;

    // @type may be a single string or an array of strings.
    const types = [].concat(ld["@type"] ?? []);
    if (!types.includes("Organization") && !types.includes("LocalBusiness")) continue;

    if (ld.name) manifest.identity.name = ld.name;

    const address = typeof ld.address === "string" ? ld.address : ld.address?.streetAddress;
    // Skip geo when no usable address string is available.
    if (address) manifest.identity.geo = { address };

    if (ld.telephone) manifest.identity.contact.phone = ld.telephone;
    if (ld.email) manifest.identity.contact.email = ld.email;
  }

  console.error(`\n ✓ Generated ARA manifest (Level 1)`);
  console.error(` ℹ Enrich with schemas (Layer 2) and actions (Layer 3) for full ARA support.\n`);

  return manifest;
}
|
|
256
|
+
|
|
257
|
+
// ── CLI ────────────────────────────────────────────────────────────────────
|
|
258
|
+
|
|
259
|
+
/**
 * CLI entry point: parses arguments, generates the manifest, and writes it
 * to the `--output` file or to stdout.
 *
 * Exit status: 0 on success or when help is requested with --help/-h;
 * 1 when the URL is missing or `--output` has no file argument.
 *
 * @returns {Promise<void>} Rejects when generation or writing the file fails.
 */
async function main() {
  const usage = `
ARA Generator v1.0
===================

Generates a basic ARA manifest from site metadata.

Usage:
npx ara-generate <url>
npx ara-generate <url> --output <file>

Examples:
npx ara-generate https://example.com
npx ara-generate https://myshop.com --output .well-known/ara/manifest.json

The generated manifest is a Level 1 starting point.
Add schemas (Layer 2) and actions (Layer 3) manually for full ARA support.
`;

  const args = process.argv.slice(2);

  if (args.includes("--help") || args.includes("-h")) {
    console.log(usage);
    return;
  }

  const outputIdx = args.indexOf("--output");
  const outputFile = outputIdx !== -1 ? args[outputIdx + 1] : null;

  if (outputIdx !== -1 && (!outputFile || outputFile.startsWith("--"))) {
    // Do not silently fall back to stdout when the file path was forgotten.
    console.error("Error: --output requires a file path");
    process.exitCode = 1;
    return;
  }

  // The value that follows --output is a file path, never the site URL.
  const url = args.find(
    (arg, index) => (outputIdx === -1 || index !== outputIdx + 1) && arg.startsWith("http"),
  );

  if (!url) {
    // A missing URL is a usage error: print help to stderr and fail.
    console.error(usage);
    process.exitCode = 1;
    return;
  }

  const manifest = await generate(url);
  const json = JSON.stringify(manifest, null, 2);

  if (outputFile) {
    // With `recursive: true`, mkdirSync is a no-op for existing directories.
    fs.mkdirSync(path.dirname(outputFile), { recursive: true });
    fs.writeFileSync(outputFile, json);
    console.error(` ✓ Saved to ${outputFile}`);
  } else {
    console.log(json);
  }
}
|
|
298
|
+
|
|
299
|
+
// Run the CLI only when this file is executed directly (node index.js or the
// npm bin shim). When it is loaded with require() as a library, main() must
// not run: it would print the usage text and could terminate the host process.
if (require.main === module) {
  main().catch((err) => {
    console.error("Error:", err.message);
    process.exit(1);
  });
}

module.exports = { generate };
|
package/package.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "ara-generate",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Generates ARA manifests from existing website metadata",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"ara-generate": "./index.js"
|
|
8
|
+
},
|
|
9
|
+
"keywords": [
|
|
10
|
+
"ara",
|
|
11
|
+
"agent-ready",
|
|
12
|
+
"web-standard",
|
|
13
|
+
"ai-agents",
|
|
14
|
+
"manifest",
|
|
15
|
+
"generator",
|
|
16
|
+
"mcp"
|
|
17
|
+
],
|
|
18
|
+
"author": "ARA Standard Contributors",
|
|
19
|
+
"license": "MIT",
|
|
20
|
+
"repository": {
|
|
21
|
+
"type": "git",
|
|
22
|
+
"url": "https://github.com/aka9871/ara-standard"
|
|
23
|
+
},
|
|
24
|
+
"homepage": "https://ara-standard.org",
|
|
25
|
+
"engines": {
|
|
26
|
+
"node": ">=16.0.0"
|
|
27
|
+
}
|
|
28
|
+
}
|