@pipeworx/mcp-archive 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Pipeworx
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,36 @@
1
+ # @pipeworx/mcp-archive
2
+
3
+ MCP server for the [Internet Archive](https://archive.org) — full-text search, item metadata retrieval, and Wayback Machine URL lookups. Free, no auth required.
4
+
5
+ ## Tools
6
+
7
+ | Tool | Description |
8
+ |------|-------------|
9
+ | `search` | Search archive.org collections (Lucene query syntax) |
10
+ | `get_metadata` | Get full metadata for an item by identifier |
11
+ | `wayback_check` | Check if a URL has been archived and get the nearest snapshot |
12
+
13
+ ## Quick Start
14
+
15
+ Add to your MCP client config:
16
+
17
+ ```json
18
+ {
19
+ "mcpServers": {
20
+ "archive": {
21
+ "type": "url",
22
+ "url": "https://gateway.pipeworx.io/archive"
23
+ }
24
+ }
25
+ }
26
+ ```
27
+
28
+ ## CLI Usage
29
+
30
+ ```bash
31
+ npx @anthropic-ai/mcp-client https://gateway.pipeworx.io/archive
32
+ ```
33
+
34
+ ## License
35
+
36
+ MIT
package/package.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "name": "@pipeworx/mcp-archive",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "license": "MIT",
6
+ "keywords": ["mcp", "mcp-server", "model-context-protocol", "pipeworx", "archive"],
7
+ "devDependencies": {
8
+ "typescript": "^5.7.0"
9
+ }
10
+ }
package/src/index.ts ADDED
@@ -0,0 +1,216 @@
1
+ /**
2
+ * Archive MCP — wraps the Internet Archive APIs (free, no auth)
3
+ *
4
+ * Tools:
5
+ * - search: full-text search across archive.org collections
6
+ * - get_metadata: retrieve complete metadata for a specific item by identifier
7
+ * - wayback_check: check whether a URL has been archived and get the nearest snapshot
8
+ */
9
+
10
/**
 * Shape of one tool entry advertised by this module.
 */
interface McpToolDefinition {
  /** Tool identifier. */
  name: string;
  /** Human-readable summary of what the tool does. */
  description: string;
  /** Object schema describing the arguments the tool accepts. */
  inputSchema: {
    /** Always the literal `'object'`. */
    type: 'object';
    /** Per-argument schema fragments, keyed by argument name. */
    properties: Record<string, unknown>;
    /** Names of the arguments that must be supplied, if any. */
    required?: Array<string>;
  };
}
19
+
20
/**
 * Shape of a module's default export: the tool catalogue plus the
 * dispatcher that executes a tool by name.
 */
interface McpToolExport {
  /** Every tool definition the module offers. */
  tools: Array<McpToolDefinition>;
  /** Runs the tool called `name` with the given raw arguments. */
  callTool: (name: string, args: Record<string, unknown>) => Promise<unknown>;
}
24
+
25
+ const BASE_URL = 'https://archive.org'; // Origin prepended to the advancedsearch, metadata, and wayback endpoint paths fetched in this file.
26
+
27
/**
 * Catalogue of the tools this module exposes. Each `name` corresponds to a
 * case handled by `callTool`.
 */
const tools: McpToolDefinition[] = [
  // Item search.
  {
    name: 'search',
    description:
      'Search the Internet Archive for texts, audio, video, software, and other items. Supports Lucene query syntax.',
    inputSchema: {
      type: 'object',
      properties: {
        query: {
          type: 'string',
          description: 'Search query (e.g., "subject:astronomy", "creator:NASA", "moon landing")',
        },
        limit: {
          type: 'number',
          description: 'Number of results to return (1-100, default 20)',
        },
      },
      required: ['query'],
    },
  },

  // Single-item metadata lookup.
  {
    name: 'get_metadata',
    description:
      'Retrieve full metadata for an Internet Archive item by its identifier (the unique ID in the archive.org URL).',
    inputSchema: {
      type: 'object',
      properties: {
        id: {
          type: 'string',
          description: 'Archive.org item identifier (e.g., "principleofrelat00eins", "ApolloMissionsMoonLandings")',
        },
      },
      required: ['id'],
    },
  },

  // Wayback Machine availability lookup.
  {
    name: 'wayback_check',
    description:
      'Check whether a URL has ever been archived in the Wayback Machine and retrieve the closest available snapshot.',
    inputSchema: {
      type: 'object',
      properties: {
        url: {
          type: 'string',
          description: 'The URL to look up (e.g., "https://example.com/some-page")',
        },
      },
      required: ['url'],
    },
  },
];
75
+
76
/**
 * Dispatches a tool invocation to the matching Internet Archive helper.
 *
 * Tool arguments arrive as untyped data, so each one is checked at runtime
 * before it is forwarded. Without these checks a missing `query`, `id`, or
 * `url` would be stringified and sent upstream as the literal text
 * "undefined", and an unparseable `limit` would become `rows=NaN`.
 *
 * @param name - Tool to run: `search`, `get_metadata`, or `wayback_check`.
 * @param args - Raw arguments supplied by the caller.
 * @returns Whatever the selected helper resolves to.
 * @throws Error when `name` is not a known tool, when a required string
 *   argument is missing or blank, or when `limit` is not numeric.
 */
async function callTool(name: string, args: Record<string, unknown>): Promise<unknown> {
  // Tolerate a caller that omits the argument object entirely.
  const input: Record<string, unknown> = args ?? {};

  // Reads a required, non-blank string argument or throws a descriptive error.
  const requireString = (key: string): string => {
    const value = input[key];
    if (typeof value !== 'string' || value.trim() === '') {
      throw new Error(`Tool "${name}" requires a non-empty string argument "${key}"`);
    }
    return value;
  };

  switch (name) {
    case 'search': {
      const query = requireString('query');
      const rawLimit = input.limit ?? 20;
      // Numeric strings are still accepted, since the previous implementation
      // let them through via implicit coercion.
      const limit = typeof rawLimit === 'string' ? Number(rawLimit) : rawLimit;
      if (typeof limit !== 'number' || Number.isNaN(limit)) {
        throw new Error('Tool "search" requires "limit" to be a number when provided');
      }
      return search(query, limit);
    }
    case 'get_metadata':
      return getMetadata(requireString('id'));
    case 'wayback_check':
      return waybackCheck(requireString('url'));
    default:
      throw new Error(`Unknown tool: ${name}`);
  }
}
88
+
89
/**
 * Queries the Internet Archive `advancedsearch.php` endpoint and returns a
 * normalized list of matching items.
 *
 * @param query - Search expression, forwarded as the `q` parameter.
 * @param limit - Desired number of results. Fractions are truncated and the
 *   value is clamped to 1-100; a NaN value falls back to 20.
 * @returns `total` (the reported `numFound`, or 0) and `results`, where every
 *   missing scalar field is `null` and `creator` / `subjects` are always arrays.
 * @throws Error when the HTTP response status is not OK.
 */
async function search(query: string, limit: number) {
  // Sanitize before clamping: Math.min/Math.max propagate NaN, which would be
  // sent upstream as "rows=NaN", and a fractional value would be sent as-is.
  const requested = Math.trunc(limit);
  const rows = Number.isNaN(requested) ? 20 : Math.min(100, Math.max(1, requested));

  const params = new URLSearchParams({
    q: query,
    output: 'json',
    rows: String(rows),
    fl: 'identifier,title,creator,date,description,mediatype,subject,downloads',
  });

  const res = await fetch(`${BASE_URL}/advancedsearch.php?${params}`);
  if (!res.ok) throw new Error(`Internet Archive error: ${res.status} ${res.statusText}`);

  const data = (await res.json()) as {
    response?: {
      numFound?: number;
      docs?: {
        identifier?: string;
        title?: string | string[];
        creator?: string | string[];
        date?: string;
        description?: string | string[];
        mediatype?: string;
        subject?: string | string[];
        downloads?: number;
      }[];
    };
  };

  // Fields may be a single string or a list. An empty list must become null,
  // not undefined, so the result shape stays uniform.
  const firstOrNull = (value: string | string[] | undefined): string | null =>
    Array.isArray(value) ? (value[0] ?? null) : (value ?? null);
  const asList = (value: string | string[] | undefined): string[] =>
    Array.isArray(value) ? value : value ? [value] : [];

  const docs = data.response?.docs ?? [];

  return {
    total: data.response?.numFound ?? 0,
    results: docs.map((d) => ({
      id: d.identifier ?? null,
      title: firstOrNull(d.title),
      creator: asList(d.creator),
      date: d.date ?? null,
      description: firstOrNull(d.description),
      mediatype: d.mediatype ?? null,
      subjects: asList(d.subject),
      downloads: d.downloads ?? null,
      url: d.identifier ? `https://archive.org/details/${d.identifier}` : null,
    })),
  };
}
133
+
134
/**
 * Fetches `/metadata/{id}` from the Internet Archive and returns a flattened
 * summary of the item together with up to 20 of its files.
 *
 * @param id - Item identifier; percent-encoded before being placed in a URL.
 * @returns Normalized item fields. Missing scalars are `null`; `creator` and
 *   `subjects` are always arrays; `files` holds at most the first 20 entries.
 * @throws Error when `id` is blank, when the HTTP response status is not OK,
 *   or when the response carries no `metadata` object (reported as not found).
 */
async function getMetadata(id: string) {
  // A blank id would request the bare "/metadata/" path; fail fast instead.
  if (typeof id !== 'string' || id.trim() === '') {
    throw new Error('Item identifier must be a non-empty string');
  }

  // Encode once and reuse, so the request path and the returned details URL agree.
  const encodedId = encodeURIComponent(id);

  const res = await fetch(`${BASE_URL}/metadata/${encodedId}`);
  if (!res.ok) throw new Error(`Internet Archive error: ${res.status} ${res.statusText}`);

  const data = (await res.json()) as {
    metadata?: {
      identifier?: string | string[];
      title?: string | string[];
      creator?: string | string[];
      date?: string | string[];
      description?: string | string[];
      mediatype?: string | string[];
      subject?: string | string[];
      language?: string | string[];
      licenseurl?: string | string[];
      addeddate?: string | string[];
      publicdate?: string | string[];
      downloads?: number;
    };
    files?: { name?: string; format?: string; size?: string; md5?: string }[];
    item?: { downloads?: number; files_count?: number; item_size?: number };
  };

  if (!data.metadata) throw new Error(`Item not found: ${id}`);

  const m = data.metadata;

  // Collapses "value or list of values" to its first value, or null when absent.
  const first = <T>(v: T | T[] | undefined): T | null =>
    v == null ? null : Array.isArray(v) ? (v[0] ?? null) : v;
  // Wraps "value or list of values" into a list (empty when absent).
  const asList = (v: string | string[] | undefined): string[] =>
    Array.isArray(v) ? v : v ? [v] : [];
  // File sizes arrive as strings; an unparseable size becomes null rather than NaN.
  const toByteCount = (size: string | undefined): number | null => {
    if (size == null) return null;
    const parsed = Number.parseInt(size, 10);
    return Number.isNaN(parsed) ? null : parsed;
  };

  return {
    id: first(m.identifier),
    title: first(m.title),
    creator: asList(m.creator),
    date: first(m.date),
    description: first(m.description),
    mediatype: first(m.mediatype),
    subjects: asList(m.subject),
    language: first(m.language),
    license_url: first(m.licenseurl),
    added_date: first(m.addeddate),
    public_date: first(m.publicdate),
    downloads: data.item?.downloads ?? m.downloads ?? null,
    files_count: data.item?.files_count ?? (data.files?.length ?? null),
    item_size_bytes: data.item?.item_size ?? null,
    url: `https://archive.org/details/${encodedId}`,
    files: (data.files ?? []).slice(0, 20).map((f) => ({
      name: f.name ?? null,
      format: f.format ?? null,
      size_bytes: toByteCount(f.size),
      md5: f.md5 ?? null,
    })),
  };
}
187
+
188
/**
 * Asks the `/wayback/available` endpoint whether `url` has a snapshot and
 * reports the closest one it returns.
 *
 * @param url - Address to look up; sent as the `url` query parameter.
 * @returns The looked-up URL (as echoed by the service, else the input), an
 *   `archived` flag, and the closest snapshot's URL, timestamp, and status
 *   (each `null` when no snapshot is reported).
 * @throws Error when the HTTP response status is not OK.
 */
async function waybackCheck(url: string) {
  type AvailabilityPayload = {
    url?: string;
    archived_snapshots?: {
      closest?: {
        status?: string;
        available?: boolean;
        url?: string;
        timestamp?: string;
      };
    };
  };

  const endpoint = `${BASE_URL}/wayback/available?${new URLSearchParams({ url })}`;
  const response = await fetch(endpoint);
  if (!response.ok) {
    throw new Error(`Wayback Machine error: ${response.status} ${response.statusText}`);
  }

  const payload = (await response.json()) as AvailabilityPayload;
  const snapshot = payload.archived_snapshots?.closest;

  return {
    url: payload.url ?? url,
    archived: snapshot?.available ?? false,
    snapshot_url: snapshot?.url ?? null,
    timestamp: snapshot?.timestamp ?? null,
    status: snapshot?.status ?? null,
  };
}
215
+
216
+ export default { tools, callTool } satisfies McpToolExport; // Default export pairs the tool catalogue with its dispatcher; `satisfies` checks the object against McpToolExport without widening its inferred type.
package/tsconfig.json ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "ESNext",
5
+ "moduleResolution": "bundler",
6
+ "strict": true
7
+ },
8
+ "include": ["src"]
9
+ }