xcrawl-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/.editorconfig +12 -0
  2. package/.env.example +3 -0
  3. package/.prettierrc +6 -0
  4. package/README.md +244 -0
  5. package/claude.md +295 -0
  6. package/dist/core/crawl.d.ts +246 -0
  7. package/dist/core/crawl.d.ts.map +1 -0
  8. package/dist/core/crawl.js +141 -0
  9. package/dist/core/crawl.js.map +1 -0
  10. package/dist/core/map.d.ts +34 -0
  11. package/dist/core/map.d.ts.map +1 -0
  12. package/dist/core/map.js +50 -0
  13. package/dist/core/map.js.map +1 -0
  14. package/dist/core/scrape.d.ts +201 -0
  15. package/dist/core/scrape.d.ts.map +1 -0
  16. package/dist/core/scrape.js +148 -0
  17. package/dist/core/scrape.js.map +1 -0
  18. package/dist/core/search.d.ts +144 -0
  19. package/dist/core/search.d.ts.map +1 -0
  20. package/dist/core/search.js +75 -0
  21. package/dist/core/search.js.map +1 -0
  22. package/dist/index.d.ts +8 -0
  23. package/dist/index.d.ts.map +1 -0
  24. package/dist/index.js +516 -0
  25. package/dist/index.js.map +1 -0
  26. package/dist/stdio.d.ts +3 -0
  27. package/dist/stdio.d.ts.map +1 -0
  28. package/dist/stdio.js +551 -0
  29. package/dist/stdio.js.map +1 -0
  30. package/dist/tools.d.ts +540 -0
  31. package/dist/tools.d.ts.map +1 -0
  32. package/dist/tools.js +528 -0
  33. package/dist/tools.js.map +1 -0
  34. package/dist/types.d.ts +214 -0
  35. package/dist/types.d.ts.map +1 -0
  36. package/dist/types.js +5 -0
  37. package/dist/types.js.map +1 -0
  38. package/package.json +33 -0
  39. package/src/core/crawl.ts +149 -0
  40. package/src/core/map.ts +56 -0
  41. package/src/core/scrape.ts +156 -0
  42. package/src/core/search.ts +81 -0
  43. package/src/index.ts +565 -0
  44. package/src/stdio.ts +584 -0
  45. package/src/tools.ts +539 -0
  46. package/src/types.ts +221 -0
  47. package/tsconfig.build.json +14 -0
  48. package/tsconfig.json +45 -0
  49. package/vitest.config.mts +11 -0
  50. package/worker-configuration.d.ts +10848 -0
  51. package/wrangler.jsonc +26 -0
package/src/types.ts ADDED
@@ -0,0 +1,221 @@
1
+ /**
2
+ * xCrawl API Request and Response Types
3
+ */
4
+
5
/**
 * Request payload shape for the xCrawl scrape API.
 *
 * Only `url` is required; every option group below is optional. The meaning
 * of individual options is defined by the remote service, not by this file.
 */
export interface XCrawlScrapeRequest {
  /** Target URL; the only required field. */
  url: string;
  /** Execution mode; the type admits exactly these two literals. */
  mode?: "sync" | "async";
  /** Proxy-related options. */
  proxy?: { location?: string; sticky_session?: string };
  /** Options grouped under the `request` key. */
  request?: {
    locale?: string;
    device?: "desktop" | "mobile";
    // Cookie and header values are typed `unknown` here, whereas
    // `webhook.headers` below is string-valued.
    cookies?: Record<string, unknown>;
    headers?: Record<string, unknown>;
    only_main_content?: boolean;
    block_ads?: boolean;
    skip_tls_verification?: boolean;
  };
  /** Options grouped under the `js_render` key. */
  js_render?: {
    enabled?: boolean;
    wait_until?: "load" | "domcontentloaded" | "networkidle";
    viewport?: { width?: number; height?: number };
  };
  /** Output selection: requested formats plus per-format settings. */
  output?: {
    formats?: (
      | "html"
      | "raw_html"
      | "markdown"
      | "links"
      | "summary"
      | "screenshot"
      | "json"
    )[];
    screenshot?: "full_page" | "viewport";
    json?: { prompt?: string; json_schema?: Record<string, any> };
  };
  /** Webhook settings: target URL, string-valued headers, and event names. */
  webhook?: {
    url?: string;
    headers?: Record<string, string>;
    events?: ("started" | "completed" | "failed")[];
  };
}
43
+
44
/**
 * Response shape for the xCrawl scrape API.
 *
 * `scrape_id`, `endpoint`, `version` and `status` are always present in this
 * type; everything else is optional.
 */
export interface XCrawlScrapeResponse {
  scrape_id: string;
  endpoint: string;
  version: string;
  /** Typed as a free-form string; allowed values are not enumerated here. */
  status: string;
  url?: string;
  /** Scraped content and usage figures; every member is optional. */
  data?: {
    html?: string;
    raw_html?: string;
    markdown?: string;
    links?: string[];
    metadata?: Record<string, any>;
    screenshot?: string;
    summary?: string;
    json?: Record<string, any>;
    traffic_bytes?: number;
    credits_used?: number;
    credits_detail?: Record<string, any>;
  };
  // Timestamps are typed as plain strings; their format is not specified in this file.
  started_at?: string;
  ended_at?: string;
  total_credits_used?: number;
  message?: string;
}
68
+
69
+ /**
70
+ * Search API Request and Response Types
71
+ */
72
/**
 * Request payload shape for the xCrawl search API.
 *
 * `query` is the only required field. `serp_options` is an optional bag of
 * additional search parameters; their meaning is defined by the remote
 * service, not by this file.
 */
export interface XCrawlSearchRequest {
  /** Search query; required. */
  query: string;
  location?: string;
  language?: string;
  limit?: number;
  /** Additional search parameters, all optional. */
  serp_options?: {
    // String-valued parameters.
    q?: string;
    location?: string;
    uule?: string;
    google_domain?: string;
    gl?: string;
    hl?: string;
    cr?: string;
    lr?: string;
    // Note: `safe` is numeric in this type, not a boolean.
    safe?: number;
    nfpr?: boolean;
    filter?: boolean;
    tbs?: string;
    // Numeric parameters.
    start?: number;
    num?: number;
    ludocid?: string;
    lsig?: string;
    kgmid?: string;
    si?: string;
    ibp?: string;
    uds?: string;
    no_cache?: boolean;
  };
}
101
+
102
/**
 * Response shape for the xCrawl search API.
 *
 * Unlike the scrape response, `query` is a required field here.
 */
export interface XCrawlSearchResponse {
  search_id: string;
  endpoint: string;
  version: string;
  /** Typed as a free-form string; allowed values are not enumerated here. */
  status: string;
  query: string;
  /** Search results and usage figures; every member is optional. */
  data?: {
    // `results` is a loosely typed record; its inner structure is not described in this file.
    results?: Record<string, any>;
    credits_used?: number;
    credits_detail?: Record<string, any>;
  };
  started_at?: string;
  ended_at?: string;
  total_credits_used?: number;
}
117
+
118
+ /**
119
+ * Map API Request and Response Types
120
+ */
121
/**
 * Request payload shape for the xCrawl map API.
 *
 * `url` is required; the remaining fields are optional.
 */
export interface XCrawlMapRequest {
  /** Target URL; required. */
  url: string;
  /** Typed as a plain string; its syntax is not specified in this file. */
  filter?: string;
  limit?: number;
  include_subdomains?: boolean;
  ignore_query_parameters?: boolean;
}
128
+
129
/**
 * Response shape for the xCrawl map API.
 *
 * `map_id`, `endpoint`, `version`, `status` and `url` are required in this
 * type; the payload and timing fields are optional.
 */
export interface XCrawlMapResponse {
  map_id: string;
  endpoint: string;
  version: string;
  /** Typed as a free-form string; allowed values are not enumerated here. */
  status: string;
  url: string;
  /** Link list and usage figures; every member is optional. */
  data?: {
    links?: string[];
    total_links?: number;
    credits_used?: number;
    credits_detail?: Record<string, any>;
  };
  started_at?: string;
  ended_at?: string;
  total_credits_used?: number;
}
145
+
146
+ /**
147
+ * Crawl API Request and Response Types
148
+ */
149
/**
 * Request payload shape for the xCrawl crawl API.
 *
 * `url` is required. Besides the crawl-specific `crawler` group, this type
 * declares the same `proxy`, `request`, `js_render`, `output` and `webhook`
 * groups as `XCrawlScrapeRequest` (it has no `mode` field).
 */
export interface XCrawlCrawlRequest {
  /** Starting URL; required. */
  url: string;
  /** Crawl-scope options. */
  crawler?: {
    limit?: number;
    // Typed as string lists; the pattern syntax is not specified in this file.
    include?: string[];
    exclude?: string[];
    max_depth?: number;
    include_entire_domain?: boolean;
    include_subdomains?: boolean;
    include_external_links?: boolean;
    sitemaps?: boolean;
  };
  /** Proxy-related options. */
  proxy?: { location?: string; sticky_session?: string };
  /** Options grouped under the `request` key. */
  request?: {
    locale?: string;
    device?: "desktop" | "mobile";
    // Cookie and header values are typed `unknown` here, whereas
    // `webhook.headers` below is string-valued.
    cookies?: Record<string, unknown>;
    headers?: Record<string, unknown>;
    only_main_content?: boolean;
    block_ads?: boolean;
    skip_tls_verification?: boolean;
  };
  /** Options grouped under the `js_render` key. */
  js_render?: {
    enabled?: boolean;
    wait_until?: "load" | "domcontentloaded" | "networkidle";
    viewport?: { width?: number; height?: number };
  };
  /** Output selection: requested formats plus per-format settings. */
  output?: {
    formats?: (
      | "html"
      | "raw_html"
      | "markdown"
      | "links"
      | "summary"
      | "screenshot"
      | "json"
    )[];
    screenshot?: "full_page" | "viewport";
    json?: { prompt?: string; json_schema?: Record<string, any> };
  };
  /** Webhook settings: target URL, string-valued headers, and event names. */
  webhook?: {
    url?: string;
    headers?: Record<string, string>;
    events?: ("started" | "completed" | "failed")[];
  };
}
196
+
197
/**
 * Response shape for the xCrawl crawl API.
 *
 * `data` is an array with one entry per page. Each entry has the same
 * optional content fields as the scrape response's `data` object, plus a
 * required `url`.
 */
export interface XCrawlCrawlResponse {
  crawl_id: string;
  endpoint: string;
  version: string;
  /** Typed as a free-form string; allowed values are not enumerated here. */
  status: string;
  url?: string;
  /** Per-page results; only `url` is required on each entry. */
  data?: {
    url: string;
    html?: string;
    raw_html?: string;
    markdown?: string;
    links?: string[];
    metadata?: Record<string, any>;
    screenshot?: string;
    summary?: string;
    json?: Record<string, any>;
    traffic_bytes?: number;
    credits_used?: number;
    credits_detail?: Record<string, any>;
  }[];
  started_at?: string;
  ended_at?: string;
  total_credits_used?: number;
  message?: string;
}
package/tsconfig.build.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "extends": "./tsconfig.json",
3
+ "compilerOptions": {
4
+ "noEmit": false,
5
+ "outDir": "./dist",
6
+ "rootDir": "./src",
7
+ "declaration": true,
8
+ "declarationMap": true,
9
+ "sourceMap": true,
10
+ "types": ["node"]
11
+ },
12
+ "include": ["src/**/*.ts"],
13
+ "exclude": ["node_modules", "dist", "test"]
14
+ }
package/tsconfig.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "compilerOptions": {
3
+ /* Visit https://aka.ms/tsconfig.json to read more about this file */
4
+
5
+ /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
6
+ "target": "es2024",
7
+ /* Specify a set of bundled library declaration files that describe the target runtime environment. */
8
+ "lib": ["es2024"],
9
+ /* Specify what JSX code is generated. */
10
+ "jsx": "react-jsx",
11
+
12
+ /* Specify what module code is generated. */
13
+ "module": "es2022",
14
+ /* Specify how TypeScript looks up a file from a given module specifier. */
15
+ "moduleResolution": "Bundler",
16
+ /* Enable importing .json files */
17
+ "resolveJsonModule": true,
18
+
19
+ /* Allow JavaScript files to be a part of your program. Use the `checkJS` option to get errors from these files. */
20
+ "allowJs": true,
21
+ /* Enable error reporting in type-checked JavaScript files. */
22
+ "checkJs": false,
23
+
24
+ /* Disable emitting files from a compilation. */
25
+ "noEmit": true,
26
+
27
+ /* Ensure that each file can be safely transpiled without relying on other imports. */
28
+ "isolatedModules": true,
29
+ /* Allow 'import x from y' when a module doesn't have a default export. */
30
+ "allowSyntheticDefaultImports": true,
31
+ /* Ensure that casing is correct in imports. */
32
+ "forceConsistentCasingInFileNames": true,
33
+
34
+ /* Enable all strict type-checking options. */
35
+ "strict": true,
36
+
37
+ /* Skip type checking all .d.ts files. */
38
+ "skipLibCheck": true,
39
+ "types": [
40
+ "./worker-configuration.d.ts"
41
+ ]
42
+ },
43
+ "exclude": ["test"],
44
+ "include": ["worker-configuration.d.ts", "src/**/*.ts"]
45
+ }
package/vitest.config.mts ADDED
@@ -0,0 +1,11 @@
1
+ import { defineWorkersConfig } from '@cloudflare/vitest-pool-workers/config';
2
+
3
// Wrangler configuration file handed to the `workers` pool options below.
const wranglerConfigPath = './wrangler.jsonc';

/**
 * Vitest configuration produced by `defineWorkersConfig`; the only setting is
 * the Wrangler config path under `test.poolOptions.workers.wrangler`.
 */
export default defineWorkersConfig({
  test: {
    poolOptions: {
      workers: { wrangler: { configPath: wranglerConfigPath } },
    },
  },
});