image-tiler-mcp-server 1.6.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +138 -340
  2. package/dist/constants.d.ts +24 -5
  3. package/dist/constants.d.ts.map +1 -1
  4. package/dist/constants.js +28 -4
  5. package/dist/constants.js.map +1 -1
  6. package/dist/index.js +29 -8
  7. package/dist/index.js.map +1 -1
  8. package/dist/schemas/index.d.ts +11 -33
  9. package/dist/schemas/index.d.ts.map +1 -1
  10. package/dist/schemas/index.js +49 -62
  11. package/dist/schemas/index.js.map +1 -1
  12. package/dist/services/elicitation.d.ts +24 -0
  13. package/dist/services/elicitation.d.ts.map +1 -0
  14. package/dist/services/elicitation.js +49 -0
  15. package/dist/services/elicitation.js.map +1 -0
  16. package/dist/services/image-processor.d.ts +2 -1
  17. package/dist/services/image-processor.d.ts.map +1 -1
  18. package/dist/services/image-processor.js +51 -28
  19. package/dist/services/image-processor.js.map +1 -1
  20. package/dist/services/image-source-resolver.d.ts.map +1 -1
  21. package/dist/services/image-source-resolver.js +45 -9
  22. package/dist/services/image-source-resolver.js.map +1 -1
  23. package/dist/services/interactive-preview-generator.d.ts.map +1 -1
  24. package/dist/services/interactive-preview-generator.js +37 -8
  25. package/dist/services/interactive-preview-generator.js.map +1 -1
  26. package/dist/services/tile-analyzer.d.ts +4 -0
  27. package/dist/services/tile-analyzer.d.ts.map +1 -0
  28. package/dist/services/tile-analyzer.js +38 -0
  29. package/dist/services/tile-analyzer.js.map +1 -0
  30. package/dist/services/tiling-pipeline.d.ts +81 -0
  31. package/dist/services/tiling-pipeline.d.ts.map +1 -0
  32. package/dist/services/tiling-pipeline.js +325 -0
  33. package/dist/services/tiling-pipeline.js.map +1 -0
  34. package/dist/services/url-capture.d.ts +4 -0
  35. package/dist/services/url-capture.d.ts.map +1 -0
  36. package/dist/services/url-capture.js +619 -0
  37. package/dist/services/url-capture.js.map +1 -0
  38. package/dist/tools/tiler.d.ts +3 -0
  39. package/dist/tools/tiler.d.ts.map +1 -0
  40. package/dist/tools/tiler.js +501 -0
  41. package/dist/tools/tiler.js.map +1 -0
  42. package/dist/types.d.ts +37 -24
  43. package/dist/types.d.ts.map +1 -1
  44. package/dist/utils.d.ts +18 -0
  45. package/dist/utils.d.ts.map +1 -1
  46. package/dist/utils.js +140 -0
  47. package/dist/utils.js.map +1 -1
  48. package/package.json +16 -3
  49. package/dist/tools/get-tiles.d.ts +0 -3
  50. package/dist/tools/get-tiles.d.ts.map +0 -1
  51. package/dist/tools/get-tiles.js +0 -113
  52. package/dist/tools/get-tiles.js.map +0 -1
  53. package/dist/tools/prepare-image.d.ts +0 -3
  54. package/dist/tools/prepare-image.d.ts.map +0 -1
  55. package/dist/tools/prepare-image.js +0 -225
  56. package/dist/tools/prepare-image.js.map +0 -1
  57. package/dist/tools/recommend-settings.d.ts +0 -3
  58. package/dist/tools/recommend-settings.d.ts.map +0 -1
  59. package/dist/tools/recommend-settings.js +0 -198
  60. package/dist/tools/recommend-settings.js.map +0 -1
  61. package/dist/tools/tile-image.d.ts +0 -3
  62. package/dist/tools/tile-image.d.ts.map +0 -1
  63. package/dist/tools/tile-image.js +0 -219
  64. package/dist/tools/tile-image.js.map +0 -1
package/README.md CHANGED
@@ -1,112 +1,37 @@
1
1
  # image-tiler-mcp-server
2
2
 
3
- Split large images into optimally-sized tiles so LLM vision models see every detail — no downscaling, no lost text.
3
+ Capture, tile, analyze, and estimate vision tokens for LLM models - so nothing gets downscaled away.
4
4
 
5
5
  <p align="center">
6
6
  <img src="assets/preview.gif" alt="Preview of image tiling grid with advised vision models size and token estimates" width="100%" />
7
7
  </p>
8
8
 
9
- ## Tiling for LLM Vision
10
-
11
- LLM vision systems have a **maximum input resolution**. When you send an image larger than that limit, the model silently downscales it before processing. A 3600×22810 full-page screenshot gets shrunk to ~247×1568 by Claude — text becomes unreadable, UI details disappear, and the model can't analyze what it can't see.
12
-
13
- **Tiling solves this.** This MCP server:
14
-
15
- 1. Reads the image dimensions and the target model's vision config
16
- 2. Calculates an optimal grid that keeps every tile within the model's sweet spot
17
- 3. Extracts tiles as individual PNGs and saves them to disk
18
- 4. Returns metadata (grid layout, file paths, estimated token cost)
19
- 5. Serves tiles back as base64 in paginated batches for the LLM to analyze
20
-
21
- Each tile is processed at **full resolution** — no downscaling — preserving text, UI elements, and fine detail across the entire image.
22
-
23
- **Auto-downscaling:** Images over 10,000px on their longest side are automatically downscaled before tiling (configurable via `maxDimension`). This prevents extreme tile counts on very long screenshots — e.g., a 3600×22810 page drops from 84 tiles / ~134K tokens to 20 tiles / ~32K tokens with no visible quality loss. Set `maxDimension=0` to disable.
24
-
25
- ### Supported Models
26
-
27
- | Model | Default tile | Tokens/tile | Max tile | ID |
28
- |-------|-------------|-------------|----------|-----|
29
- | Claude (default) | 1092px | 1590 | 1568px | `claude` |
30
- | OpenAI (GPT-4o/o-series) | 768px | 765 | 2048px | `openai` |
31
- | Gemini | 768px | 258 | 768px | `gemini` |
32
- | Gemini 3 | 1536px | 1120 | 3072px | `gemini3` |
33
-
34
- > **OpenAI note:** The `openai` config targets the GPT-4o / o-series vision pipeline (512px tile patches). GPT-4.1 uses a fundamentally different pipeline (32x32 pixel patches) and is not currently supported — it would require a separate model config with a different calculation approach.
35
-
36
- > **Gemini 3 note:** Gemini 3 uses a fixed token budget per image (1120 tokens regardless of dimensions). Tiling increases total token cost but preserves fine detail. For cases where detail isn't critical, consider sending a single image instead.
37
-
38
- ## Tools
39
-
40
- ### `tiler_tile_image`
41
-
42
- Splits a large image into tiles and saves them to disk.
43
-
44
- | Parameter | Type | Required | Default | Description |
45
- |---|---|---|---|---|
46
- | `filePath` | string | no* | — | Absolute or relative path to the image file |
47
- | `sourceUrl` | string | no* | — | HTTPS URL to download the image from (max 50MB, 30s timeout) |
48
- | `dataUrl` | string | no* | — | Data URL with base64-encoded image |
49
- | `imageBase64` | string | no* | — | Raw base64-encoded image data |
50
- | `model` | string | no | `"claude"` | Target vision model: `"claude"`, `"openai"`, `"gemini"`, `"gemini3"` |
51
- | `tileSize` | number | no | Model default | Tile size in pixels. Clamped to model min/max with a warning if out of bounds. |
52
- | `maxDimension` | number | no | `10000` | Max dimension in px (0-65536). Pre-downscales the image so its longest side fits within this value before tiling. Defaults to 10000px. Set to 0 to disable auto-downscaling. No-op if already within bounds. |
53
- | `outputDir` | string | no | `tiles/{name}` subfolder next to source | Directory to save tiles |
54
-
55
- *At least one image source (`filePath`, `sourceUrl`, `dataUrl`, or `imageBase64`) is required.
56
-
57
- Returns JSON metadata with grid dimensions, tile count, model used, estimated token cost, and per-tile file paths.
58
-
59
- ### `tiler_get_tiles`
60
-
61
- Returns tile images as base64 in batches of 5 for the LLM to see directly.
62
-
63
- | Parameter | Type | Required | Default | Description |
64
- |---|---|---|---|---|
65
- | `tilesDir` | string | yes | — | Path to tiles directory (from `tiler_tile_image`) |
66
- | `start` | number | no | 0 | Start tile index (0-based, inclusive) |
67
- | `end` | number | no | start + 4 | End tile index (0-based, inclusive) |
68
-
69
- Returns text labels + image content blocks. Includes pagination hint for the next batch.
70
-
71
- ### `tiler_recommend_settings`
9
+ ## Usage
72
10
 
73
- Dry-run estimator: reads image dimensions and returns cost estimates **without tiling**.
11
+ ### Tile an image
74
12
 
75
- | Parameter | Type | Required | Default | Description |
76
- |---|---|---|---|---|
77
- | `filePath` | string | no* | — | Path to image file |
78
- | `sourceUrl` | string | no* | — | HTTPS URL to download from |
79
- | `dataUrl` | string | no* | — | Data URL with base64 image |
80
- | `imageBase64` | string | no* | — | Raw base64 image data |
81
- | `model` | string | no | `"claude"` | Target vision model |
82
- | `tileSize` | number | no | Model default | Override tile size (skips heuristics) |
83
- | `maxDimension` | number | no | — | Override max dimension (skips heuristics) |
84
- | `intent` | string | no | — | `"text_heavy"`, `"ui_screenshot"`, `"diagram"`, `"photo"`, `"general"` |
85
- | `budget` | string | no | — | `"low"`, `"default"`, `"max_detail"` |
13
+ > let's tile ~/Desktop/source.jpg
86
14
 
87
- *At least one image source required.
15
+ The server shows you a comparison of supported vision models with tile counts and token estimates.
16
+ Pick the model that matches your use case, and the server tiles the image and returns the tiles in batches for analysis.
88
17
 
89
- Returns JSON with recommended settings, rationale, image info, grid estimate, and a comparison across all 4 models.
18
+ ### Capture a web page
90
19
 
91
- ### `tiler_prepare_image`
20
+ > capture screenshot of https://example.com and analyze the content
92
21
 
93
- One-shot convenience tool: tiles an image AND returns the first batch of tiles in a single call.
22
+ The server launches Chrome, captures a full-page screenshot (scroll-stitching pages over 16,384px), then presents the same model comparison. Choose a model and the server tiles the capture for analysis.
94
23
 
95
- | Parameter | Type | Required | Default | Description |
96
- |---|---|---|---|---|
97
- | `filePath` | string | no* | — | Path to image file |
98
- | `sourceUrl` | string | no* | — | HTTPS URL to download from |
99
- | `dataUrl` | string | no* | — | Data URL with base64 image |
100
- | `imageBase64` | string | no* | — | Raw base64 image data |
101
- | `model` | string | no | `"claude"` | Target vision model |
102
- | `tileSize` | number | no | Model default | Override tile size |
103
- | `maxDimension` | number | no | `10000` | Max dimension for auto-downscaling |
104
- | `outputDir` | string | no | `tiles/{name}` subfolder | Directory to save tiles |
105
- | `page` | number | no | `0` | Tile page (0 = tiles 0-4, 1 = tiles 5-9, etc.) |
24
+ To get only the screenshot without tiling, just ask for a screenshot and stop after the comparison step.
106
25
 
107
- *At least one image source required.
26
+ ### Customize tiling
108
27
 
109
- Returns tiling metadata + up to 5 tile images inline. Saves a round-trip compared to calling `tiler_tile_image` then `tiler_get_tiles` separately.
28
+ | What | Example prompt |
29
+ |------|---------------|
30
+ | Target a specific model | "Tile hero.png for OpenAI" |
31
+ | Keep full resolution | "Tile banner.png at full resolution, no downscaling" |
32
+ | PNG output | "Tile diagram.png as lossless PNG" |
33
+ | Tile from URL | "Download and tile https://example.com/chart.png" |
34
+ | Tile from base64 | "Tile this base64 image: iVBORw0KGgo..." |
110
35
 
111
36
  ## Installation
112
37
 
@@ -116,31 +41,24 @@ Returns tiling metadata + up to 5 tile images inline. Saves a round-trip compare
116
41
  claude mcp add image-tiler -- npx -y image-tiler-mcp-server
117
42
  ```
118
43
 
119
- > `image-tiler` is a local alias — you can name it anything you like. `image-tiler-mcp-server` is the npm package that gets downloaded and run.
44
+ > `image-tiler` is a local alias - you can name it anything you like. `image-tiler-mcp-server` is the npm package that gets downloaded and run.
120
45
 
121
46
  See [Claude Code MCP docs](https://docs.anthropic.com/en/docs/claude-code/mcp) for more info.
122
47
 
123
- ### Claude Desktop
48
+ ### Codex CLI
124
49
 
125
- Add to your Claude Desktop config file:
50
+ ```bash
51
+ codex mcp add image-tiler -- npx -y image-tiler-mcp-server
52
+ ```
126
53
 
127
- - **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`
128
- - **Windows:** `%APPDATA%\Claude\claude_desktop_config.json`
129
- - **Linux:** `~/.config/Claude/claude_desktop_config.json`
54
+ Or add to `~/.codex/config.toml`:
130
55
 
131
- ```json
132
- {
133
- "mcpServers": {
134
- "image-tiler": {
135
- "command": "npx",
136
- "args": ["-y", "image-tiler-mcp-server"]
137
- }
138
- }
139
- }
56
+ ```toml
57
+ [mcp_servers.image-tiler]
58
+ command = "npx"
59
+ args = ["-y", "image-tiler-mcp-server"]
140
60
  ```
141
61
 
142
- Restart Claude Desktop after editing.
143
-
144
62
  ### VS Code (Cline / Continue)
145
63
 
146
64
  Add to your VS Code MCP settings:
@@ -169,6 +87,27 @@ Add to `~/.cursor/mcp.json`:
169
87
  }
170
88
  ```
171
89
 
90
+ ### Claude Desktop
91
+
92
+ Add to your Claude Desktop config file:
93
+
94
+ - **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`
95
+ - **Windows:** `%APPDATA%\Claude\claude_desktop_config.json`
96
+ - **Linux:** `~/.config/Claude/claude_desktop_config.json`
97
+
98
+ ```json
99
+ {
100
+ "mcpServers": {
101
+ "image-tiler": {
102
+ "command": "npx",
103
+ "args": ["-y", "image-tiler-mcp-server"]
104
+ }
105
+ }
106
+ }
107
+ ```
108
+
109
+ Restart Claude Desktop after editing.
110
+
172
111
  ### Global Install (faster startup)
173
112
 
174
113
  ```bash
@@ -201,282 +140,141 @@ Then point your MCP config to the built file:
201
140
  }
202
141
  ```
203
142
 
204
- ## Usage
205
-
206
- ### In Claude Code
207
-
208
- ```
209
- > Tile the screenshot at ./screenshots/full-page.png and analyze the layout
210
-
211
- Claude will:
212
- 1. Call tiler_tile_image(filePath="./screenshots/full-page.png")
213
- 2. See: "Tiled 3600x22810 image → 4x21 grid = 84 tiles"
214
- 3. Call tiler_get_tiles(tilesDir="./screenshots/tiles/full-page", start=0, end=4)
215
- 4. Analyze tiles 0-4, then continue with start=5...
216
- ```
217
-
218
- ### With Other Models
219
-
220
- ```
221
- > Tile this image for GPT-4o analysis
222
-
223
- Claude will:
224
- 1. Call tiler_tile_image(filePath="./image.png", model="openai")
225
- 2. Tiles sized at 768px for OpenAI's vision pipeline
226
- ```
227
-
228
- ### Auto-Downscaling
229
-
230
- Images over 10,000px are automatically downscaled before tiling. You can customize the limit:
231
-
232
- ```
233
- > Tile this 7680x4032 screenshot but downscale to 2048px first to save tokens
234
-
235
- Claude will:
236
- 1. Call tiler_tile_image(filePath="./image.png", maxDimension=2048)
237
- 2. Image is downscaled to 2048x1076 before tiling
238
- 3. Fewer tiles = lower token cost (e.g., 4 tiles instead of 32)
239
- ```
240
-
241
- To disable auto-downscaling entirely:
242
-
243
- ```
244
- > Tile this image at full resolution, no downscaling
245
-
246
- Claude will:
247
- 1. Call tiler_tile_image(filePath="./image.png", maxDimension=0)
248
- 2. Image is tiled at its original dimensions
249
- ```
250
-
251
- ### Estimating Costs
252
-
253
- Use `tiler_recommend_settings` to preview token costs before tiling:
254
-
255
- ```
256
- > How many tokens would it cost to tile this 3600x22810 screenshot?
257
-
258
- Claude will:
259
- 1. Call tiler_recommend_settings(filePath="./screenshot.png")
260
- 2. See cost estimates for all 4 models
261
- 3. Make an informed decision before committing to tiling
262
- ```
263
-
264
- With intent and budget hints:
265
-
266
- ```
267
- > Estimate costs for this long document screenshot, keeping tokens low
268
-
269
- Claude will:
270
- 1. Call tiler_recommend_settings(filePath="./doc.png", intent="text_heavy", budget="low")
271
- 2. Get optimized maxDimension recommendation for text-heavy content
272
- ```
273
-
274
- ### Using URLs / Base64
275
-
276
- All image-accepting tools (`tiler_tile_image`, `tiler_recommend_settings`, `tiler_prepare_image`) support multiple input sources:
277
-
278
- ```
279
- > Tile this image from a URL
280
- → tiler_tile_image(sourceUrl="https://example.com/screenshot.png")
281
-
282
- > Tile this base64 image
283
- → tiler_tile_image(imageBase64="iVBORw0KGgo...")
284
- ```
285
-
286
- ### One-Shot Usage
287
-
288
- Use `tiler_prepare_image` to tile and get the first batch in one call:
143
+ ## Tiling for LLM Vision
289
144
 
290
- ```
291
- > Analyze this screenshot
145
+ LLM vision systems have a **maximum input resolution**. When you send an image larger than that limit, the model downscales it before processing. A 3600×22810 full-page screenshot gets shrunk to ~247×1568 by Claude - text becomes unreadable, UI details disappear, and the model can't analyze what it can't see.
292
146
 
293
- Claude will:
294
- 1. Call tiler_prepare_image(filePath="./screenshot.png")
295
- 2. Get tiling metadata + first 5 tiles in a single response
296
- 3. Continue with tiler_get_tiles for remaining tiles if needed
297
- ```
147
+ **Tiling solves this.** This MCP server:
298
148
 
299
- ### Typical Workflow
149
+ 1. Reads the image dimensions and the target model's vision config
150
+ 2. Calculates an optimal grid that keeps every tile within the model's sweet spot
151
+ 3. Extracts tiles as individual images (WebP default, PNG optional) and saves them to disk
152
+ 4. Returns metadata (grid layout, file paths, estimated token cost)
153
+ 5. Serves tiles back as base64 in paginated batches for the LLM to analyze
300
154
 
301
- 1. Capture full-page screenshot with your browser extension
302
- 2. Ask Claude: _"Tile `/path/to/screencapture-localhost-3000.png` and review all sections"_
303
- 3. Claude pages through tiles automatically, analyzing each batch
155
+ Each tile stays within the model's sweet spot - the LLM processes it at full resolution instead of downscaling it, preserving text, UI elements, and fine detail.
304
156
 
305
- ## Tile Output Structure
157
+ **Auto-downscaling:** Images over 10,000px on their longest side are automatically downscaled before tiling (configurable via `maxDimension`). This keeps tile counts reasonable and improves LLM comprehension by increasing content density per tile. Set `maxDimension=0` to disable, or pass a custom value (e.g., `maxDimension=5000`) for more aggressive downscaling.
306
158
 
307
- Example: `assets/landscape.png` (7680x4032) tiled with the default Claude config (1092px tiles) produces an 8x4 grid of 32 tiles (~50,880 tokens).
159
+ ### Supported Models
308
160
 
309
- **Grid layout** — tiles are numbered `tile_ROW_COL.png`, extracted left-to-right, top-to-bottom:
161
+ | Model | Default tile | Tokens/tile | Max tile | ID |
162
+ |-------|-------------|-------------|----------|-----|
163
+ | Claude | 1092px | 1590 | 1568px | `claude` |
164
+ | OpenAI (GPT-4o/o-series) | 768px | 765 | 2048px | `openai` |
165
+ | Gemini | 768px | 258 | 768px | `gemini` |
166
+ | Gemini 3 | 1536px | 1120 | 3072px | `gemini3` |
310
167
 
311
- ```
312
- 7680px
313
- ┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬────────┐
314
- │ 000_000 │ 000_001 │ 000_002 │ 000_003 │ 000_004 │ 000_005 │ 000_006 │ 000_007│
315
- │ 1092x1092│ 1092x1092│ 1092x1092│ 1092x1092│ 1092x1092│ 1092x1092│ 1092x1092│ 36x1092│ 4032px
316
- ├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼────────┤
317
- │ 001_000 │ 001_001 │ 001_002 │ 001_003 │ 001_004 │ 001_005 │ 001_006 │ 001_007│
318
- │ 1092x1092│ 1092x1092│ 1092x1092│ 1092x1092│ 1092x1092│ 1092x1092│ 1092x1092│ 36x1092│
319
- ├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼────────┤
320
- │ 002_000 │ 002_001 │ 002_002 │ 002_003 │ 002_004 │ 002_005 │ 002_006 │ 002_007│
321
- │ 1092x1092│ 1092x1092│ 1092x1092│ 1092x1092│ 1092x1092│ 1092x1092│ 1092x1092│ 36x1092│
322
- ├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼────────┤
323
- │ 003_000 │ 003_001 │ 003_002 │ 003_003 │ 003_004 │ 003_005 │ 003_006 │ 003_007│
324
- │ 1092x756 │ 1092x756 │ 1092x756 │ 1092x756 │ 1092x756 │ 1092x756 │ 1092x756 │ 36x756 │
325
- └──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴────────┘
326
- ```
168
+ > **OpenAI note:** The `openai` config targets the GPT-4o / o-series vision pipeline (512px tile patches). GPT-4.1 uses a fundamentally different pipeline (32x32 pixel patches) and is not currently supported - it would require a separate model config with a different calculation approach.
327
169
 
328
- Edge tiles are smaller: the rightmost column is 36px wide (7680 - 7×1092 = 36), and the bottom row is 756px tall (4032 - 3×1092 = 756).
170
+ > **Gemini 3 note:** Gemini 3 uses a fixed token budget per image (1120 tokens regardless of dimensions). Tiling increases total token cost but preserves fine detail. For cases where detail isn't critical, consider sending a single image instead.
329
171
 
330
- **Output directory:**
172
+ ## Tools
331
173
 
332
- ```
333
- assets/tiles/landscape/
334
- ├── tile_000_000.png # Row 0, Col 0 — 1092x1092
335
- ├── tile_000_001.png # Row 0, Col 1 — 1092x1092
336
- ├── tile_000_002.png # ...
337
- ├── ...
338
- ├── tile_000_007.png # Row 0, Col 7 — 36x1092 (right edge)
339
- ├── tile_001_000.png # Row 1, Col 0
340
- ├── ...
341
- ├── tile_003_006.png # Row 3, Col 6 — 1092x756 (bottom edge)
342
- └── tile_003_007.png # Row 3, Col 7 — 36x756 (corner)
343
- ```
174
+ ### `tiler`
344
175
 
345
- **JSON metadata** returned by `tiler_tile_image`:
176
+ One unified tool that handles all image tiling operations. The mode is auto-detected from the parameters you provide:
346
177
 
347
- ```json
348
- {
349
- "model": "claude",
350
- "sourceImage": {
351
- "width": 7680,
352
- "height": 4032,
353
- "format": "png",
354
- "fileSize": 12345678,
355
- "channels": 4
356
- },
357
- "grid": {
358
- "cols": 8,
359
- "rows": 4,
360
- "totalTiles": 32,
361
- "tileSize": 1092,
362
- "estimatedTokens": 50880
363
- },
364
- "outputDir": "/path/to/assets/tiles/landscape",
365
- "tiles": [
366
- { "index": 0, "row": 0, "col": 0, "position": "0,0", "dimensions": "1092×1092", "filePath": "/path/to/assets/tiles/landscape/tile_000_000.png" },
367
- { "index": 1, "row": 0, "col": 1, "position": "1092,0", "dimensions": "1092×1092", "filePath": "/path/to/assets/tiles/landscape/tile_000_001.png" },
368
- "... 30 more tiles"
369
- ],
370
- "previewPath": "/path/to/assets/tiles/landscape/preview.html",
371
- "resize": {
372
- "originalWidth": 7680,
373
- "originalHeight": 4032,
374
- "resizedWidth": 2048,
375
- "resizedHeight": 1076,
376
- "scaleFactor": 0.267
377
- }
378
- }
379
- ```
380
-
381
- > The `resize` field is only present when `maxDimension` triggered an actual downscale. If the image was already within bounds, it's omitted.
178
+ - **`tilesDir`** present → **Tile retrieval mode** (read-only pagination)
179
+ - **`url`** or **`screenshotPath`** present → **URL capture mode** (screenshot + tile)
180
+ - **`filePath`**, **`sourceUrl`**, **`dataUrl`**, or **`imageBase64`** present → **Tile-image mode**
382
181
 
383
- ### Portrait example
182
+ > **Mode priority:** When multiple mode params are present, the tool resolves by priority:
183
+ > `tilesDir` > `url`/`screenshotPath` > `filePath`/`sourceUrl`/`dataUrl`/`imageBase64`.
184
+ > Avoid passing params from different modes in the same call.
384
185
 
385
- `assets/portrait.png` (3600x22810) tiled with Claude defaults produces a 4x21 grid of 84 tiles (~133,560 tokens).
186
+ **Workflow:**
386
187
 
387
- **Grid layout:**
188
+ The tool uses a two-step process to let you choose the right model before tiling:
388
189
 
389
- ```
390
- 3600px
391
- ┌──────────┬──────────┬──────────┬─────────┐
392
- │ 000_000 │ 000_001 │ 000_002 │ 000_003 │
393
- │ 1092x1092│ 1092x1092│ 1092x1092│ 324x1092│
394
- ├──────────┼──────────┼──────────┼─────────┤
395
- │ 001_000 │ 001_001 │ 001_002 │ 001_003 │
396
- │ 1092x1092│ 1092x1092│ 1092x1092│ 324x1092│ 22810px
397
- ├──────────┼──────────┼──────────┼─────────┤
398
- │ ... │ ... │ ... │ ... │ (21 rows)
399
- ├──────────┼──────────┼──────────┼─────────┤
400
- │ 020_000 │ 020_001 │ 020_002 │ 020_003 │
401
- │ 1092x970 │ 1092x970 │ 1092x970 │ 324x970 │
402
- └──────────┴──────────┴──────────┴─────────┘
403
- ```
190
+ 1. **Compare** - Call with only the image source. Returns a comparison table showing tile counts and token estimates for each supported model, plus an interactive HTML preview.
191
+ 2. **Tile** - Call again with the chosen `model` + `outputDir` from step 1, plus:
192
+ - **Image sources:** re-include your original source param (`filePath`, `sourceUrl`, etc.)
193
+ - **Captures:** use `screenshotPath` from step 1 (not the original `url`)
404
194
 
405
- Edge tiles: rightmost column is 324px wide (3600 - 3×1092 = 324), bottom row is 970px tall (22810 - 20×1092 = 970).
195
+ > **Skip the comparison step:** Provide `model` and `outputDir` on the first call to tile immediately.
406
196
 
407
- ## Token Cost Reference
197
+ > **Interactive model picker:** Clients that support MCP elicitation get a dropdown picker instead of the comparison table.
408
198
 
409
- Costs vary by model. Formula: `tokens = totalTiles x tokensPerTile`
199
+ #### Parameters - Image Source (tile-image mode)
410
200
 
411
- ### Claude (1092px tiles, 1590 tokens/tile)
201
+ | Parameter | Type | Required | Default | Description |
202
+ |---|---|---|---|---|
203
+ | `filePath` | string | no* | - | Absolute or relative path to the image file |
204
+ | `sourceUrl` | string | no* | - | HTTPS URL to download the image from (max 50MB, 30s timeout) |
205
+ | `dataUrl` | string | no* | - | Data URL with base64-encoded image |
206
+ | `imageBase64` | string | no* | - | Raw base64-encoded image data |
412
207
 
413
- | Image Dimensions | Tiles | Estimated Tokens |
414
- |---|---|---|
415
- | 1440x3000 | 6 | ~9,540 |
416
- | 3600x5000 | 20 | ~31,800 |
417
- | 3600x22810 | 84 | ~133,560 |
208
+ *At least one image source is required for tile-image mode.
418
209
 
419
- ### OpenAI — GPT-4o/o-series (768px tiles, 765 tokens/tile)
210
+ #### Parameters - URL Capture (capture mode)
420
211
 
421
- | Image Dimensions | Tiles | Estimated Tokens |
422
- |---|---|---|
423
- | 1440x3000 | 8 | ~6,120 |
424
- | 3600x5000 | 35 | ~26,775 |
425
- | 3600x22810 | 150 | ~114,750 |
212
+ | Parameter | Type | Required | Default | Description |
213
+ |---|---|---|---|---|
214
+ | `url` | string | no | - | URL of the web page to capture. Requires Chrome/Chromium installed (or `CHROME_PATH` env var). |
215
+ | `screenshotPath` | string | no | - | Path to a previously captured screenshot. Skips URL capture when provided. |
216
+ | `viewportWidth` | number | no | Auto-detect (fallback 1280) | Browser viewport width in pixels (320-3840) |
217
+ | `waitUntil` | string | no | `"load"` | When to consider the page loaded: `"load"`, `"networkidle"`, or `"domcontentloaded"` |
218
+ | `delay` | number | no | `0` | Additional delay in ms after page load (max 30000) |
426
219
 
427
- ### Gemini (768px tiles, 258 tokens/tile)
220
+ Supports scroll-stitching for pages taller than 16,384px. Automatically triggers lazy-loaded images (`loading="lazy"`) before capture by scrolling through the page. Pages without lazy images are unaffected.
428
221
 
429
- | Image Dimensions | Tiles | Estimated Tokens |
430
- |---|---|---|
431
- | 1440x3000 | 8 | ~2,064 |
432
- | 3600x5000 | 35 | ~9,030 |
433
- | 3600x22810 | 150 | ~38,700 |
222
+ #### Parameters - Tile Retrieval (pagination mode)
434
223
 
435
- ### Gemini 3 (1536px tiles, 1120 tokens/tile)
224
+ | Parameter | Type | Required | Default | Description |
225
+ |---|---|---|---|---|
226
+ | `tilesDir` | string | no | - | Path to tiles directory (returned by a previous tiling call as `outputDir`) |
227
+ | `start` | number | no | `0` | Start tile index (0-based, inclusive) |
228
+ | `end` | number | no | start + 4 | End tile index (0-based, inclusive). Max 5 tiles per batch. |
436
229
 
437
- | Image Dimensions | Tiles | Estimated Tokens |
438
- |---|---|---|
439
- | 1440x3000 | 2 | ~2,240 |
440
- | 3600x5000 | 12 | ~13,440 |
441
- | 3600x22810 | 45 | ~50,400 |
230
+ #### Parameters - Tiling Config (shared across modes)
442
231
 
443
- > **Note:** Gemini 3 uses a fixed 1120 tokens per image regardless of dimensions. More tiles = more total tokens but better detail preservation.
232
+ | Parameter | Type | Required | Default | Description |
233
+ |---|---|---|---|---|
234
+ | `model` | string | no | Auto (cheapest) | Target vision model: `"claude"`, `"openai"`, `"gemini"`, `"gemini3"`. Auto-selects the most token-efficient preset when omitted. |
235
+ | `tileSize` | number | no | Model default | Tile size in pixels. Clamped to model's supported range with a warning if out of bounds. |
236
+ | `maxDimension` | number | no | `10000` | Max dimension in px (0 to disable, or 256-65536). Values 1-255 are silently clamped to 256. Pre-downscales the image so its longest side fits within this value before tiling. |
237
+ | `outputDir` | string | no | See below | Directory to save tiles. Defaults: for `filePath` sources, `tiles/{name}_vN/` next to source (auto-incrementing: `_v1`, `_v2`, ..., `_vN`); for `sourceUrl`/`dataUrl`/`imageBase64`, `{base}/tiles/tiled_{timestamp}_{hex}/`; for captures, `{base}/tiles/capture_{timestamp}_{hex}/`. `{base}` is `~/Desktop`, `~/Downloads`, or `~` (first available). |
238
+ | `page` | number | no | `0` | Tile page to return (0 = first 5, 1 = next 5, etc.) |
239
+ | `format` | string | no | `"webp"` | Output format: `"webp"` (smaller, default) or `"png"` (lossless) |
240
+ | `includeMetadata` | boolean | no | `true` | Analyze each tile and return content hints (blank, low-detail, mixed, high-detail) and brightness stats |
241
+
242
+ ## Behaviors
243
+
244
+ - **Source conflict:** Multiple image source params → highest-precedence source is used with a warning (`filePath` > `sourceUrl` > `dataUrl` > `imageBase64`).
245
+ - **Re-entry:** If `outputDir` already has a preview from the comparison step, the server skips straight to tiling.
246
+ - **Elicitation cancellation:** Cancelling the model picker returns `"Tiling cancelled by user."` without tiling.
247
+ - **Versioned output:** Repeated tiling of the same source creates `_v1`, `_v2`, ..., `_vN` directories to avoid overwriting.
248
+ - **Tile naming:** `tile_ROW_COL.{format}` with zero-padded 3-digit indices (e.g., `tile_000_003.webp`), row-by-row, left-to-right.
444
249
 
445
250
  ## Supported Formats
446
251
 
447
252
  PNG, JPEG, WebP, TIFF, GIF
448
253
 
449
- ## Technical Details
450
-
451
- - **Image processing:** Sharp (libvips) — demand-driven pipeline, streams tiles without full decompression
452
- - **Memory usage:** ~350-400MB peak for 30MB+ PNGs
453
- - **Transport:** stdio (local, single-session)
454
- - **Tile naming:** `tile_ROW_COL.png` (zero-padded, e.g., `tile_000_003.png`)
455
- - **Grid order:** Left-to-right, top-to-bottom
456
- - **Batch limit:** 5 tiles per `tiler_get_tiles` call to stay within MCP response limits
457
-
458
254
  ## Troubleshooting
459
255
 
460
- **"Command not found"** — Make sure Node.js 18+ is installed: `node --version`
256
+ **"Command not found"** - Make sure Node.js 20+ is installed: `node --version`
461
257
 
462
- **"File not found"** — Use absolute paths. Relative paths resolve from the MCP server's working directory.
258
+ **"File not found"** - Use absolute paths. Relative paths resolve from the MCP server's working directory.
463
259
 
464
- **"MCP tools not available"** — Restart your MCP client after config changes. In Claude Code, run `/mcp` to check server status.
260
+ **"MCP tools not available"** - Restart your MCP client after config changes. In Claude Code, run `/mcp` to check server status.
465
261
 
466
- ## Security
262
+ **"Chrome not found"** - Install Google Chrome or set the `CHROME_PATH` environment variable to the Chrome executable (must be an absolute path).
467
263
 
468
- This is a **local MCP server** designed to run on your machine via stdio. It operates with the same filesystem permissions as the MCP client process that spawns it.
264
+ **Running as root / in Docker** - Set `CHROME_NO_SANDBOX=1` to launch Chrome without sandbox (also enabled automatically when running as root).
469
265
 
470
- **Trust model:** This server trusts its MCP client. Path parameters (`filePath`, `outputDir`, `tilesDir`) are resolved and accessed directly — there is no sandboxing or path restriction beyond your OS-level permissions. This is expected for local MCP tools where the client (e.g. Claude Code) already has filesystem access.
266
+ **`viewportWidth` auto-detection** - Auto-detection of screen width works on macOS only. On other platforms, falls back to 1280px.
267
+
268
+ ## Security
471
269
 
472
- **URL downloads:** When using `sourceUrl`, the server fetches images over HTTPS only (no HTTP). Downloads are limited to 50MB with a 30-second timeout. Content-Type is validated — non-image responses (text/html, application/json, etc.) are rejected with a clear error. Downloaded files are written to a temp directory and cleaned up after processing. The server does not send any data externally — it only receives. No private/internal IP validation is performed on URLs.
270
+ Local stdio server - runs with the same filesystem permissions as the MCP client that spawns it. No path sandboxing, no SSRF protection on URL downloads.
473
271
 
474
- **If deploying remotely:** This server is not designed for multi-tenant or network-exposed environments. If you expose it beyond local stdio, you should add path validation (restrict to allowed directories), SSRF protection (block private IP ranges like 127.0.0.0/8, 10.0.0.0/8, 169.254.169.254), and authentication.
272
+ **If deploying remotely:** Add path validation, SSRF protection (block private/internal IP ranges), and authentication. This server is not designed for multi-tenant or network-exposed use.
475
273
 
476
274
  ## Requirements
477
275
 
478
- - Node.js 18+
479
- - Compatible MCP client (Claude Code, Claude Desktop, Cursor, VS Code with MCP extension)
276
+ - Node.js 20+
277
+ - Compatible MCP client (Claude Code, Codex CLI, VS Code, Cursor, Claude Desktop)
480
278
 
481
279
  ## License
482
280
 
@@ -1,4 +1,4 @@
1
- export declare const VISION_MODELS: readonly ["claude", "openai", "gemini", "gemini3"];
1
+ export declare const VISION_MODELS: readonly ["claude", "openai", "gemini3", "gemini"];
2
2
  export type VisionModel = (typeof VISION_MODELS)[number];
3
3
  export interface ModelVisionConfig {
4
4
  defaultTileSize: number;
@@ -25,8 +25,27 @@ export declare const ALLOWED_URL_PROTOCOLS: readonly ["https:"];
25
25
  export declare const MAX_BASE64_LENGTH = 67108864;
26
26
  export declare const MAX_DATA_URL_LENGTH: number;
27
27
  export declare const MIN_REMAINDER_RATIO = 0.15;
28
- export declare const IMAGE_INTENTS: readonly ["text_heavy", "ui_screenshot", "diagram", "photo", "general"];
29
- export type ImageIntent = (typeof IMAGE_INTENTS)[number];
30
- export declare const BUDGET_LEVELS: readonly ["low", "default", "max_detail"];
31
- export type BudgetLevel = (typeof BUDGET_LEVELS)[number];
28
+ export declare const SHARP_OPERATION_TIMEOUT_MS = 30000;
29
+ export declare const TILE_OUTPUT_FORMATS: readonly ["png", "webp"];
30
+ export type TileOutputFormat = (typeof TILE_OUTPUT_FORMATS)[number];
31
+ export declare const WEBP_QUALITY = 80;
32
+ export declare const MAX_STITCH_BYTES: number;
33
+ export declare const MAX_CAPTURE_HEIGHT = 200000;
34
+ export declare const CHROME_MAX_CAPTURE_HEIGHT = 16384;
35
+ export declare const CAPTURE_DEFAULT_VIEWPORT_WIDTH = 1280;
36
+ export declare const CAPTURE_DEFAULT_VIEWPORT_HEIGHT = 800;
37
+ export declare const CAPTURE_DEFAULT_TIMEOUT_MS = 60000;
38
+ export declare const CAPTURE_STITCH_SETTLE_MS = 100;
39
+ export declare const CAPTURE_IDLE_TIMEOUT_MS = 500;
40
+ export declare const WAIT_UNTIL_OPTIONS: readonly ["load", "networkidle", "domcontentloaded"];
41
+ export type WaitUntil = (typeof WAIT_UNTIL_OPTIONS)[number];
42
+ export declare const ALLOWED_CAPTURE_PROTOCOLS: readonly ["https:", "http:"];
43
+ export declare const LAZY_LOAD_SCROLL_PAUSE_MS = 100;
44
+ export declare const LAZY_LOAD_IMAGE_TIMEOUT_MS = 5000;
45
+ export declare const LAZY_LOAD_TOTAL_TIMEOUT_MS = 15000;
46
+ export declare const MAX_IMAGE_PIXELS = 256000000;
47
+ export declare const MAX_CHROME_STDERR_BYTES = 1048576;
48
+ export declare const MAX_CHROME_JSON_BYTES = 1048576;
49
+ export declare const MAX_PREVIEW_PIXELS = 16000000;
50
+ export declare const MIN_PREVIEW_WIDTH = 800;
32
51
  //# sourceMappingURL=constants.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,aAAa,oDAAqD,CAAC;AAChF,MAAM,MAAM,WAAW,GAAG,CAAC,OAAO,aAAa,CAAC,CAAC,MAAM,CAAC,CAAC;AAEzD,MAAM,WAAW,iBAAiB;IAChC,eAAe,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,WAAW,EAAE,iBAAiB,CA6BhE,CAAC;AAEF,eAAO,MAAM,aAAa,EAAE,WAAsB,CAAC;AAGnD,eAAO,MAAM,iBAAiB,QAAuC,CAAC;AACtE,eAAO,MAAM,aAAa,QAAmC,CAAC;AAC9D,eAAO,MAAM,aAAa,QAAmC,CAAC;AAC9D,eAAO,MAAM,eAAe,QAAqC,CAAC;AAElE,eAAO,MAAM,mBAAmB,QAAQ,CAAC;AACzC,eAAO,MAAM,eAAe,QAAQ,CAAC;AACrC,eAAO,MAAM,mBAAmB,IAAI,CAAC;AACrC,eAAO,MAAM,iBAAiB,wDAAyD,CAAC;AACxF,eAAO,MAAM,qBAAqB,IAAI,CAAC;AACvC,eAAO,MAAM,qBAAqB,QAAQ,CAAC;AAG3C,eAAO,MAAM,uBAAuB,QAAmB,CAAC;AACxD,eAAO,MAAM,mBAAmB,QAAS,CAAC;AAC1C,eAAO,MAAM,qBAAqB,qBAAsB,CAAC;AACzD,eAAO,MAAM,iBAAiB,WAAa,CAAC;AAC5C,eAAO,MAAM,mBAAmB,QAA0B,CAAC;AAG3D,eAAO,MAAM,mBAAmB,OAAO,CAAC;AAGxC,eAAO,MAAM,aAAa,yEAA0E,CAAC;AACrG,MAAM,MAAM,WAAW,GAAG,CAAC,OAAO,aAAa,CAAC,CAAC,MAAM,CAAC,CAAC;AAEzD,eAAO,MAAM,aAAa,2CAA4C,CAAC;AACvE,MAAM,MAAM,WAAW,GAAG,CAAC,OAAO,aAAa,CAAC,CAAC,MAAM,CAAC,CAAC"}
1
+ {"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,aAAa,oDAAqD,CAAC;AAChF,MAAM,MAAM,WAAW,GAAG,CAAC,OAAO,aAAa,CAAC,CAAC,MAAM,CAAC,CAAC;AAEzD,MAAM,WAAW,iBAAiB;IAChC,eAAe,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,WAAW,EAAE,iBAAiB,CA6BhE,CAAC;AAEF,eAAO,MAAM,aAAa,EAAE,WAAsB,CAAC;AAGnD,eAAO,MAAM,iBAAiB,QAAuC,CAAC;AACtE,eAAO,MAAM,aAAa,QAAmC,CAAC;AAC9D,eAAO,MAAM,aAAa,QAAmC,CAAC;AAC9D,eAAO,MAAM,eAAe,QAAqC,CAAC;AAElE,eAAO,MAAM,mBAAmB,QAAQ,CAAC;AACzC,eAAO,MAAM,eAAe,QAAQ,CAAC;AACrC,eAAO,MAAM,mBAAmB,IAAI,CAAC;AACrC,eAAO,MAAM,iBAAiB,wDAAyD,CAAC;AACxF,eAAO,MAAM,qBAAqB,IAAI,CAAC;AACvC,eAAO,MAAM,qBAAqB,QAAQ,CAAC;AAG3C,eAAO,MAAM,uBAAuB,QAAmB,CAAC;AACxD,eAAO,MAAM,mBAAmB,QAAS,CAAC;AAC1C,eAAO,MAAM,qBAAqB,qBAAsB,CAAC;AACzD,eAAO,MAAM,iBAAiB,WAAa,CAAC;AAC5C,eAAO,MAAM,mBAAmB,QAA0B,CAAC;AAG3D,eAAO,MAAM,mBAAmB,OAAO,CAAC;AAGxC,eAAO,MAAM,0BAA0B,QAAS,CAAC;AAGjD,eAAO,MAAM,mBAAmB,0BAA2B,CAAC;AAC5D,MAAM,MAAM,gBAAgB,GAAG,CAAC,OAAO,mBAAmB,CAAC,CAAC,MAAM,CAAC,CAAC;AACpE,eAAO,MAAM,YAAY,KAAK,CAAC;AAG/B,eAAO,MAAM,gBAAgB,QAAoB,CAAC;AAClD,eAAO,MAAM,kBAAkB,SAAU,CAAC;AAC1C,eAAO,MAAM,yBAAyB,QAAQ,CAAC;AAC/C,eAAO,MAAM,8BAA8B,OAAO,CAAC;AACnD,eAAO,MAAM,+BAA+B,MAAM,CAAC;AACnD,eAAO,MAAM,0BAA0B,QAAS,CAAC;AACjD,eAAO,MAAM,wBAAwB,MAAM,CAAC;AAC5C,eAAO,MAAM,uBAAuB,MAAM,CAAC;AAC3C,eAAO,MAAM,kBAAkB,sDAAuD,CAAC;AACvF,MAAM,MAAM,SAAS,GAAG,CAAC,OAAO,kBAAkB,CAAC,CAAC,MAAM,CAAC,CAAC;AAC5D,eAAO,MAAM,yBAAyB,8BAA+B,CAAC;AAGtE,eAAO,MAAM,yBAAyB,MAAM,CAAC;AAC7C,eAAO,MAAM,0BAA0B,OAAO,CAAC;AAC/C,eAAO,MAAM,0BAA0B,QAAS,CAAC;AAGjD,eAAO,MAAM,gBAAgB,YAAc,CAAC;AAC5C,eAAO,MAAM,uBAAuB,UAAY,CAAC;AACjD,eAAO,MAAM,qBAAqB,UAAY,CAAC;AAG/C,eAAO,MAAM,kBAAkB,WAAa,CAAC;AAC7C,eAAO,MAAM,iBAAiB,MAAM,CAAC"}