@mindstudio-ai/remy 0.1.26 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/README.md +149 -41
  2. package/dist/compiled/tables.md +53 -1
  3. package/dist/headless.d.ts +10 -2
  4. package/dist/headless.js +531 -271
  5. package/dist/index.js +574 -301
  6. package/dist/prompt/.notes.md +0 -1
  7. package/dist/prompt/compiled/tables.md +53 -1
  8. package/dist/prompt/static/authoring.md +10 -0
  9. package/dist/prompt/static/instructions.md +2 -1
  10. package/dist/prompt/static/team.md +1 -1
  11. package/dist/static/authoring.md +10 -0
  12. package/dist/static/instructions.md +2 -1
  13. package/dist/static/team.md +1 -1
  14. package/dist/subagents/.notes-background-agents.md +80 -0
  15. package/dist/subagents/browserAutomation/prompt.md +37 -2
  16. package/dist/subagents/codeSanityCheck/prompt.md +5 -0
  17. package/dist/subagents/designExpert/.notes.md +2 -2
  18. package/dist/subagents/designExpert/data/compile-font-descriptions.sh +125 -0
  19. package/dist/subagents/designExpert/data/compile-inspiration.sh +6 -1
  20. package/dist/subagents/designExpert/data/fonts.json +497 -869
  21. package/dist/subagents/designExpert/data/inspiration.json +97 -245
  22. package/dist/subagents/designExpert/data/inspiration.raw.json +1 -12
  23. package/dist/subagents/designExpert/prompts/animation.md +1 -1
  24. package/dist/subagents/designExpert/prompts/identity.md +4 -2
  25. package/dist/subagents/designExpert/prompts/instructions.md +2 -3
  26. package/dist/subagents/designExpert/prompts/layout.md +1 -13
  27. package/dist/subagents/designExpert/prompts/tool-prompts/design-analysis.md +22 -0
  28. package/dist/subagents/designExpert/prompts/tool-prompts/font-analysis.md +17 -0
  29. package/dist/subagents/productVision/prompt.md +1 -1
  30. package/package.json +1 -1
@@ -142,7 +142,6 @@ The intro framing ("you have a lot on your plate") gives the model permission to
142
142
 
143
143
  | Agent | Role | Tools | Context |
144
144
  |---|---|---|---|
145
- | `visualDesignExpert` | Visual design decisions | searchGoogle, fetchUrl, analyzeReferenceImageOrUrl, screenshot, searchProductScreenshots, generateImages | Spec files + sampled fonts + sampled inspiration |
146
145
  | `productVision` | Roadmap ownership & product strategy | writeRoadmapItem, updateRoadmapItem, deleteRoadmapItem | Spec files + current roadmap |
147
146
  | `sdkConsultant` | MindStudio SDK architecture | None (shells out to `mindstudio ask` CLI) | None (external agent) |
148
147
  | `codeSanityCheck` | Pre-build review | readFile, grep, glob, searchGoogle, fetchUrl, askMindStudioSdk, bash (readonly) | Spec files |
@@ -20,6 +20,23 @@ export const Vendors = db.defineTable<Vendor>('vendors');
20
20
 
21
21
  One export per file. The export name is referenced in `mindstudio.json` and imported in methods. Only define your own columns in the interface — do not add `id`, `created_at`, `updated_at`, or `last_updated_by` (they're provided automatically, see below).
22
22
 
23
+ ### Table Options
24
+
25
+ `defineTable<T>()` accepts an optional second argument with table-level configuration:
26
+
27
+ ```typescript
28
+ export const Users = db.defineTable<User>('users', {
29
+ unique: [['email']],
30
+ defaults: { role: 'member', status: 'active' },
31
+ });
32
+ ```
33
+
34
+ - **`unique`** — `(keyof T & string)[][]` — Column groups that form unique constraints. Each entry is a string array of column names. These are required for `upsert()` (see below). Schema sync on the platform creates the actual SQLite UNIQUE indexes.
35
+ - Single column: `[['email']]`
36
+ - Compound: `[['userId', 'orgId']]`
37
+ - Multiple constraints: `[['email'], ['slug']]`
38
+ - **`defaults`** — `Partial<T>` — Default values applied client-side in `push()` and `upsert()` before building the INSERT. Explicit values in the input override defaults.
39
+
23
40
  ### Column Types
24
41
 
25
42
  | TypeScript type | SQLite type | Notes |
@@ -79,6 +96,30 @@ const vendors = await Vendors.push([
79
96
  ]);
80
97
  ```
81
98
 
99
+ If the table has `defaults` configured, missing fields are filled in automatically. Explicit values override defaults.
100
+
101
+ ### Upsert (Insert or Update)
102
+
103
+ ```typescript
104
+ // Insert if no conflict on 'email', otherwise update the existing row
105
+ const user = await Users.upsert('email', {
106
+ email: 'alice@acme.com',
107
+ name: 'Alice',
108
+ role: 'admin',
109
+ });
110
+
111
+ // Compound conflict key — pass an array
112
+ const membership = await Memberships.upsert(['userId', 'orgId'], {
113
+ userId: user.id,
114
+ orgId: org.id,
115
+ role: 'member',
116
+ });
117
+ ```
118
+
119
+ `upsert(conflictKey, data)` generates `INSERT ... ON CONFLICT(...) DO UPDATE SET ...` using SQLite's `excluded.` syntax. All non-conflict columns are updated on conflict. Returns a `Mutation<T>` — works with `await` standalone or inside `db.batch()`, same as `push()` and `update()`.
120
+
121
+ The conflict key must match a declared `unique` constraint on the table. Throws `MindStudioError` with code `no_unique_constraint` if no match.
122
+
82
123
  ### Reading Records
83
124
 
84
125
  ```typescript
@@ -162,9 +203,20 @@ db.ago(db.days(7) + db.hours(12)) // composable — 7.5 days ago
162
203
  Invoices.filter(i => i.dueDate < db.ago(db.days(30)))
163
204
  ```
164
205
 
206
+ ### Error Handling on Queries
207
+
208
+ Both `Query<T>` and `Mutation<T>` support `.then()` and `.catch()` directly:
209
+
210
+ ```typescript
211
+ const user = await Users.upsert('email', data).catch(err => {
212
+ if (err.code === 'no_unique_constraint') { /* ... */ }
213
+ throw err;
214
+ });
215
+ ```
216
+
165
217
  ### Batching
166
218
 
167
- `db.batch()` combines multiple operations into a single HTTP round-trip. Every `await` on a table operation is a network call, so batching is critical for performance. Use it whenever you have multiple reads, writes, or a mix of both:
219
+ `db.batch()` combines multiple operations into a single HTTP round-trip. Every `await` on a table operation is a network call, so batching is critical for performance. Use it whenever you have multiple reads, writes, or a mix of both. `upsert()` works in batches just like `push()` and `update()`:
168
220
 
169
221
  ```typescript
170
222
  // Reads: fetch related data in one call instead of sequential awaits
@@ -25,6 +25,16 @@ Users often care about look and feel as much as (or more than) underlying data s
25
25
 
26
26
  Write specs in natural, human language. Describe what the app does the way you'd explain it to a colleague. The spec rendered with annotations hidden is a human-forward document that anyone can read. The spec with annotations visible is the agent-forward document that drives code generation. Keep the prose clean and readable — the user should never see raw CSS, code, or technical values in the prose. Write "square corners on all cards" not `border-radius: 0`. Write "no shadows" not `box-shadow: none`. Technical specifics belong in annotations.
27
27
 
28
+ When the design expert provides specific implementation details — CSS values, spacing, font sizes, rotation angles, shadow definitions, animation timings, or things to pay special attention to or watch out for — capture them as annotations on the relevant prose. The design expert's recommendations are precise and intentional; don't summarize them into vague language. The prose describes the intent, the annotations preserve the exact values the coder needs:
29
+
30
+ ```markdown
31
+ Cards float at varied angles with [rounded corners]{border-radius: 24px} on a pure black background.
32
+ ~~~
33
+ transform: rotate() with values between -15deg and 15deg, varied per card
34
+ box-shadow: 0 8px 32px rgba(0,0,0,0.3) for floating depth
35
+ ~~~
36
+ ```
37
+
28
38
  When you have image URLs (from the design expert), embed them directly in the spec using markdown image syntax. Write descriptive alt text that captures what the image actually depicts (this helps accessibility and helps the coding agent understand the image without loading it). Use the surrounding prose to explain the design intent — what the image is for, how it should be used in the layout, and why it was chosen.
29
39
 
30
40
  ```markdown
@@ -6,6 +6,7 @@
6
6
 
7
7
  ## Principles
8
8
  - The spec in `src/` is the source of truth. When in doubt, consult the spec before making code changes. When behavior changes, update the spec first.
9
+ - Always keep the spec up to date after making changes to the code, especially when adding features or building things from the roadmap.
9
10
  - Change only what the task requires. Match existing styles. Keep solutions simple.
10
11
  - Read files before editing them. Understand the context before making changes.
11
12
  - When the user asks you to make a change, execute it fully — all steps, no pausing for confirmation. Use `confirmDestructiveAction` to gate before destructive or irreversible actions (e.g., deleting data, resetting the database). For large changes that touch many files or involve significant design decisions, use `presentPlan` to get user approval first — but only when the scope genuinely warrants it or the user asks to see a plan. Most work should be done autonomously.
@@ -17,7 +18,7 @@
17
18
  ## Communication
18
19
  The user can already see your tool calls, so most of your work is visible without narration. Focus text output on three things:
19
20
  - **Decisions that need input.** Questions, tradeoffs, ambiguity that blocks progress.
20
- - **Milestones.** What you built, what it looks like, what changed. Summarize in plain language rather than listing a per-file changelog. When a sub-agent returns visual results (images, design direction), share them with the user inline before continuing your work — let them see what's been created while you keep going.
21
+ - **Milestones.** What you built, what it looks like, what changed. Summarize in plain language rather than listing a per-file changelog.
21
22
  - **Errors or blockers.** Something failed or the approach needs to shift.
22
23
 
23
24
  Skip the rest: narrating what you're about to do, restating what the user asked, explaining tool calls they can already see.
@@ -12,7 +12,7 @@ Your designer. Consult for any visual decision — choosing a color, picking fon
12
12
 
13
13
  The design expert cannot see your conversation with the user, so include all relevant context and requirements in your task. It can take screenshots of the app preview on its own — just ask it to review what's been built.
14
14
 
15
- Returns concrete resources: hex values, font names with CSS URLs, image URLs, layout descriptions.
15
+ Returns concrete resources: hex values, font names with CSS URLs, image URLs, layout descriptions. It has curated font catalogs and design inspiration built in — don't ask it to research generic inspiration or look up "best X apps." Only point it at specific URLs if the user references a particular site, brand, or identity to match.
16
16
 
17
17
  Always consult the design expert during intake and before building any new product features from the roadmap.
18
18
 
@@ -25,6 +25,16 @@ Users often care about look and feel as much as (or more than) underlying data s
25
25
 
26
26
  Write specs in natural, human language. Describe what the app does the way you'd explain it to a colleague. The spec rendered with annotations hidden is a human-forward document that anyone can read. The spec with annotations visible is the agent-forward document that drives code generation. Keep the prose clean and readable — the user should never see raw CSS, code, or technical values in the prose. Write "square corners on all cards" not `border-radius: 0`. Write "no shadows" not `box-shadow: none`. Technical specifics belong in annotations.
27
27
 
28
+ When the design expert provides specific implementation details — CSS values, spacing, font sizes, rotation angles, shadow definitions, animation timings, or things to pay special attention to or watch out for — capture them as annotations on the relevant prose. The design expert's recommendations are precise and intentional; don't summarize them into vague language. The prose describes the intent, the annotations preserve the exact values the coder needs:
29
+
30
+ ```markdown
31
+ Cards float at varied angles with [rounded corners]{border-radius: 24px} on a pure black background.
32
+ ~~~
33
+ transform: rotate() with values between -15deg and 15deg, varied per card
34
+ box-shadow: 0 8px 32px rgba(0,0,0,0.3) for floating depth
35
+ ~~~
36
+ ```
37
+
28
38
  When you have image URLs (from the design expert), embed them directly in the spec using markdown image syntax. Write descriptive alt text that captures what the image actually depicts (this helps accessibility and helps the coding agent understand the image without loading it). Use the surrounding prose to explain the design intent — what the image is for, how it should be used in the layout, and why it was chosen.
29
39
 
30
40
  ```markdown
@@ -6,6 +6,7 @@
6
6
 
7
7
  ## Principles
8
8
  - The spec in `src/` is the source of truth. When in doubt, consult the spec before making code changes. When behavior changes, update the spec first.
9
+ - Always keep the spec up to date after making changes to the code, especially when adding features or building things from the roadmap.
9
10
  - Change only what the task requires. Match existing styles. Keep solutions simple.
10
11
  - Read files before editing them. Understand the context before making changes.
11
12
  - When the user asks you to make a change, execute it fully — all steps, no pausing for confirmation. Use `confirmDestructiveAction` to gate before destructive or irreversible actions (e.g., deleting data, resetting the database). For large changes that touch many files or involve significant design decisions, use `presentPlan` to get user approval first — but only when the scope genuinely warrants it or the user asks to see a plan. Most work should be done autonomously.
@@ -17,7 +18,7 @@
17
18
  ## Communication
18
19
  The user can already see your tool calls, so most of your work is visible without narration. Focus text output on three things:
19
20
  - **Decisions that need input.** Questions, tradeoffs, ambiguity that blocks progress.
20
- - **Milestones.** What you built, what it looks like, what changed. Summarize in plain language rather than listing a per-file changelog. When a sub-agent returns visual results (images, design direction), share them with the user inline before continuing your work — let them see what's been created while you keep going.
21
+ - **Milestones.** What you built, what it looks like, what changed. Summarize in plain language rather than listing a per-file changelog.
21
22
  - **Errors or blockers.** Something failed or the approach needs to shift.
22
23
 
23
24
  Skip the rest: narrating what you're about to do, restating what the user asked, explaining tool calls they can already see.
@@ -12,7 +12,7 @@ Your designer. Consult for any visual decision — choosing a color, picking fon
12
12
 
13
13
  The design expert cannot see your conversation with the user, so include all relevant context and requirements in your task. It can take screenshots of the app preview on its own — just ask it to review what's been built.
14
14
 
15
- Returns concrete resources: hex values, font names with CSS URLs, image URLs, layout descriptions.
15
+ Returns concrete resources: hex values, font names with CSS URLs, image URLs, layout descriptions. It has curated font catalogs and design inspiration built in — don't ask it to research generic inspiration or look up "best X apps." Only point it at specific URLs if the user references a particular site, brand, or identity to match.
16
16
 
17
17
  Always consult the design expert during intake and before building any new product features from the roadmap.
18
18
 
@@ -0,0 +1,80 @@
1
+ # Background Agent Execution — Design Doc
2
+
3
+ Draft design for allowing sub-agents to return early and continue working in the background.
4
+
5
+ ## The problem
6
+
7
+ Some sub-agent tasks don't need to block Remy's turn. Product vision seeding roadmap items, for example — Remy needs the high-level plan to continue, but doesn't need to wait for all 15 files to be written. Currently, Remy blocks until the sub-agent finishes completely.
8
+
9
+ ## Design
10
+
11
+ ### Two new tools available to sub-agents
12
+
13
+ **`returnAndContinueInBackground`**
14
+ - Input: `{ response: string }` — the text to return to Remy immediately
15
+ - Called mid-loop by the sub-agent when it has enough to unblock Remy
16
+ - Resolves the parent tool promise with the response text
17
+ - The sub-agent loop continues running in the background
18
+ - All subsequent events emitted with `background: true` flag
19
+
20
+ **`finishBackgroundWork`**
21
+ - Input: `{ result: string, silent: boolean }` — final outcome report
22
+ - Called at the end of background work
23
+ - `silent: true` — queue a notification for Remy's next turn (hidden message)
24
+ - `silent: false` — trigger an automated message to wake Remy immediately
25
+ - Failures should generally use `silent: false` so Remy can address them
26
+
27
+ ### Runner changes
28
+
29
+ The runner needs to support a split lifecycle:
30
+
31
+ 1. Normal loop execution until `returnAndContinueInBackground` is called
32
+ 2. At that point, resolve the outer promise with `{ text: response, messages: [...so far] }`
33
+ 3. Continue the loop in a detached async context (own AbortController, not tied to Remy's turn)
34
+ 4. The `emit` wrapper adds `background: true` to all events after the split point
35
+ 5. When the sub-agent finishes (naturally or via `finishBackgroundWork`):
36
+ - Update `subAgentMessages` on the original tool block in `state.messages`
37
+ - Save the session
38
+ - If not silent, inject an automated message to trigger a new Remy turn
39
+
40
+ ### AgentEvent changes
41
+
42
+ Add optional `background?: boolean` to all event types that have `parentToolId`. The frontend uses this to render background work differently (collapsed, subtle indicator, etc.).
43
+
44
+ ### History / subAgentMessages
45
+
46
+ The `subAgentMessages` array on the tool content block gets updated in two phases:
47
+ 1. At `returnAndContinueInBackground` time — messages so far are attached (captured in the early return)
48
+ 2. At background completion — the full message array replaces the partial one, session is saved
49
+
50
+ A `backgroundStartIndex` on the tool content block marks where the early return happened in the messages array, so the frontend knows which messages were "live" vs "background."
51
+
52
+ ### Notification queue
53
+
54
+ The headless layer maintains a notification queue:
55
+ - Background agents push to it when they finish (via `finishBackgroundWork`)
56
+ - On next `runTurn`, headless flushes queued notifications as prepended hidden messages
57
+ - If `silent: false`, headless also sends an automated message to trigger a new turn immediately
58
+
59
+ ### Process management
60
+
61
+ The headless layer tracks active background agents:
62
+ - `get_background_agents` action → returns list with id, name, startedAt, status
63
+ - `cancel_background_agent` action → aborts a specific background agent
64
+ - The frontend can show active background work and let users kill dangling agents
65
+
66
+ ### Which sub-agents would use this?
67
+
68
+ - **productVision** — return lane summary immediately, write roadmap files in background (silent)
69
+ - **designExpert** — could return font/color recommendations immediately, generate images in background (silent)
70
+ - **codeSanityCheck** — probably NOT a candidate, Remy needs the advice before proceeding
71
+ - **browserAutomation** — probably NOT a candidate, results inform Remy's next action
72
+
73
+ ### What to build (ordered)
74
+
75
+ 1. `returnAndContinueInBackground` and `finishBackgroundWork` tool definitions
76
+ 2. Runner split-lifecycle support (detached async continuation)
77
+ 3. `background: true` flag on AgentEvent types
78
+ 4. Notification queue in headless layer
79
+ 5. Background agent process tracking in headless layer
80
+ 6. Update productVision prompt to use `returnAndContinueInBackground`
@@ -1,6 +1,7 @@
1
1
  You are a browser smoke test agent. You verify that features work end to end by interacting with the live preview. Focus on outcomes: does the feature work? Did the expected content appear? Just do the thing and see if it worked.
2
2
 
3
- The user is watching the automation happen on their screen in real-time. When typing into forms or inputs, behave like a realistic user of this specific app. Use the app context (if provided) to understand the audience and tone. Type the way that audience would actually type — not formal, not robotic. The coding agent's name is Remy, so use that and the email remy@mindstudio.ai for any testing that requires a name or email.
3
+ ## Tester Persona
4
+ The user is watching the automation happen on their screen in real-time. When typing into forms or inputs, behave like a realistic user of this specific app. Use the app context (if provided) to understand the audience and tone. Type the way that audience would actually type — not formal, not robotic. The coding agent's name is Remy, so use that and the email remy@mindstudio.ai as the basis for any testing that requires a persona.
4
5
 
5
6
  ## Snapshot format
6
7
 
@@ -21,9 +22,12 @@ Each interactive element has a `[ref=eN]` you can use to target it.
21
22
  - `snapshot`: Get the current page state. Always do this first and after action batches to verify results. Waits for network requests to settle.
22
23
  - `click`: Click an element. The cursor animates to it, then dispatches full pointer/mouse/click events.
23
24
  - `type`: Type text into an input. Characters appear one at a time. Set `clear: true` to clear the field first.
25
+ - `select`: Select a dropdown option by text. Target the `<select>` element, set `option` to the option text.
24
26
  - `wait`: Wait for an element to appear (polls every 100ms, default 5s timeout). Also waits for network to settle after the element is found.
27
+ - `navigate`: Navigate to a new URL within the app. Waits for the new page to load before continuing with subsequent steps. Use this instead of evaluate with `window.location.href` when you need to navigate and then continue interacting with the new page. Steps after navigate execute on the new page automatically.
25
28
  - `evaluate`: Run arbitrary JavaScript in the page and return the result.
26
- - `screenshot`: Capture a screenshot of the current page. Returns a CDN URL with dimensions. Separate tool call (not a browserCommand step).
29
+ - `styles`: Read computed CSS styles from page elements. Pass a `properties` array with camelCase CSS property names (e.g., `["backgroundColor", "borderRadius", "fontSize"]`). Omit `properties` for a default set covering colors, typography, spacing, borders, shadows, dimensions, and layout. Uses the same targeting as click/type (ref, text, role, label, selector). Omit the target to get styles for all elements from the last snapshot.
30
+ - `screenshot`: Full-page viewport-stitched screenshot. Returns base64 JPEG with dimensions. Available both as a browserCommand step (useful at the end of an action batch) and as a separate tool call (returns a CDN URL).
27
31
 
28
32
  ## Element targeting (tried in order)
29
33
 
@@ -78,6 +82,37 @@ Navigate to a sub-page and verify content:
78
82
  }
79
83
  ```
80
84
 
85
+ Select a dropdown option and screenshot the result:
86
+ ```json
87
+ {
88
+ "steps": [
89
+ { "command": "select", "label": "Country", "option": "United States" },
90
+ { "command": "screenshot" }
91
+ ]
92
+ }
93
+ ```
94
+
95
+ Navigate to a sub-page and interact with it:
96
+ ```json
97
+ {
98
+ "steps": [
99
+ { "command": "navigate", "url": "/quiz" },
100
+ { "command": "wait", "text": "what's your aura?", "timeout": 8000 },
101
+ { "command": "type", "ref": "e3", "text": "blue" },
102
+ { "command": "screenshot" }
103
+ ]
104
+ }
105
+ ```
106
+
107
+ Check computed styles on an element:
108
+ ```json
109
+ {
110
+ "steps": [
111
+ { "command": "styles", "text": "Sign Up", "properties": ["backgroundColor", "borderRadius", "boxShadow"] }
112
+ ]
113
+ }
114
+ ```
115
+
81
116
  Check a count with evaluate:
82
117
  ```json
83
118
  {
@@ -17,6 +17,11 @@ Most things are fine. These are fast-moving products built by non-technical user
17
17
  These are things we already know about and have decided to accept:
18
18
 
19
19
  - Limited browser support for `oklch` gradients using `in <colorspace>` syntax — we accept the compatibility tradeoff for better color quality
20
+ - Limited browser support for CSS scroll-driven animations (`animation-timeline: scroll()` / `view()`) — we accept this tradeoff
21
+ - Libraries we know are actively maintained, don't bother checking:
22
+ - swr
23
+ - framer-motion
24
+ - styled-components
20
25
 
21
26
  ### Common pitfalls (always flag these)
22
27
 
@@ -242,7 +242,7 @@ Google Images is explicitly excluded — too much noise/junk for design work.
242
242
  - **Font catalog** — 105 fonts (80 Fontshare + 14 Google Fonts + 11 Open Foundry) with 51 curated pairings, compiled in `data/fonts.json`. Runtime-sampled per invocation.
243
243
  - **Design inspiration** — Godly screenshots rehosted on MindStudio CDN, pre-analyzed via vision model, compiled in `data/inspiration.json`. Runtime-sampled per invocation. Compilation script at `data/compile-inspiration.sh`.
244
244
  - **Runtime sampling** — `prompt.ts` samples 15 fonts + 5 pairings + 5 inspiration images per invocation.
245
- - **Tools** — searchGoogle, fetchUrl, analyzeReferenceImageOrUrl, screenshot, searchProductScreenshots, generateImages.
245
+ - **Tools** — searchGoogle, fetchUrl, analyzeReferenceImageOrUrl, screenshot, generateImages.
246
246
  - **Prompt** — split into files in `prompts/` (identity, color, animation, layout, icons, images, resources, instructions, frontend-design-notes), assembled via template includes.
247
247
  - **Spec context** — automatically injected via `loadSpecContext()` from `subagents/common/context.ts`. Agent sees the full project spec without Remy summarizing.
248
248
  - **Image generation** — Seedream 4.5 via `generateImages` tool. Prompt guidance emphasizes: style/medium first, then subject; avoid hex codes (rendered as text); generate visual ingredients not UI components; default to real subjects over abstract.
@@ -251,7 +251,7 @@ Google Images is explicitly excluded — too much noise/junk for design work.
251
251
  ## Changes from initial design
252
252
 
253
253
  - Removed stock photo search (Pexels) and image editing tools — AI generation produces better bespoke results.
254
- - Removed Google Images for design inspiration — too much noise. Added `searchProductScreenshots` for real product UI research.
254
+ - Removed Google Images for design inspiration — too much noise.
255
255
  - Consolidated `analyzeImage`, `analyzeDesignReference`, and `screenshotAndAnalyze` into single `analyzeReferenceImageOrUrl` tool that auto-detects image URLs vs website URLs.
256
256
  - Added `screenshot` tool (external, sandbox-resolved) so the agent can capture the app preview directly.
257
257
  - Font examples in all Remy prompts changed from Google Fonts (DM Sans) to Fontshare (Satoshi) to avoid reinforcing AI defaults.
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env bash
2
+ #
3
+ # Analyze font specimen images and write descriptions into fonts.json.
4
+ #
5
+ # Reads each font from fonts.json, looks for a matching specimen image
6
+ # in specimens/fonts/{slug}.png, runs analyze-image, and writes the
7
+ # description back to the font's "description" field.
8
+ #
9
+ # Run: bash src/subagents/designExpert/data/compile-font-descriptions.sh
10
+ # Supports resuming — skips fonts that already have a description.
11
+
12
+ set -euo pipefail
13
+
14
+ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
15
+ FONTS_FILE="$SCRIPT_DIR/fonts.json"
16
+ SPECIMENS_BASE="https://i.mscdn.ai/remy-font-specimens/fonts"
17
+ PROMPT_FILE="$SCRIPT_DIR/../prompts/tool-prompts/font-analysis.md"
18
+
19
+ if [ ! -f "$PROMPT_FILE" ]; then
20
+ echo "Error: $PROMPT_FILE not found"
21
+ exit 1
22
+ fi
23
+ PROMPT=$(cat "$PROMPT_FILE")
24
+
25
+ if [ ! -f "$FONTS_FILE" ]; then
26
+ echo "Error: $FONTS_FILE not found"
27
+ exit 1
28
+ fi
29
+
30
+ # Get fonts that need descriptions
31
+ SLUGS=$(python3 -c "
32
+ import json
33
+ data = json.load(open('$FONTS_FILE'))
34
+ for f in data['fonts']:
35
+ if not f.get('description'):
36
+ print(f['slug'])
37
+ ")
38
+
39
+ TOTAL=$(python3 -c "import json; print(len(json.load(open('$FONTS_FILE'))['fonts']))")
40
+ DONE=$(python3 -c "import json; print(sum(1 for f in json.load(open('$FONTS_FILE'))['fonts'] if f.get('description')))")
41
+ echo "Font descriptions: $DONE/$TOTAL already done"
42
+
43
+ if [ -z "$SLUGS" ]; then
44
+ echo "All fonts already have descriptions."
45
+ exit 0
46
+ fi
47
+
48
+ REMAINING=$(echo "$SLUGS" | wc -l | tr -d ' ')
49
+ echo "Processing $REMAINING remaining fonts..."
50
+
51
+ TMPDIR=$(mktemp -d)
52
+
53
+ process_one() {
54
+ local slug="$1"
55
+ local url="${SPECIMENS_BASE}/${slug}.png"
56
+ local outfile="$TMPDIR/${slug}.txt"
57
+
58
+ DESCRIPTION=$(mindstudio analyze-image \
59
+ --prompt "$PROMPT" \
60
+ --image-url "$url" \
61
+ --output-key analysis \
62
+ --no-meta 2>&1) || true
63
+
64
+ if [ -z "$DESCRIPTION" ] || echo "$DESCRIPTION" | grep -q '"error"'; then
65
+ echo " FAILED — $slug"
66
+ return
67
+ fi
68
+
69
+ echo "$DESCRIPTION" > "$outfile"
70
+ echo " OK — $slug"
71
+ }
72
+
73
+ merge_batch() {
74
+ python3 -c "
75
+ import json, os
76
+ fonts_file = '$FONTS_FILE'
77
+ tmp_dir = '$TMPDIR'
78
+ with open(fonts_file, 'r') as f:
79
+ data = json.load(f)
80
+ slug_to_font = {f['slug']: f for f in data['fonts']}
81
+ for filename in os.listdir(tmp_dir):
82
+ if not filename.endswith('.txt'):
83
+ continue
84
+ slug = filename[:-4]
85
+ if slug in slug_to_font:
86
+ with open(os.path.join(tmp_dir, filename)) as f:
87
+ slug_to_font[slug]['description'] = f.read().strip()
88
+ os.remove(os.path.join(tmp_dir, filename))
89
+ with open(fonts_file, 'w') as f:
90
+ json.dump(data, f, indent=2)
91
+ f.write('\n')
92
+ "
93
+ }
94
+
95
+ # Process in batches of 5
96
+ BATCH=()
97
+ BATCH_COUNT=0
98
+ while IFS= read -r slug; do
99
+ BATCH+=("$slug")
100
+ if [ ${#BATCH[@]} -eq 5 ]; then
101
+ BATCH_COUNT=$((BATCH_COUNT + ${#BATCH[@]}))
102
+ echo " Batch $BATCH_COUNT/$REMAINING"
103
+ for s in "${BATCH[@]}"; do
104
+ process_one "$s" &
105
+ done
106
+ wait
107
+ merge_batch
108
+ BATCH=()
109
+ fi
110
+ done <<< "$SLUGS"
111
+
112
+ # Process remaining
113
+ if [ ${#BATCH[@]} -gt 0 ]; then
114
+ BATCH_COUNT=$((BATCH_COUNT + ${#BATCH[@]}))
115
+ echo " Batch $BATCH_COUNT/$REMAINING"
116
+ for s in "${BATCH[@]}"; do
117
+ process_one "$s" &
118
+ done
119
+ wait
120
+ merge_batch
121
+ fi
122
+
123
+ rm -rf "$TMPDIR"
124
+ FINAL=$(python3 -c "import json; print(sum(1 for f in json.load(open('$FONTS_FILE'))['fonts'] if f.get('description')))")
125
+ echo "Done. $FINAL/$TOTAL fonts have descriptions."
@@ -15,7 +15,12 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
15
15
  RAW_FILE="$SCRIPT_DIR/inspiration.raw.json"
16
16
  OUT_FILE="$SCRIPT_DIR/inspiration.json"
17
17
 
18
- PROMPT="Analyze this website/app screenshot as a design reference. Assess: 1) Mood/aesthetic 2) Color palette with approximate hex values and palette strategy 3) Typography style 4) Layout composition (symmetric/asymmetric, grid structure, whitespace usage, content density) 5) What makes it distinctive and interesting vs generic AI-generated interfaces. Be specific and concise."
18
+ PROMPT_FILE="$SCRIPT_DIR/../prompts/tool-prompts/design-analysis.md"
19
+ if [ ! -f "$PROMPT_FILE" ]; then
20
+ echo "Error: $PROMPT_FILE not found"
21
+ exit 1
22
+ fi
23
+ PROMPT=$(cat "$PROMPT_FILE")
19
24
 
20
25
  if [ ! -f "$RAW_FILE" ]; then
21
26
  echo "Error: $RAW_FILE not found"