elementus-ai 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,21 @@
1
+ # Changelog
2
+
3
+ All notable changes are documented here. This project adheres to [Semantic Versioning](https://semver.org).
4
+
5
+ ## 1.2.0
6
+
7
+ ### Added
8
+
9
+ - Bundled TypeScript type definitions (`index.d.ts`), exposed via the package `types` field — no `@types/...` package or `declare module` shim required.
10
+ - Exported declarations: the `createElementus` function and the types `ElementusOptions`, `Elementus`, `ElementusPage`, `AiLocatorOptions`.
11
+ - `ElementusPage` types `page.locator(selector, { ai })` by extending Playwright's own locator options, so the `{ ai }` hint type-checks while plain locators and native options keep working.
12
+ - `@playwright/test` types resolve as an optional peer — WDIO/Appium-only projects don't need Playwright installed.
13
+ - README: new **TypeScript** section (typed fixture + Page Object Model patterns); the One-Prompt Setup is now TypeScript-aware.
14
+
15
+ ### Changed
16
+
17
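+ - The default LM Studio `model` is now `holo-3.1-9b` (previously `gemma-4-26b-a4b-it`); set `model` explicitly to keep using the previous one. There are no other runtime changes: the remaining edits to `elementus.js` are comments only, and the rest of this release is type definitions and documentation.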
18
+
19
+ ## 1.1.0
20
+
21
+ - Previous release.
package/README.md CHANGED
@@ -24,18 +24,20 @@ I just installed the npm package "elementus-ai" — a self-healing element resol
24
24
  - Search for: playwright.config, wdio.conf, appium config files
25
25
  - Check package.json for: @playwright/test, playwright, webdriverio, wdio, appium
26
26
  - Read a few existing test files to understand the test structure
27
+ - Note whether the project is TypeScript (tsconfig.json or .ts test files) — this changes the fixture syntax (see step 3)
27
28
  - If none found, tell me you can't detect a supported framework and stop
28
29
 
29
30
  2. CHOOSE THE LLM PROVIDER
30
- - Ask me: "Do you want to use a local LLM (LM Studio, free, private) or Google Gemini (cloud, fast, ~$0.01/500 tests)?"
31
+ - Ask me: "Do you want to use a local LLM (LM Studio, free, private) or Google Gemini (cloud, fast, ~$0.001 per AI-healed selector on gemini-3.5-flash; selectors that still work cost nothing)?"
31
32
  - If Gemini: ask for API key or check for GEMINI_API_KEY env var
32
- - If LM Studio: use defaults (localhost:1234, gemma model)
33
+ - If LM Studio: use defaults (localhost:1234) with a vision/grounding model loaded (recommended: holo-3.1-9b)
33
34
 
34
35
  3. INTEGRATE BASED ON MY FRAMEWORK
35
36
 
36
37
  For Playwright:
37
38
  - Create or update a fixtures file that wraps page with el.wrapPage(page)
38
39
  - Make sure all tests import from the fixtures file instead of @playwright/test
40
+ - TypeScript projects: use import/export and type the override as base.extend<{ page: ElementusPage }>({ ... }) (import ElementusPage from elementus-ai) so { ai } is autocompleted and documented. Types are bundled — do NOT add @types or a "declare module" shim
39
41
  - Set actionTimeout: 10000 in playwright config (Elementus respects framework timeouts)
40
42
 
41
43
  For WebDriverIO:
@@ -90,24 +92,26 @@ await p.locator('#submit-btn', { ai: 'Submit order button' }).click()
90
92
  await p.locator('#stable-element').click()
91
93
  ```
92
94
 
95
+ > **Using TypeScript or ESM?** `import { createElementus } from 'elementus-ai'` — type definitions are bundled. See [TypeScript](#typescript) for the typed fixture pattern.
96
+
93
97
  ## LLM Provider Setup
94
98
 
95
99
  ### Option A: Local LLM via LM Studio (free, private)
96
100
 
97
101
  1. Download [LM Studio](https://lmstudio.ai)
98
- 2. Load a vision-capable model (e.g., `gemma-4-26b-a4b-it`)
102
+ 2. Load a vision-capable model. Recommended: **`holo-3.1-9b`** — a GUI-grounding model that locates on-screen elements far better than general chat VLMs, and it's small (9B). Any vision model works, but grounding models earn their keep on the vision-fallback path.
99
103
  3. Start the local server (default: `http://localhost:1234`)
100
104
 
101
105
  ```javascript
102
106
  const el = createElementus({
103
107
  provider: 'lmstudio',
104
108
  lmStudioUrl: 'http://localhost:1234/v1/chat/completions',
105
- model: 'gemma-4-26b-a4b-it',
109
+ model: 'holo-3.1-9b',
106
110
  })
107
111
  ```
108
112
 
109
113
  Tips for the local setup:
110
- - **Vision accuracy:** a dedicated GUI-grounding model (e.g. `Holo2-8B`, Apache-2.0 GGUF on Hugging Face) typically grounds screen coordinates better than general chat VLMs — benchmark numbers are vendor-reported (Nov 2025), verify it loads in your LM Studio version before switching.
114
+ - **Context length:** set it to 16k+ in LM Studio — the ARIA-snapshot grounding step can send large prompts, and the default 4k will silently truncate them.
111
115
  - **Semantic matching:** load an embedding model (e.g. `text-embedding-nomic-embed-text-v1.5`) and set `embeddingModel` to let paraphrased descriptions ("sign in" vs "log in") resolve without vision.
112
116
 
113
117
  ### Option B: Google Gemini API (cloud, fast, better vision)
@@ -193,6 +197,55 @@ await d.$('~emailField', { ai: 'Email input' }).setValue('test@test.com')
193
197
 
194
198
  Works with Flutter, React Native, native Android/iOS — any Appium driver.
195
199
 
200
+ ## TypeScript
201
+
202
+ Type definitions are bundled — there is no `@types/elementus-ai` package to install and no `declare module` shim to write. Because `@playwright/test` is an *optional* peer, WDIO/Appium-only projects can use the types without installing Playwright.
203
+
204
+ ```ts
205
+ import { createElementus, type ElementusPage } from 'elementus-ai'
206
+ ```
207
+
208
+ **Typed Playwright fixture.** `wrapPage` changes the page's runtime value but not its static type, so override the `page` fixture's type with `ElementusPage` — then `{ ai }` is recognized and autocompleted (with docs) in your tests:
209
+
210
+ ```ts
211
+ // fixtures.ts
212
+ import { test as base, expect } from '@playwright/test'
213
+ import { createElementus, type ElementusPage } from 'elementus-ai'
214
+
215
+ const el = createElementus({ provider: 'gemini', geminiApiKey: process.env.GEMINI_API_KEY })
216
+
217
+ export const test = base.extend<{ page: ElementusPage }>({
218
+ page: async ({ page }, use) => {
219
+ await use(el.wrapPage(page))
220
+ },
221
+ })
222
+ export { expect }
223
+
224
+ // In tests — page is already wrapped and typed:
225
+ test('example', async ({ page }) => {
226
+ await page.locator('#btn', { ai: 'Submit button' }).click() // { ai } type-checks
227
+ await page.locator('#btn').click() // plain locator, zero overhead
228
+ })
229
+ ```
230
+
231
+ > The override is what makes `{ ai }` type-check, and it also gives you IntelliSense and inline docs for the option. Healing happens at runtime either way, but Playwright's own `locator()` options are a closed object type, so without the override TypeScript's excess-property check rejects an inline `{ ai }` object literal.
232
+
233
+ **Page Object Model.** Type the page your objects receive as `ElementusPage`:
234
+
235
+ ```ts
236
+ import { type ElementusPage } from 'elementus-ai'
237
+
238
+ abstract class BasePage {
239
+ constructor(protected readonly page: ElementusPage) {}
240
+ }
241
+
242
+ class LoginPage extends BasePage {
243
+ readonly submit = this.page.locator('#submit', { ai: 'Submit button' })
244
+ }
245
+ ```
246
+
247
+ **Exported types:** `ElementusOptions`, `Elementus`, `ElementusPage`, `AiLocatorOptions`. `AiLocatorOptions` is Playwright's own `locator()` option type plus `ai?: string`, derived from the installed Playwright version so it never drifts.
248
+
196
249
  ## API Reference
197
250
 
198
251
  ### `el.wrapPage(page)`
@@ -243,7 +296,7 @@ createElementus({
243
296
 
244
297
  // LM Studio
245
298
  lmStudioUrl: 'http://localhost:1234/v1/chat/completions',
246
- model: 'gemma-4-26b-a4b-it',
299
+ model: 'holo-3.1-9b',
247
300
 
248
301
  // Gemini
249
302
  geminiApiKey: null, // or GEMINI_API_KEY env var
package/elementus.js CHANGED
@@ -20,13 +20,13 @@
20
20
  *
21
21
  * Option A — Local LLM via LM Studio (free, private, no API key):
22
22
  * 1. Download LM Studio from https://lmstudio.ai
23
- * 2. Load a vision-capable model (e.g., gemma-4-26b-a4b-it)
23
+ * 2. Load a vision-capable model (recommended: holo-3.1-9b, a GUI-grounding model)
24
24
  * 3. Start the local server (default: http://localhost:1234)
25
25
  * 4. Configure:
26
26
  * const el = createElementus({
27
27
  * provider: 'lmstudio',
28
28
  * lmStudioUrl: 'http://localhost:1234/v1/chat/completions',
29
- * model: 'gemma-4-26b-a4b-it',
29
+ * model: 'holo-3.1-9b',
30
30
  * })
31
31
  *
32
32
  * Option B — Google Gemini API (cloud, fast, better vision):
@@ -158,7 +158,7 @@
158
158
  *
159
159
  * // LM Studio (when provider = 'lmstudio')
160
160
  * lmStudioUrl: 'http://localhost:1234/v1/chat/completions',
161
- * model: 'gemma-4-26b-a4b-it',
161
+ * model: 'holo-3.1-9b',
162
162
  *
163
163
  * // Gemini (when provider = 'gemini')
164
164
  * geminiApiKey: null, // or GEMINI_API_KEY env var
@@ -287,7 +287,7 @@ const path = require('path')
287
287
  const DEFAULTS = {
288
288
  provider: 'lmstudio',
289
289
  lmStudioUrl: 'http://localhost:1234/v1/chat/completions',
290
- model: 'gemma-4-26b-a4b-it',
290
+ model: 'holo-3.1-9b',
291
291
  geminiApiKey: null,
292
292
  geminiModel: 'gemini-3.5-flash',
293
293
  maxCandidates: 20,
@@ -367,7 +367,7 @@ const REGION_LABELS = [
367
367
  * @param {Object} userConfig
368
368
  * @param {'lmstudio'|'gemini'} [userConfig.provider='lmstudio'] - LLM provider
369
369
  * @param {string} [userConfig.lmStudioUrl='http://localhost:1234/v1/chat/completions'] - LM Studio endpoint
370
- * @param {string} [userConfig.model='gemma-4-26b-a4b-it'] - LM Studio model name
370
+ * @param {string} [userConfig.model='holo-3.1-9b'] - LM Studio model name
371
371
  * @param {string|null} [userConfig.geminiApiKey=null] - Google Gemini API key (or GEMINI_API_KEY env var)
372
372
  * @param {string} [userConfig.geminiModel='gemini-3.5-flash'] - Gemini model ID
373
373
  * @param {number} [userConfig.maxCandidates=20] - max elements sent to LLM for disambiguation
package/index.d.ts ADDED
@@ -0,0 +1,81 @@
1
+ // Type definitions for elementus-ai
2
+ // Project: https://github.com/Morph93/elementus
3
+ //
4
+ // Self-healing element resolution for Playwright, WebdriverIO & Appium.
5
+ // These types describe the Playwright/core API. WebdriverIO's global `$`
6
+ // augmentation lives in the separate, opt-in `wdio.d.ts`.
7
+ //
8
+ // `@playwright/test` is an OPTIONAL peer dependency. The `@ts-ignore` below lets
9
+ // WDIO/Appium-only consumers (who have no Playwright installed) fall back to
10
+ // `any` for these types instead of failing module resolution.
11
+ // @ts-ignore -- optional peer dependency
12
+ import type { Page, Locator } from '@playwright/test'
13
+
14
+ export interface ElementusOptions {
15
+ /** LLM provider. @default 'lmstudio' */
16
+ provider?: 'lmstudio' | 'gemini'
17
+ /** LM Studio chat-completions endpoint. @default 'http://localhost:1234/v1/chat/completions' */
18
+ lmStudioUrl?: string
19
+ /** LM Studio model name. @default 'holo-3.1-9b' */
20
+ model?: string
21
+ /** Google Gemini API key (or set the GEMINI_API_KEY env var). @default null */
22
+ geminiApiKey?: string | null
23
+ /** Gemini model id. @default 'gemini-3.5-flash' */
24
+ geminiModel?: string
25
+ /** Max elements sent to the LLM for disambiguation. @default 20 */
26
+ maxCandidates?: number
27
+ /** Save debug screenshots to `debugDir`. @default false */
28
+ debug?: boolean
29
+ /** Directory for debug screenshots (required when `debug` is true). @default null */
30
+ debugDir?: string | null
31
+ /** Custom stop words to ignore in descriptions (replaces the defaults). @default null */
32
+ stopWords?: Set<string> | null
33
+ /** Max screenshot width (px) sent to the vision LLM. @default 1280 */
34
+ visionMaxWidth?: number
35
+ /** Opt-in fingerprint cache file, e.g. './elementus-cache.json'. @default null */
36
+ cacheFile?: string | null
37
+ /** Opt-in embedding model for semantic paraphrase matching. @default null */
38
+ embeddingModel?: string | null
39
+ }
40
+
41
+ /**
42
+ * Playwright's own `locator()` options, plus the Elementus `ai` hint.
43
+ * Derived from the installed Playwright types so it never drifts.
44
+ */
45
+ export type AiLocatorOptions = NonNullable<Parameters<Page['locator']>[1]> & {
46
+ /** Natural-language description; the self-healing fallback used when `selector` breaks. */
47
+ ai?: string
48
+ }
49
+
50
+ /**
51
+ * A Playwright Page whose `locator()` also accepts `{ ai }`. Locators created
52
+ * with an `ai` hint self-heal when the selector breaks; locators without it are
53
+ * returned unchanged (zero overhead).
54
+ */
55
+ export type ElementusPage = Page & {
56
+ locator(selector: string, options?: AiLocatorOptions): Locator
57
+ }
58
+
59
+ export interface Elementus {
60
+ /**
61
+ * Wrap a Playwright Page so `page.locator(selector, { ai })` self-heals.
62
+ * Call once per test, or in a fixture for the whole suite.
63
+ */
64
+ wrapPage(page: Page): ElementusPage
65
+ /**
66
+ * Wrap a WebdriverIO/Appium browser so `$(selector, { ai })` self-heals.
67
+ * Returns the same object it was given (now AI-aware).
68
+ */
69
+ wrapBrowser<T>(browser: T): T
70
+ /** Try `locator` first; fall back to AI resolution if it fails. */
71
+ locate(ctx: Page, locator: Locator, description: string): Promise<Locator>
72
+ /** Resolve an element from a natural-language description alone. */
73
+ find(ctx: Page, description: string): Promise<Locator>
74
+ /** Click with an optimized fallback (goto for links, JS click for buttons). */
75
+ click(ctx: Page, locator: Locator, description: string): Promise<void>
76
+ /** Low-level: wrap a single locator with AI fallback. Prefer wrapPage(). */
77
+ wrap(ctx: Page, locator: Locator, description: string): Locator
78
+ }
79
+
80
+ /** Create an Elementus instance with the given configuration. */
81
+ export function createElementus(options?: ElementusOptions): Elementus
package/package.json CHANGED
@@ -1,8 +1,9 @@
1
1
  {
2
2
  "name": "elementus-ai",
3
- "version": "1.1.0",
3
+ "version": "1.2.0",
4
4
  "description": "Self-healing element resolution for Playwright, WDIO & Appium. AI-powered fallback when selectors break.",
5
5
  "main": "elementus.js",
6
+ "types": "index.d.ts",
6
7
  "scripts": {
7
8
  "test": "playwright test test/playwright.spec.js",
8
9
  "test:smoke": "playwright test test/playwright.spec.js -g \"T01 |T02 |T09 |T17 |T23 \""
@@ -51,8 +52,10 @@
51
52
  },
52
53
  "files": [
53
54
  "elementus.js",
55
+ "index.d.ts",
54
56
  "wdio.d.ts",
55
57
  "README.md",
58
+ "CHANGELOG.md",
56
59
  "LICENSE"
57
60
  ]
58
61
  }