appium-mcp 1.79.1 → 1.81.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +6 -0
- package/dist/core.d.ts +2 -0
- package/dist/core.d.ts.map +1 -1
- package/dist/core.js +1 -0
- package/dist/core.js.map +1 -1
- package/dist/create-server.d.ts +8 -0
- package/dist/create-server.d.ts.map +1 -1
- package/dist/create-server.js +3 -1
- package/dist/create-server.js.map +1 -1
- package/dist/policy.d.ts +17 -0
- package/dist/policy.d.ts.map +1 -0
- package/dist/policy.js +137 -0
- package/dist/policy.js.map +1 -0
- package/dist/scripts/eval-documentation-rag.d.ts +44 -17
- package/dist/scripts/eval-documentation-rag.d.ts.map +1 -1
- package/dist/scripts/eval-documentation-rag.js +181 -106
- package/dist/scripts/eval-documentation-rag.js.map +1 -1
- package/dist/scripts/rag-eval-dataset.json +337 -39
- package/dist/tests/create-server.test.js +230 -1
- package/dist/tests/create-server.test.js.map +1 -1
- package/dist/tests/policy.test.d.ts +2 -0
- package/dist/tests/policy.test.d.ts.map +1 -0
- package/dist/tests/policy.test.js +44 -0
- package/dist/tests/policy.test.js.map +1 -0
- package/dist/tools/documentation/markdown-header-splitter.d.ts.map +1 -1
- package/dist/tools/documentation/markdown-header-splitter.js +67 -67
- package/dist/tools/documentation/markdown-header-splitter.js.map +1 -1
- package/package.json +1 -2
- package/server.json +2 -2
- package/src/core.ts +7 -0
- package/src/create-server.ts +12 -0
- package/src/policy.ts +230 -0
- package/src/resources/submodules.zip +0 -0
- package/src/scripts/eval-documentation-rag.ts +261 -140
- package/src/scripts/rag-eval-dataset.json +337 -39
- package/src/tools/documentation/markdown-header-splitter.ts +82 -82
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
1
|
+
## [1.81.0](https://github.com/appium/appium-mcp/compare/v1.80.0...v1.81.0) (2026-05-29)
|
|
2
|
+
|
|
3
|
+
### Features
|
|
4
|
+
|
|
5
|
+
* add lightweight allowlist policy ([#369](https://github.com/appium/appium-mcp/issues/369)) ([2a52174](https://github.com/appium/appium-mcp/commit/2a52174cea59d02333c0af86cf3faa5a4ccf8715))
|
|
6
|
+
|
|
7
|
+
## [1.80.0](https://github.com/appium/appium-mcp/compare/v1.79.1...v1.80.0) (2026-05-28)
|
|
8
|
+
|
|
9
|
+
### Features
|
|
10
|
+
|
|
11
|
+
* **rag:** update evals to use answer spans ([#371](https://github.com/appium/appium-mcp/issues/371)) ([e59ea55](https://github.com/appium/appium-mcp/commit/e59ea5597eff268021bb588312931a5d8ceef090))
|
|
12
|
+
|
|
1
13
|
## [1.79.1](https://github.com/appium/appium-mcp/compare/v1.79.0...v1.79.1) (2026-05-28)
|
|
2
14
|
|
|
3
15
|
### Bug Fixes
|
package/README.md
CHANGED
|
@@ -341,6 +341,8 @@ HTTP and streamable MCP clients may **disconnect briefly** (reconnect, reload, p
|
|
|
341
341
|
|
|
342
342
|
Use `appium-mcp/core` to compose the default Appium MCP server with custom business logic without maintaining a fork. Plugins can register MCP tools, prompts, resources, and resource templates, and can wrap tool execution with lifecycle hooks.
|
|
343
343
|
|
|
344
|
+
`createAppiumMcpServer({ policy })` can also hide nonmatching tools and resources from MCP discovery. Policy rules are `RegExp` objects matched against tool and resource names exactly as registered; an omitted or empty allowlist leaves that kind unrestricted. The policy is applied at registration time to both single and batch registration methods. Resource policy matches the resource `name` only; resources or resource templates without a string `name` are evaluated as an empty name, so a non-empty `allowResources` list hides them unless one of its rules matches the empty string.
|
|
345
|
+
|
|
344
346
|
```ts
|
|
345
347
|
import { createAppiumMcpServer } from 'appium-mcp/core';
|
|
346
348
|
import type {
|
|
@@ -375,6 +377,10 @@ class CheckoutPlugin implements AppiumMcpPlugin {
|
|
|
375
377
|
const server = createAppiumMcpServer({
|
|
376
378
|
plugins: [new CheckoutPlugin()],
|
|
377
379
|
additionalInstructions: 'Custom checkout policies are active.',
|
|
380
|
+
policy: {
|
|
381
|
+
allowTools: [/^appium_session_management$/, /^assert_checkout_summary$/],
|
|
382
|
+
allowResources: [/^Generate Code With Locators$/],
|
|
383
|
+
},
|
|
378
384
|
});
|
|
379
385
|
|
|
380
386
|
await server.start({ transportType: 'stdio' });
|
package/dist/core.d.ts
CHANGED
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
*/
|
|
8
8
|
export { createAppiumMcpServer } from './create-server.js';
|
|
9
9
|
export type { CreateAppiumMcpServerOptions } from './create-server.js';
|
|
10
|
+
export { evaluatePolicyTarget } from './policy.js';
|
|
11
|
+
export type { AppiumMcpPolicy, PolicyDecision, PolicyDecisionReason, PolicyTargetKind, } from './policy.js';
|
|
10
12
|
export { AppiumMcpCore, formatVerificationReport, McpRegistry, PluginManager, verifyAppiumMcpNames, } from './plugin.js';
|
|
11
13
|
export type { AppiumMcpPlugin, PluginContext, PluginSessionContext, ToolCallContext, ToolCallResult, VerificationDuplicate, VerificationDuplicateKind, VerificationEntry, VerificationReport, VerifyAppiumMcpNamesOptions, } from './plugin.js';
|
|
12
14
|
//# sourceMappingURL=core.d.ts.map
|
package/dist/core.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"core.d.ts","sourceRoot":"","sources":["../src/core.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAC3D,YAAY,EAAE,4BAA4B,EAAE,MAAM,oBAAoB,CAAC;AACvE,OAAO,EACL,aAAa,EACb,wBAAwB,EACxB,WAAW,EACX,aAAa,EACb,oBAAoB,GACrB,MAAM,aAAa,CAAC;AACrB,YAAY,EACV,eAAe,EACf,aAAa,EACb,oBAAoB,EACpB,eAAe,EACf,cAAc,EACd,qBAAqB,EACrB,yBAAyB,EACzB,iBAAiB,EACjB,kBAAkB,EAClB,2BAA2B,GAC5B,MAAM,aAAa,CAAC"}
|
|
1
|
+
{"version":3,"file":"core.d.ts","sourceRoot":"","sources":["../src/core.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAC3D,YAAY,EAAE,4BAA4B,EAAE,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AACnD,YAAY,EACV,eAAe,EACf,cAAc,EACd,oBAAoB,EACpB,gBAAgB,GACjB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,aAAa,EACb,wBAAwB,EACxB,WAAW,EACX,aAAa,EACb,oBAAoB,GACrB,MAAM,aAAa,CAAC;AACrB,YAAY,EACV,eAAe,EACf,aAAa,EACb,oBAAoB,EACpB,eAAe,EACf,cAAc,EACd,qBAAqB,EACrB,yBAAyB,EACzB,iBAAiB,EACjB,kBAAkB,EAClB,2BAA2B,GAC5B,MAAM,aAAa,CAAC"}
|
package/dist/core.js
CHANGED
|
@@ -6,5 +6,6 @@
|
|
|
6
6
|
* hooks.
|
|
7
7
|
*/
|
|
8
8
|
export { createAppiumMcpServer } from './create-server.js';
|
|
9
|
+
export { evaluatePolicyTarget } from './policy.js';
|
|
9
10
|
export { AppiumMcpCore, formatVerificationReport, McpRegistry, PluginManager, verifyAppiumMcpNames, } from './plugin.js';
|
|
10
11
|
//# sourceMappingURL=core.js.map
|
package/dist/core.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"core.js","sourceRoot":"","sources":["../src/core.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAE3D,OAAO,EACL,aAAa,EACb,wBAAwB,EACxB,WAAW,EACX,aAAa,EACb,oBAAoB,GACrB,MAAM,aAAa,CAAC"}
|
|
1
|
+
{"version":3,"file":"core.js","sourceRoot":"","sources":["../src/core.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAE3D,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAOnD,OAAO,EACL,aAAa,EACb,wBAAwB,EACxB,WAAW,EACX,aAAa,EACb,oBAAoB,GACrB,MAAM,aAAa,CAAC"}
|
package/dist/create-server.d.ts
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
*/
|
|
15
15
|
import { FastMCP } from 'fastmcp';
|
|
16
16
|
import type { AppiumMcpPlugin } from './plugin.js';
|
|
17
|
+
import { type AppiumMcpPolicy } from './policy.js';
|
|
17
18
|
export interface CreateAppiumMcpServerOptions {
|
|
18
19
|
/**
|
|
19
20
|
* List of plugins to register with the server.
|
|
@@ -35,6 +36,13 @@ export interface CreateAppiumMcpServerOptions {
|
|
|
35
36
|
* Additional instructions appended to the default SERVER_INSTRUCTIONS.
|
|
36
37
|
*/
|
|
37
38
|
additionalInstructions?: string;
|
|
39
|
+
/**
|
|
40
|
+
* Lightweight allowlist policy for registered tools and resources.
|
|
41
|
+
*
|
|
42
|
+
* An empty or omitted allowlist allows every target of that kind. Non-empty allowlists hide
|
|
43
|
+
* nonmatching capabilities from discovery by skipping registration.
|
|
44
|
+
*/
|
|
45
|
+
policy?: AppiumMcpPolicy;
|
|
38
46
|
}
|
|
39
47
|
/**
|
|
40
48
|
* Factory function that creates and wires a fully-configured Appium MCP server,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"create-server.d.ts","sourceRoot":"","sources":["../src/create-server.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAOlC,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"create-server.d.ts","sourceRoot":"","sources":["../src/create-server.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAOlC,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,EAAiB,KAAK,eAAe,EAAE,MAAM,aAAa,CAAC;AAalE,MAAM,WAAW,4BAA4B;IAC3C;;;;OAIG;IACH,OAAO,CAAC,EAAE,eAAe,EAAE,CAAC;IAE5B;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB;;;OAGG;IACH,aAAa,CAAC,EAAE,GAAG,MAAM,IAAI,MAAM,IAAI,MAAM,EAAE,CAAC;IAEhD;;OAEG;IACH,sBAAsB,CAAC,EAAE,MAAM,CAAC;IAEhC;;;;;OAKG;IACH,MAAM,CAAC,EAAE,eAAe,CAAC;CAC1B;AAQD;;;;;;;;;GASG;AACH,wBAAgB,qBAAqB,CACnC,OAAO,GAAE,4BAAiC,GACzC,OAAO,CAkKT"}
|
package/dist/create-server.js
CHANGED
|
@@ -19,6 +19,7 @@ import registerResources from './resources/index.js';
|
|
|
19
19
|
import { safeDeleteAllSessions, listSessions } from './session-store.js';
|
|
20
20
|
import log from './logger.js';
|
|
21
21
|
import { PluginManager } from './plugin.js';
|
|
22
|
+
import { installPolicy } from './policy.js';
|
|
22
23
|
const SERVER_VERSION = pkg.version;
|
|
23
24
|
const SERVER_INSTRUCTIONS = [
|
|
24
25
|
'Appium mobile automation through MCP. Defaults that avoid broken flows:',
|
|
@@ -42,7 +43,7 @@ const SERVER_INSTRUCTIONS = [
|
|
|
42
43
|
* @returns A configured `FastMCP` instance ready to be `start()`-ed.
|
|
43
44
|
*/
|
|
44
45
|
export function createAppiumMcpServer(options = {}) {
|
|
45
|
-
const { plugins = [], serverName = 'MCP Appium', serverVersion = SERVER_VERSION, additionalInstructions, } = options;
|
|
46
|
+
const { plugins = [], serverName = 'MCP Appium', serverVersion = SERVER_VERSION, additionalInstructions, policy, } = options;
|
|
46
47
|
const instructions = additionalInstructions
|
|
47
48
|
? `${SERVER_INSTRUCTIONS}\n${additionalInstructions}`
|
|
48
49
|
: SERVER_INSTRUCTIONS;
|
|
@@ -51,6 +52,7 @@ export function createAppiumMcpServer(options = {}) {
|
|
|
51
52
|
version: serverVersion,
|
|
52
53
|
instructions,
|
|
53
54
|
});
|
|
55
|
+
installPolicy(server, policy);
|
|
54
56
|
// -------------------------------------------------------------------------
|
|
55
57
|
// 1. Install plugin hooks BEFORE registering any tools so that every built-in
|
|
56
58
|
// and plugin tool is wrapped with beforeCall / afterCall.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"create-server.js","sourceRoot":"","sources":["../src/create-server.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,GAAG,MAAM,iBAAiB,CAAC,OAAO,IAAI,EAAE,MAAM,EAAE,CAAC;AACxD,OAAO,aAAa,MAAM,kBAAkB,CAAC;AAC7C,OAAO,iBAAiB,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzE,OAAO,GAAG,MAAM,aAAa,CAAC;AAC9B,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"create-server.js","sourceRoot":"","sources":["../src/create-server.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,GAAG,MAAM,iBAAiB,CAAC,OAAO,IAAI,EAAE,MAAM,EAAE,CAAC;AACxD,OAAO,aAAa,MAAM,kBAAkB,CAAC;AAC7C,OAAO,iBAAiB,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzE,OAAO,GAAG,MAAM,aAAa,CAAC;AAC9B,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,OAAO,EAAE,aAAa,EAAwB,MAAM,aAAa,CAAC;AAElE,MAAM,cAAc,GAAG,GAAG,CAAC,OAA0C,CAAC;AAEtE,MAAM,mBAAmB,GAAG;IAC1B,yEAAyE;IACzE,6LAA6L;IAC7L,+JAA+J;IAC/J,8GAA8G;IAC9G,oJAAoJ;IACpJ,iGAAiG;CAClG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAsCb,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E;;;;;;;;;GASG;AACH,MAAM,UAAU,qBAAqB,CACnC,UAAwC,EAAE;IAE1C,MAAM,EACJ,OAAO,GAAG,EAAE,EACZ,UAAU,GAAG,YAAY,EACzB,aAAa,GAAG,cAAc,EAC9B,sBAAsB,EACtB,MAAM,GACP,GAAG,OAAO,CAAC;IAEZ,MAAM,YAAY,GAAG,sBAAsB;QACzC,CAAC,CAAC,GAAG,mBAAmB,KAAK,sBAAsB,EAAE;QACrD,CAAC,CAAC,mBAAmB,CAAC;IAExB,MAAM,MAAM,GAAG,IAAI,OAAO,CAAC;QACzB,IAAI,EAAE,UAAU;QAChB,OAAO,EAAE,aAAa;QACtB,YAAY;KACb,CAAC,CAAC;IAEH,aAAa,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE9B,4EAA4E;IAC5E,8EAA8E;IAC9E,6DAA6D;IAC7D,4EAA4E;IAC5E,MAAM,OAAO,GAAG,IAAI,aAAa,CAAC,MAAM,CAAC,CAAC;IAC1C,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAC5B,CAAC;IAED,4EAA4E;IAC5E,4EAA4E;IAC5E,8DAA8D;IAC9D,4EAA4E;IAC5E,OAAO,CAAC,0BAA0B,EAAE,CAAC;IAErC,4EAA4E;IAC5E,uDAAuD;IACvD,4EAA4E;IAC5E,iBAAiB,CAAC,MAAM,CAAC,CAAC;IAC1B,aAAa,CAAC,MAAM,CAAC,CAAC;IAEtB,4EAA4E;IAC5E,4EAA4E;IAC5E,yDAAyD;IACzD,4EAA4E;IAC5E,IAAI,iBAAiB,GAAG,CAAC,CAAC;IAC1B,IAAI,iBAAiB,GAAG,KAAK,CAAC;IAE9B,gFAAgF;IAChF,uDAAuD;IACvD,IAAI,uBAAuB,GAAyB,IAAI,CAAC;IACzD,IAAI,oBAAoB,GAAyB,IAAI,CAAC;IAEtD;;;;;OAKG;IACH,MAAM,wBAAwB,GAAG,KAAK,IAAmB,EAAE;QACzD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;QACT,CAAC;QAED,IAAI,oBAAoB,IAAI,IAAI,EAAE,CAAC;YACjC,MAAM,oBAAoB,CAAC;QAC7B,CAAC;QAED,IAAI,iBAAiB,EAAE,CAAC;YACtB,OAAO;QACT,CAAC;QAED,uBAAuB,KAAK,CAAC,KAAK,IAAI,EAAE;YACtC,IAAI
,CAAC;gBACH,MAAM,OAAO,CAAC,UAAU,EAAE,CAAC;gBAC3B,iBAAiB,GAAG,IAAI,CAAC;YAC3B,CAAC;oBAAS,CAAC;gBACT,uBAAuB,GAAG,IAAI,CAAC;YACjC,CAAC;QACH,CAAC,CAAC,EAAE,CAAC;QAEL,MAAM,uBAAuB,CAAC;IAChC,CAAC,CAAC;IAEF;;;OAGG;IACH,MAAM,oBAAoB,GAAG,KAAK,IAAmB,EAAE;QACrD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;QACT,CAAC;QAED,IAAI,uBAAuB,IAAI,IAAI,EAAE,CAAC;YACpC,MAAM,uBAAuB,CAAC;QAChC,CAAC;QAED,IAAI,iBAAiB,GAAG,CAAC,IAAI,CAAC,iBAAiB,EAAE,CAAC;YAChD,OAAO;QACT,CAAC;QAED,oBAAoB,KAAK,CAAC,KAAK,IAAI,EAAE;YACnC,IAAI,CAAC;gBACH,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;gBACxB,iBAAiB,GAAG,KAAK,CAAC;YAC5B,CAAC;oBAAS,CAAC;gBACT,oBAAoB,GAAG,IAAI,CAAC;YAC9B,CAAC;QACH,CAAC,CAAC,EAAE,CAAC;QAEL,MAAM,oBAAoB,CAAC;IAC7B,CAAC,CAAC;IAEF,4EAA4E;IAC5E,iDAAiD;IACjD,4EAA4E;IAC5E,MAAM,CAAC,EAAE,CAAC,SAAS,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE;QACnC,GAAG,CAAC,IAAI,CAAC,mBAAmB,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;QAC7C,iBAAiB,IAAI,CAAC,CAAC;QAEvB,kDAAkD;QAClD,MAAM,wBAAwB,EAAE,CAAC;IACnC,CAAC,CAAC,CAAC;IAEH,MAAM,CAAC,EAAE,CAAC,YAAY,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE;QACtC,GAAG,CAAC,IAAI,CAAC,sBAAsB,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;QAChD,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,iBAAiB,GAAG,CAAC,CAAC,CAAC;QAEvD,IAAI,iBAAiB,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO;QACT,CAAC;QAED,MAAM,MAAM,GAAG,8BAA8B,EAAE,CAAC;QAChD,MAAM,aAAa,GAAG,YAAY,EAAE,CAAC,MAAM,CACzC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,SAAS,KAAK,OAAO,CAC3C,CAAC;QAEF,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;YAClD,GAAG,CAAC,IAAI,CACN,GAAG,aAAa,CAAC,MAAM,kDAAkD;gBACvE,yCAAyC,CAC5C,CAAC;QACJ,CAAC;aAAM,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpC,IAAI,CAAC;gBACH,GAAG,CAAC,IAAI,CACN,GAAG,aAAa,CAAC,MAAM,0DAA0D,CAClF,CAAC;gBACF,MAAM,YAAY,GAAG,MAAM,qBAAqB,EAAE,CAAC;gBACnD,GAAG,CAAC,IAAI,CACN,GAAG,YAAY,oDAAoD,CACpE,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,GAAG,CAAC,KAAK,CAAC,0CAA0C,EAAE,KAAK,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;QAED,wDAAwD;QACxD,MAAM,oBAAoB,EAAE,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,8BAA8B;IACrC,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,+BAA+B,EAAE
,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC9E,IAAI,GAAG,KAAK,MAAM,EAAE,CAAC;QACnB,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,GAAG,KAAK,YAAY,EAAE,CAAC;QACzB,GAAG,CAAC,IAAI,CACN,oCAAoC,GAAG,+CAA+C,CACvF,CAAC;IACJ,CAAC;IACD,OAAO,YAAY,CAAC;AACtB,CAAC"}
|
package/dist/policy.d.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { FastMCP } from 'fastmcp';
|
|
2
|
+
export interface AppiumMcpPolicy {
|
|
3
|
+
allowTools?: readonly RegExp[];
|
|
4
|
+
allowResources?: readonly RegExp[];
|
|
5
|
+
}
|
|
6
|
+
export type PolicyTargetKind = 'tool' | 'resource';
|
|
7
|
+
export type PolicyDecisionReason = 'empty_allowlist' | 'matched_allowlist' | 'not_in_allowlist';
|
|
8
|
+
export interface PolicyDecision {
|
|
9
|
+
allowed: boolean;
|
|
10
|
+
reason: PolicyDecisionReason;
|
|
11
|
+
targetKind: PolicyTargetKind;
|
|
12
|
+
target: string;
|
|
13
|
+
matchedRule?: string;
|
|
14
|
+
}
|
|
15
|
+
export declare function evaluatePolicyTarget(policy: AppiumMcpPolicy | undefined, targetKind: PolicyTargetKind, target: string): PolicyDecision;
|
|
16
|
+
export declare function installPolicy(server: FastMCP, policy?: AppiumMcpPolicy): void;
|
|
17
|
+
//# sourceMappingURL=policy.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"policy.d.ts","sourceRoot":"","sources":["../src/policy.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAGvC,MAAM,WAAW,eAAe;IAC9B,UAAU,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAC/B,cAAc,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;CACpC;AAED,MAAM,MAAM,gBAAgB,GAAG,MAAM,GAAG,UAAU,CAAC;AAEnD,MAAM,MAAM,oBAAoB,GAC5B,iBAAiB,GACjB,mBAAmB,GACnB,kBAAkB,CAAC;AAEvB,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,oBAAoB,CAAC;IAC7B,UAAU,EAAE,gBAAgB,CAAC;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAUD,wBAAgB,oBAAoB,CAClC,MAAM,EAAE,eAAe,GAAG,SAAS,EACnC,UAAU,EAAE,gBAAgB,EAC5B,MAAM,EAAE,MAAM,GACb,cAAc,CA+BhB;AAED,wBAAgB,aAAa,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,EAAE,eAAe,GAAG,IAAI,CA0E7E"}
|
package/dist/policy.js
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import log from './logger.js';
|
|
2
|
+
export function evaluatePolicyTarget(policy, targetKind, target) {
|
|
3
|
+
const allowlist = targetKind === 'tool' ? policy?.allowTools : policy?.allowResources;
|
|
4
|
+
if (!allowlist || allowlist.length === 0) {
|
|
5
|
+
return {
|
|
6
|
+
allowed: true,
|
|
7
|
+
reason: 'empty_allowlist',
|
|
8
|
+
targetKind,
|
|
9
|
+
target,
|
|
10
|
+
};
|
|
11
|
+
}
|
|
12
|
+
for (const rule of allowlist) {
|
|
13
|
+
if (matchesRule(rule, target)) {
|
|
14
|
+
return {
|
|
15
|
+
allowed: true,
|
|
16
|
+
reason: 'matched_allowlist',
|
|
17
|
+
targetKind,
|
|
18
|
+
target,
|
|
19
|
+
matchedRule: rule.toString(),
|
|
20
|
+
};
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
return {
|
|
24
|
+
allowed: false,
|
|
25
|
+
reason: 'not_in_allowlist',
|
|
26
|
+
targetKind,
|
|
27
|
+
target,
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
export function installPolicy(server, policy) {
|
|
31
|
+
if (!policy) {
|
|
32
|
+
return;
|
|
33
|
+
}
|
|
34
|
+
validatePolicy(policy);
|
|
35
|
+
const originalAddTool = server.addTool.bind(server);
|
|
36
|
+
server.addTool = ((toolDef) => {
|
|
37
|
+
if (!isToolAllowed(policy, toolDef)) {
|
|
38
|
+
return;
|
|
39
|
+
}
|
|
40
|
+
return originalAddTool(toolDef);
|
|
41
|
+
});
|
|
42
|
+
const originalAddTools = server.addTools.bind(server);
|
|
43
|
+
server.addTools = ((toolDefs) => {
|
|
44
|
+
const allowedToolDefs = toolDefs.filter((toolDef) => isToolAllowed(policy, toolDef));
|
|
45
|
+
if (allowedToolDefs.length === 0) {
|
|
46
|
+
return;
|
|
47
|
+
}
|
|
48
|
+
return originalAddTools(allowedToolDefs);
|
|
49
|
+
});
|
|
50
|
+
const originalAddResource = server.addResource.bind(server);
|
|
51
|
+
server.addResource = ((resourceDef) => {
|
|
52
|
+
if (!isResourceAllowed(policy, resourceDef, 'resource')) {
|
|
53
|
+
return;
|
|
54
|
+
}
|
|
55
|
+
return originalAddResource(resourceDef);
|
|
56
|
+
});
|
|
57
|
+
const originalAddResources = server.addResources.bind(server);
|
|
58
|
+
server.addResources = ((resourceDefs) => {
|
|
59
|
+
const allowedResourceDefs = resourceDefs.filter((resourceDef) => isResourceAllowed(policy, resourceDef, 'resource'));
|
|
60
|
+
if (allowedResourceDefs.length === 0) {
|
|
61
|
+
return;
|
|
62
|
+
}
|
|
63
|
+
return originalAddResources(allowedResourceDefs);
|
|
64
|
+
});
|
|
65
|
+
const originalAddResourceTemplate = server.addResourceTemplate.bind(server);
|
|
66
|
+
server.addResourceTemplate = ((resourceTemplateDef) => {
|
|
67
|
+
if (!isResourceAllowed(policy, resourceTemplateDef, 'resource template')) {
|
|
68
|
+
return;
|
|
69
|
+
}
|
|
70
|
+
return originalAddResourceTemplate(resourceTemplateDef);
|
|
71
|
+
});
|
|
72
|
+
const originalAddResourceTemplates = server.addResourceTemplates.bind(server);
|
|
73
|
+
server.addResourceTemplates = ((resourceTemplateDefs) => {
|
|
74
|
+
const allowedResourceTemplateDefs = resourceTemplateDefs.filter((resourceTemplateDef) => isResourceAllowed(policy, resourceTemplateDef, 'resource template'));
|
|
75
|
+
if (allowedResourceTemplateDefs.length === 0) {
|
|
76
|
+
return;
|
|
77
|
+
}
|
|
78
|
+
return originalAddResourceTemplates(allowedResourceTemplateDefs);
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
function isToolAllowed(policy, toolDef) {
|
|
82
|
+
const decision = evaluatePolicyTarget(policy, 'tool', toolDef.name);
|
|
83
|
+
if (!decision.allowed) {
|
|
84
|
+
log.warn(`Policy denied tool registration: ${formatPolicyTargetForLog(decision.target)} (${decision.reason})`);
|
|
85
|
+
}
|
|
86
|
+
return decision.allowed;
|
|
87
|
+
}
|
|
88
|
+
function isResourceAllowed(policy, resourceDef, label) {
|
|
89
|
+
const target = readResourceName(resourceDef);
|
|
90
|
+
const decision = evaluatePolicyTarget(policy, 'resource', target);
|
|
91
|
+
if (!decision.allowed) {
|
|
92
|
+
log.warn(`Policy denied ${label} registration: ${formatResourceTargetForLog(resourceDef, decision.target)} (${decision.reason})`);
|
|
93
|
+
}
|
|
94
|
+
return decision.allowed;
|
|
95
|
+
}
|
|
96
|
+
function matchesRule(rule, target) {
|
|
97
|
+
return new RegExp(rule.source, rule.flags).test(target);
|
|
98
|
+
}
|
|
99
|
+
function validatePolicy(policy) {
|
|
100
|
+
validateAllowlist(policy.allowTools, 'policy.allowTools');
|
|
101
|
+
validateAllowlist(policy.allowResources, 'policy.allowResources');
|
|
102
|
+
}
|
|
103
|
+
function validateAllowlist(allowlist, label) {
|
|
104
|
+
if (allowlist === undefined) {
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
107
|
+
if (!Array.isArray(allowlist)) {
|
|
108
|
+
throw new TypeError(`${label} must be an array of RegExp values`);
|
|
109
|
+
}
|
|
110
|
+
for (const rule of allowlist) {
|
|
111
|
+
if (!(rule instanceof RegExp)) {
|
|
112
|
+
throw new TypeError(`${label} must contain only RegExp values`);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
function formatPolicyTargetForLog(target) {
|
|
117
|
+
return target.length > 0 ? target : '<unnamed>';
|
|
118
|
+
}
|
|
119
|
+
function formatResourceTargetForLog(resourceDef, target) {
|
|
120
|
+
const label = formatPolicyTargetForLog(target);
|
|
121
|
+
const identifiers = readResourceLogIdentifiers(resourceDef);
|
|
122
|
+
return identifiers.length > 0 ? `${label}; ${identifiers.join('; ')}` : label;
|
|
123
|
+
}
|
|
124
|
+
function readResourceLogIdentifiers(resourceDef) {
|
|
125
|
+
const identifiers = [];
|
|
126
|
+
if ('uri' in resourceDef && resourceDef.uri.length > 0) {
|
|
127
|
+
identifiers.push(`uri=${resourceDef.uri}`);
|
|
128
|
+
}
|
|
129
|
+
if ('uriTemplate' in resourceDef && resourceDef.uriTemplate.length > 0) {
|
|
130
|
+
identifiers.push(`uriTemplate=${resourceDef.uriTemplate}`);
|
|
131
|
+
}
|
|
132
|
+
return identifiers;
|
|
133
|
+
}
|
|
134
|
+
function readResourceName(resourceDef) {
|
|
135
|
+
return typeof resourceDef.name === 'string' ? resourceDef.name : '';
|
|
136
|
+
}
|
|
137
|
+
//# sourceMappingURL=policy.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"policy.js","sourceRoot":"","sources":["../src/policy.ts"],"names":[],"mappings":"AACA,OAAO,GAAG,MAAM,aAAa,CAAC;AA8B9B,MAAM,UAAU,oBAAoB,CAClC,MAAmC,EACnC,UAA4B,EAC5B,MAAc;IAEd,MAAM,SAAS,GACb,UAAU,KAAK,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,MAAM,EAAE,cAAc,CAAC;IAEtE,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzC,OAAO;YACL,OAAO,EAAE,IAAI;YACb,MAAM,EAAE,iBAAiB;YACzB,UAAU;YACV,MAAM;SACP,CAAC;IACJ,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,WAAW,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC;YAC9B,OAAO;gBACL,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE,mBAAmB;gBAC3B,UAAU;gBACV,MAAM;gBACN,WAAW,EAAE,IAAI,CAAC,QAAQ,EAAE;aAC7B,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO;QACL,OAAO,EAAE,KAAK;QACd,MAAM,EAAE,kBAAkB;QAC1B,UAAU;QACV,MAAM;KACP,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAAe,EAAE,MAAwB;IACrE,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO;IACT,CAAC;IAED,cAAc,CAAC,MAAM,CAAC,CAAC;IAEvB,MAAM,eAAe,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACpD,MAAM,CAAC,OAAO,GAAG,CAAC,CAAC,OAAqB,EAAkC,EAAE;QAC1E,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;YACpC,OAAO;QACT,CAAC;QACD,OAAO,eAAe,CAAC,OAAO,CAAC,CAAC;IAClC,CAAC,CAAuB,CAAC;IAEzB,MAAM,gBAAgB,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACtD,MAAM,CAAC,QAAQ,GAAG,CAAC,CACjB,QAAuB,EACU,EAAE;QACnC,MAAM,eAAe,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE,CAClD,aAAa,CAAC,MAAM,EAAE,OAAO,CAAC,CACd,CAAC;QACnB,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACjC,OAAO;QACT,CAAC;QACD,OAAO,gBAAgB,CAAC,eAAe,CAAC,CAAC;IAC3C,CAAC,CAAwB,CAAC;IAE1B,MAAM,mBAAmB,GAAG,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC5D,MAAM,CAAC,WAAW,GAAG,CAAC,CACpB,WAA6B,EACO,EAAE;QACtC,IAAI,CAAC,iBAAiB,CAAC,MAAM,EAAE,WAAW,EAAE,UAAU,CAAC,EAAE,CAAC;YACxD,OAAO;QACT,CAAC;QACD,OAAO,mBAAmB,CAAC,WAAW,CAAC,CAAC;IAC1C,CAAC,CAA2B,CAAC;IAE7B,MAAM,oBAAoB,GAAG,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC9D,MAAM,CAAC,YAAY,GAAG,CAAC,CACrB,YAA+B,EACM,EAAE;QACvC,MAAM,mBAAmB,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,EAAE,CAC9D,iBAAiB,CAAC,MAAM,EAAE,WAAW,EAAE,UAAU,
CAAC,CAC9B,CAAC;QACvB,IAAI,mBAAmB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrC,OAAO;QACT,CAAC;QACD,OAAO,oBAAoB,CAAC,mBAAmB,CAAC,CAAC;IACnD,CAAC,CAA4B,CAAC;IAE9B,MAAM,2BAA2B,GAAG,MAAM,CAAC,mBAAmB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC5E,MAAM,CAAC,mBAAmB,GAAG,CAAC,CAC5B,mBAA6C,EACD,EAAE;QAC9C,IAAI,CAAC,iBAAiB,CAAC,MAAM,EAAE,mBAAmB,EAAE,mBAAmB,CAAC,EAAE,CAAC;YACzE,OAAO;QACT,CAAC;QACD,OAAO,2BAA2B,CAAC,mBAAmB,CAAC,CAAC;IAC1D,CAAC,CAAmC,CAAC;IAErC,MAAM,4BAA4B,GAAG,MAAM,CAAC,oBAAoB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC9E,MAAM,CAAC,oBAAoB,GAAG,CAAC,CAC7B,oBAA+C,EACF,EAAE;QAC/C,MAAM,2BAA2B,GAAG,oBAAoB,CAAC,MAAM,CAC7D,CAAC,mBAAmB,EAAE,EAAE,CACtB,iBAAiB,CAAC,MAAM,EAAE,mBAAmB,EAAE,mBAAmB,CAAC,CACzC,CAAC;QAC/B,IAAI,2BAA2B,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7C,OAAO;QACT,CAAC;QACD,OAAO,4BAA4B,CAAC,2BAA2B,CAAC,CAAC;IACnE,CAAC,CAAoC,CAAC;AACxC,CAAC;AAED,SAAS,aAAa,CACpB,MAAuB,EACvB,OAAqB;IAErB,MAAM,QAAQ,GAAG,oBAAoB,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IACpE,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;QACtB,GAAG,CAAC,IAAI,CACN,oCAAoC,wBAAwB,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,QAAQ,CAAC,MAAM,GAAG,CACrG,CAAC;IACJ,CAAC;IACD,OAAO,QAAQ,CAAC,OAAO,CAAC;AAC1B,CAAC;AAED,SAAS,iBAAiB,CACxB,MAAuB,EACvB,WAAgC,EAChC,KAAuC;IAEvC,MAAM,MAAM,GAAG,gBAAgB,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,QAAQ,GAAG,oBAAoB,CAAC,MAAM,EAAE,UAAU,EAAE,MAAM,CAAC,CAAC;IAClE,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;QACtB,GAAG,CAAC,IAAI,CACN,iBAAiB,KAAK,kBAAkB,0BAA0B,CAAC,WAAW,EAAE,QAAQ,CAAC,MAAM,CAAC,KAAK,QAAQ,CAAC,MAAM,GAAG,CACxH,CAAC;IACJ,CAAC;IACD,OAAO,QAAQ,CAAC,OAAO,CAAC;AAC1B,CAAC;AAED,SAAS,WAAW,CAAC,IAAY,EAAE,MAAc;IAC/C,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC1D,CAAC;AAED,SAAS,cAAc,CAAC,MAAuB;IAC7C,iBAAiB,CAAC,MAAM,CAAC,UAAU,EAAE,mBAAmB,CAAC,CAAC;IAC1D,iBAAiB,CAAC,MAAM,CAAC,cAAc,EAAE,uBAAuB,CAAC,CAAC;AACpE,CAAC;AAED,SAAS,iBAAiB,CACxB,SAAwC,EACxC,KAAa;IAEb,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;QAC5B,OAAO;IACT,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,SAAS,CAAC,GAAG,KAAK,oCAAoC
,CAAC,CAAC;IACpE,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,CAAC,CAAC,IAAI,YAAY,MAAM,CAAC,EAAE,CAAC;YAC9B,MAAM,IAAI,SAAS,CAAC,GAAG,KAAK,kCAAkC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,wBAAwB,CAAC,MAAc;IAC9C,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC;AAClD,CAAC;AAED,SAAS,0BAA0B,CACjC,WAAgC,EAChC,MAAc;IAEd,MAAM,KAAK,GAAG,wBAAwB,CAAC,MAAM,CAAC,CAAC;IAC/C,MAAM,WAAW,GAAG,0BAA0B,CAAC,WAAW,CAAC,CAAC;IAE5D,OAAO,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,KAAK,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;AAChF,CAAC;AAED,SAAS,0BAA0B,CACjC,WAAgC;IAEhC,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,IAAI,KAAK,IAAI,WAAW,IAAI,WAAW,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvD,WAAW,CAAC,IAAI,CAAC,OAAO,WAAW,CAAC,GAAG,EAAE,CAAC,CAAC;IAC7C,CAAC;IACD,IAAI,aAAa,IAAI,WAAW,IAAI,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvE,WAAW,CAAC,IAAI,CAAC,eAAe,WAAW,CAAC,WAAW,EAAE,CAAC,CAAC;IAC7D,CAAC;IACD,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,SAAS,gBAAgB,CAAC,WAAgC;IACxD,OAAO,OAAO,WAAW,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;AACtE,CAAC"}
|
|
@@ -1,23 +1,50 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* RAG
|
|
2
|
+
* Answer-grounded RAG eval for the Appium documentation tool.
|
|
3
|
+
*
|
|
4
|
+
* Runs the documentation_query retrieval pipeline against a fixed set of
|
|
5
|
+
* realistic queries and asks the only question that matters downstream:
|
|
6
|
+
* "did the answer text actually land in the chunks an LLM would see?"
|
|
7
|
+
*
|
|
8
|
+
* What we measure:
|
|
9
|
+
*
|
|
10
|
+
* 1. answerSpanRecall@K
|
|
11
|
+
* For each query, the dataset declares short verbatim phrases lifted
|
|
12
|
+
* from the docs (`answerSpans`). We concatenate the top-K retrieved
|
|
13
|
+
* chunks and check what fraction of the spans appears in that text.
|
|
14
|
+
* "anyOf" semantics: a query that finds at least one span counts as a
|
|
15
|
+
* hit. Spans are 30-140 chars and chosen so any reasonable chunk
|
|
16
|
+
* containing the answer will include them, regardless of chunk
|
|
17
|
+
* boundaries -- so the metric is splitter-neutral.
|
|
18
|
+
*
|
|
19
|
+
* 2. hit@{1,3,5,10}
|
|
20
|
+
* Did any chunk at rank <= K carry any answerSpan? Direct measure of
|
|
21
|
+
* "does the LLM see the answer" at different context budgets.
|
|
22
|
+
*
|
|
23
|
+
* 3. MRR
|
|
24
|
+
* Mean reciprocal rank of the *first* chunk that carries an answerSpan.
|
|
25
|
+
* MRR-equivalent on content, not on file paths -- a chunk from the
|
|
26
|
+
* right file but wrong section is worth nothing here.
|
|
27
|
+
*
|
|
28
|
+
* 4. contextEfficiency
|
|
29
|
+
* For queries we hit, 1000 * spansCovered / totalChars(topK). Spans-per-
|
|
30
|
+
* kchar density. Low = lots of noise around the answer.
|
|
31
|
+
*
|
|
32
|
+
* 5. fileRecall@{5,10} (diagnostic only)
|
|
33
|
+
* Did the right *file* appear in top-K? Kept so we can spot the
|
|
34
|
+
* "right-file wrong-chunk" failure mode (right file present but no
|
|
35
|
+
* answerSpan landed).
|
|
36
|
+
*
|
|
37
|
+
* Match semantics: lowercase + collapse whitespace, then substring check.
|
|
3
38
|
*
|
|
4
39
|
* Usage (after `npm run build`):
|
|
5
|
-
* node dist/scripts/eval-documentation-rag.js
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
* the
|
|
12
|
-
*
|
|
13
|
-
* 4. Persists results to src/scripts/eval-results/<ISO>.json plus a
|
|
14
|
-
* `latest.json` so successive runs can be diffed.
|
|
15
|
-
*
|
|
16
|
-
* Notes:
|
|
17
|
-
* - The first run after a server restart pays the embedding cold start
|
|
18
|
-
* (~30-60s for the current corpus).
|
|
19
|
-
* - Match strategy: a retrieved source matches an expected source if its
|
|
20
|
-
* `relativePath` ends with the expected path (case-sensitive).
|
|
40
|
+
* node dist/scripts/eval-documentation-rag.js \
|
|
41
|
+
* [--top-k=10] [--label=NAME] [--quiet] [--no-save]
|
|
42
|
+
*
|
|
43
|
+
* --top-k=N number of chunks to retrieve & evaluate (default 10)
|
|
44
|
+
* --label=NAME label written into the saved run, useful for comparing
|
|
45
|
+
* index variants (e.g. --label=before, --label=after)
|
|
46
|
+
* --quiet suppress the per-query log lines and table
|
|
47
|
+
* --no-save don't persist results JSON to disk
|
|
21
48
|
*/
|
|
22
49
|
export {};
|
|
23
50
|
//# sourceMappingURL=eval-documentation-rag.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"eval-documentation-rag.d.ts","sourceRoot":"","sources":["../../src/scripts/eval-documentation-rag.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"eval-documentation-rag.d.ts","sourceRoot":"","sources":["../../src/scripts/eval-documentation-rag.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+CG"}
|