@crowi/plugin-search-opensearch 0.1.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +165 -0
- package/dist/index.d.mts +268 -0
- package/dist/index.d.ts +268 -0
- package/dist/index.js +835 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +805 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +46 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import { z } from 'zod/v3';
|
|
2
|
+
import { SearchDriver, PluginLogger, SearchQueryViewer, SearchQueryGrants, CrowiPlugin } from '@crowi/plugin-api';
|
|
3
|
+
import { Client } from '@opensearch-project/opensearch';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* OpenSearch driver implementing the `SearchDriver` contract. Owns
|
|
7
|
+
* the Client, the `${indexName}-current` alias (legacy ops compat),
|
|
8
|
+
* single-doc index / remove, query against the alias, and rebuild-
|
|
9
|
+
* from-scratch in 2k-doc bulk batches with bookmark counts pre-fetched
|
|
10
|
+
* in one aggregate. Document field shape (path / body / username /
|
|
11
|
+
* grant / granted_users / *_count / *_at) matches the ES plugin's
|
|
12
|
+
* shape so a cluster migration is a re-point + rebuild rather than a
|
|
13
|
+
* mapping rewrite.
|
|
14
|
+
*
|
|
15
|
+
* SDK note: `@opensearch-project/opensearch` 3.x returns
|
|
16
|
+
* `{ body, statusCode, ... }` wrappers around every API response (the
|
|
17
|
+
* shape inherited from the old `elasticsearch-js` 7.x line). The
|
|
18
|
+
* Elasticsearch 9 client we use for `@crowi/plugin-search-elasticsearch`
|
|
19
|
+
* collapsed those wrappers; unlike that plugin, every call site here unwraps `body`
|
|
20
|
+
* explicitly. Bulk requests likewise take `{ body: operations }`, not
|
|
21
|
+
* the ES 9 `{ operations }` keyword.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
type Analyzer = 'default' | 'kuromoji' | 'sudachi';
|
|
25
|
+
interface OpenSearchDriverConfig {
|
|
26
|
+
url: string;
|
|
27
|
+
indexName: string;
|
|
28
|
+
requestTimeout: number;
|
|
29
|
+
analyzer: Analyzer;
|
|
30
|
+
}
|
|
31
|
+
interface OpenSearchDriverDeps {
|
|
32
|
+
log?: PluginLogger;
|
|
33
|
+
/**
|
|
34
|
+
* Iterate over every page in the Mongo Page collection, cursor-style.
|
|
35
|
+
* The plugin can't import the Page model directly, so the manager wires
|
|
36
|
+
* this in from `ctx.model('Page')`. Each yielded doc is the lean
|
|
37
|
+
* shape produced by `Page.getStreamOfFindAll({ publicOnly: false })`.
|
|
38
|
+
*/
|
|
39
|
+
iteratePages?: (handler: (page: PageStreamDoc) => Promise<void>) => Promise<void>;
|
|
40
|
+
/** Total page count, used for progress reporting. */
|
|
41
|
+
countAllPages?: () => Promise<number>;
|
|
42
|
+
/**
|
|
43
|
+
* Bulk-fetch bookmark counts for every page in one Mongo aggregate.
|
|
44
|
+
* Avoids the per-doc N+1 lookup the legacy rebuild used. Returns a
|
|
45
|
+
* `Map<pageId, count>`; pages without bookmarks may be absent
|
|
46
|
+
* (caller defaults to 0).
|
|
47
|
+
*/
|
|
48
|
+
getBookmarkCountsBulk?: () => Promise<Map<string, number>>;
|
|
49
|
+
/** Total user count, used to scale the bookmark-count factor. */
|
|
50
|
+
countUsers?: () => Promise<number>;
|
|
51
|
+
}
|
|
52
|
+
/** The lean Page document shape we expect from the rebuild stream. */
|
|
53
|
+
interface PageStreamDoc {
|
|
54
|
+
_id: {
|
|
55
|
+
toString: () => string;
|
|
56
|
+
} | string;
|
|
57
|
+
path: string;
|
|
58
|
+
redirectTo: string | null;
|
|
59
|
+
status: string;
|
|
60
|
+
grant: number;
|
|
61
|
+
grantedUsers?: Array<{
|
|
62
|
+
toString: () => string;
|
|
63
|
+
} | string>;
|
|
64
|
+
creator?: {
|
|
65
|
+
username?: string;
|
|
66
|
+
};
|
|
67
|
+
revision?: {
|
|
68
|
+
body?: string;
|
|
69
|
+
};
|
|
70
|
+
liker?: unknown[];
|
|
71
|
+
commentCount?: number;
|
|
72
|
+
bookmarkCount?: number;
|
|
73
|
+
createdAt?: Date;
|
|
74
|
+
updatedAt?: Date;
|
|
75
|
+
}
|
|
76
|
+
interface OpenSearchDriver extends SearchDriver {
|
|
77
|
+
/** Currently-targeted alias name (`<indexName>-current`). Exposed for tests / admin UI. */
|
|
78
|
+
readonly aliasName: string;
|
|
79
|
+
/** OpenSearch node URI parsed out of `config.url`. */
|
|
80
|
+
readonly node: string;
|
|
81
|
+
/** Base index name (without timestamp / `-current` suffix). */
|
|
82
|
+
readonly baseIndexName: string;
|
|
83
|
+
/** Test-only handle to the underlying client. */
|
|
84
|
+
readonly client: Client;
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Mutable driver state. `createOpenSearchDriver` receives a ref to
|
|
88
|
+
* this; each driver method snapshots the fields it needs *once at the
|
|
89
|
+
* top* of the call, so a concurrent `reconfigure` cannot swap the
|
|
90
|
+
* client / index name mid-operation. `reconfigure` mutates the fields
|
|
91
|
+
* in place via {@link applyConfigInPlace}; the next call sees the new
|
|
92
|
+
* values. An empty `url` leaves `client` as `null` — the methods then
|
|
93
|
+
* throw a `Search not configured` error rather than touching a stale
|
|
94
|
+
* client.
|
|
95
|
+
*/
|
|
96
|
+
interface OSDriverState {
|
|
97
|
+
/** `null` when `url` is empty (driver configured-but-disabled). */
|
|
98
|
+
client: Client | null;
|
|
99
|
+
/** OpenSearch node URI parsed out of `config.url`; empty string when `url` is empty. */
|
|
100
|
+
node: string;
|
|
101
|
+
/** Base index name (without timestamp / `-current` suffix). */
|
|
102
|
+
baseIndexName: string;
|
|
103
|
+
/** Runtime alias the driver reads / writes (`<baseIndexName>-current`). */
|
|
104
|
+
aliasName: string;
|
|
105
|
+
analyzer: Analyzer;
|
|
106
|
+
requestTimeout: number;
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Build a fresh {@link OSDriverState} from a config. An empty `url`
|
|
110
|
+
* yields a disabled state (`client: null`) instead of throwing — the
|
|
111
|
+
* driver stays registered but every method rejects with a
|
|
112
|
+
* `Search not configured` error.
|
|
113
|
+
*/
|
|
114
|
+
declare function applyConfig(config: OpenSearchDriverConfig): OSDriverState;
|
|
115
|
+
/**
|
|
116
|
+
* Mutate `target` in place to reflect `config`. Used by `reconfigure`:
|
|
117
|
+
* the old client reference is returned so the caller can `close()` it
|
|
118
|
+
* (fire-and-forget) once the swap is done — inflight operations have
|
|
119
|
+
* already snapshotted the old client and will run to completion.
|
|
120
|
+
*/
|
|
121
|
+
declare function applyConfigInPlace(target: OSDriverState, config: OpenSearchDriverConfig): {
|
|
122
|
+
oldClient: Client | null;
|
|
123
|
+
};
|
|
124
|
+
/**
|
|
125
|
+
* Build the search driver around an {@link OSDriverState} ref. Methods
|
|
126
|
+
* snapshot `state` *once at the top* — a `reconfigure` running
|
|
127
|
+
* concurrently with an inflight call cannot swap the client mid-call;
|
|
128
|
+
* the next call sees the new client / index name.
|
|
129
|
+
*/
|
|
130
|
+
declare function createOpenSearchDriver(state: OSDriverState, deps?: OpenSearchDriverDeps): OpenSearchDriver;
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* Search-string parser for the OpenSearch driver.
|
|
134
|
+
*
|
|
135
|
+
* Splits a free-form query into positive / negative keywords and
|
|
136
|
+
* phrases. Lifted from the legacy `packages/api/src/service/query.ts`
|
|
137
|
+
* with no behaviour changes — preserved here as a plugin-private
|
|
138
|
+
* helper because the parser is currently OpenSearch / Elasticsearch
|
|
139
|
+
* specific (the +/- and `"phrase"` syntax maps directly to
|
|
140
|
+
* `multi_match` queries). When a future driver wants the same shape,
|
|
141
|
+
* factor it back into `@crowi/plugin-api`.
|
|
142
|
+
*/
|
|
143
|
+
type PositiveAndNegative<T> = {
|
|
144
|
+
positive: T;
|
|
145
|
+
negative: T;
|
|
146
|
+
};
|
|
147
|
+
type ParsedSearchQuery = {
|
|
148
|
+
keywords: PositiveAndNegative<string[]>;
|
|
149
|
+
phrases: PositiveAndNegative<string[]>;
|
|
150
|
+
};
|
|
151
|
+
declare const parseQuery: (query: string) => ParsedSearchQuery;
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* Build an OpenSearch search request body from the SearchQuery shape
|
|
155
|
+
* exposed by `@crowi/plugin-api`. The driver passes a parsed
|
|
156
|
+
* keyword/phrase tree plus the viewer + grants from the original
|
|
157
|
+
* SearchQuery; this module composes them into a single bool query.
|
|
158
|
+
*
|
|
159
|
+
* The wire shape is identical to Elasticsearch's query DSL (OpenSearch
|
|
160
|
+
* forked from ES 7.10.2 and has kept the search API surface compatible),
|
|
161
|
+
* so the builder is a 1:1 copy of `@crowi/plugin-search-elasticsearch`'s
|
|
162
|
+
* builder for now. Kept private here rather than shared because the two
|
|
163
|
+
* drivers may diverge in the future (e.g. OpenSearch's neural / k-NN
|
|
164
|
+
* extensions) and hard-coupling them would block that.
|
|
165
|
+
*
|
|
166
|
+
* Design notes:
|
|
167
|
+
* - All filters are composed at the top-level `bool`. We never nest
|
|
168
|
+
* a second `bool` for the same operator type (must / filter /
|
|
169
|
+
* should / must_not), so the generated body is small and easy to
|
|
170
|
+
* diff in tests.
|
|
171
|
+
* - The grant filter starts from the legacy ES Searcher rule:
|
|
172
|
+
* a non-public page (RESTRICTED / SPECIFIED / OWNER) is hidden
|
|
173
|
+
* unless its `username` field matches the viewer's username.
|
|
174
|
+
* For SPECIFIED / OWNER / RESTRICTED pages, we additionally allow
|
|
175
|
+
* the page through if `granted_users` contains the viewer id —
|
|
176
|
+
* the legacy query only checked `username`, but the new
|
|
177
|
+
* SearchableDoc lets us index `granted_users` precisely so we
|
|
178
|
+
* can express "shared with me" as well.
|
|
179
|
+
* - Type filter (portal / public / user) reproduces the legacy
|
|
180
|
+
* `path.raw` regex / prefix queries.
|
|
181
|
+
*/
|
|
182
|
+
|
|
183
|
+
type FunctionScoreParams = {
|
|
184
|
+
fieldValueFactor: {
|
|
185
|
+
field: string;
|
|
186
|
+
factor?: number;
|
|
187
|
+
modifier?: 'log' | 'log1p' | 'log2p' | 'ln' | 'ln1p' | 'ln2p' | 'square' | 'sqrt' | 'reciprocal' | 'none';
|
|
188
|
+
missing: number;
|
|
189
|
+
};
|
|
190
|
+
boostMode: 'multiply' | 'replace' | 'sum' | 'avg' | 'max' | 'min';
|
|
191
|
+
};
|
|
192
|
+
interface BuildSearchBodyParams {
|
|
193
|
+
parsed: ParsedSearchQuery;
|
|
194
|
+
pathPrefix?: string;
|
|
195
|
+
viewer?: SearchQueryViewer;
|
|
196
|
+
grants?: SearchQueryGrants;
|
|
197
|
+
functionScore?: FunctionScoreParams;
|
|
198
|
+
from: number;
|
|
199
|
+
size: number;
|
|
200
|
+
}
|
|
201
|
+
/**
|
|
202
|
+
* Build the OpenSearch search request body. Returns an object suitable
|
|
203
|
+
* for `client.search({ index, body })`.
|
|
204
|
+
*/
|
|
205
|
+
declare function buildSearchBody(params: BuildSearchBodyParams): {
|
|
206
|
+
from: number;
|
|
207
|
+
size: number;
|
|
208
|
+
sort: Array<Record<string, unknown>>;
|
|
209
|
+
highlight: Record<string, unknown>;
|
|
210
|
+
query: Record<string, unknown>;
|
|
211
|
+
_source: string[];
|
|
212
|
+
};
|
|
213
|
+
|
|
214
|
+
/**
|
|
215
|
+
* @crowi/plugin-search-opensearch — search driver registering
|
|
216
|
+
* `'opensearch'` against the SearchRegistry.
|
|
217
|
+
*
|
|
218
|
+
* Activation: add this plugin to the runner's `crowi.config.json`
|
|
219
|
+
* `plugins` array and set `search.driver: 'opensearch'`. Configure
|
|
220
|
+
* via the Mongo Config namespace `plugin:@crowi/plugin-search-opensearch:*`
|
|
221
|
+
* — operators set the connection URL exclusively from the admin UI.
|
|
222
|
+
*/
|
|
223
|
+
|
|
224
|
+
declare const OpenSearchConfigSchema: z.ZodObject<{
|
|
225
|
+
/**
|
|
226
|
+
* `https://[user:pass@]host[:port][/indexName]`. Empty string keeps
|
|
227
|
+
* the driver registered but disabled — `query()` will throw a
|
|
228
|
+
* helpful error and `index()` becomes a no-op.
|
|
229
|
+
*
|
|
230
|
+
* Marked `@sensitive` because the URL embeds the cluster password
|
|
231
|
+
* (Bonsai-style `https://USER:PASS@HOST/INDEX`); we don't want
|
|
232
|
+
* Mongo to keep it in plaintext.
|
|
233
|
+
*/
|
|
234
|
+
url: z.ZodDefault<z.ZodString>;
|
|
235
|
+
/**
|
|
236
|
+
* Base index name. Used as the `indexName` if not provided in the
|
|
237
|
+
* URL path. The runtime alias `${indexName}-current` is what the
|
|
238
|
+
* driver actually targets for read / write.
|
|
239
|
+
*/
|
|
240
|
+
indexName: z.ZodDefault<z.ZodString>;
|
|
241
|
+
requestTimeout: z.ZodDefault<z.ZodNumber>;
|
|
242
|
+
/**
|
|
243
|
+
* Mapping flavour. Cluster requirements:
|
|
244
|
+
* - `default`: no extra OpenSearch plugin.
|
|
245
|
+
* - `kuromoji`: `analysis-kuromoji` plugin (Apache 2.0, a
|
|
246
|
+
* separate distribution from OpenSearch core — install via
|
|
247
|
+
* `bin/opensearch-plugin install analysis-kuromoji`).
|
|
248
|
+
* - `sudachi`: `analysis-sudachi` (OpenSearch-compatible fork
|
|
249
|
+
* from WorksApplications) + dictionary; operators must build
|
|
250
|
+
* a derived image. Picking this without the plugin makes
|
|
251
|
+
* `rebuild()` fail.
|
|
252
|
+
*/
|
|
253
|
+
analyzer: z.ZodDefault<z.ZodEnum<["default", "kuromoji", "sudachi"]>>;
|
|
254
|
+
}, "strict", z.ZodTypeAny, {
|
|
255
|
+
url: string;
|
|
256
|
+
indexName: string;
|
|
257
|
+
requestTimeout: number;
|
|
258
|
+
analyzer: "default" | "kuromoji" | "sudachi";
|
|
259
|
+
}, {
|
|
260
|
+
url?: string | undefined;
|
|
261
|
+
indexName?: string | undefined;
|
|
262
|
+
requestTimeout?: number | undefined;
|
|
263
|
+
analyzer?: "default" | "kuromoji" | "sudachi" | undefined;
|
|
264
|
+
}>;
|
|
265
|
+
type OpenSearchConfig = z.infer<typeof OpenSearchConfigSchema>;
|
|
266
|
+
declare const plugin: CrowiPlugin;
|
|
267
|
+
|
|
268
|
+
export { type Analyzer, type OSDriverState, type OpenSearchConfig, OpenSearchConfigSchema, type OpenSearchDriver, type OpenSearchDriverConfig, type OpenSearchDriverDeps, type PageStreamDoc, applyConfig, applyConfigInPlace, buildSearchBody, createOpenSearchDriver, plugin as default, parseQuery };
|