@crowi/plugin-search-opensearch 0.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,268 @@
1
+ import { z } from 'zod/v3';
2
+ import { SearchDriver, PluginLogger, SearchQueryViewer, SearchQueryGrants, CrowiPlugin } from '@crowi/plugin-api';
3
+ import { Client } from '@opensearch-project/opensearch';
4
+
5
+ /**
6
+ * OpenSearch driver implementing the `SearchDriver` contract. Owns
7
+ * the Client, the `${indexName}-current` alias (legacy ops compat),
8
+ * single-doc index / remove, query against the alias, and rebuild-
9
+ * from-scratch in 2k-doc bulk batches with bookmark counts pre-fetched
10
+ * in one aggregate. Document field shape (path / body / username /
11
+ * grant / granted_users / *_count / *_at) matches the ES plugin's
12
+ * shape so a cluster migration is a re-point + rebuild rather than a
13
+ * mapping rewrite.
14
+ *
15
+ * SDK note: `@opensearch-project/opensearch` 3.x returns
16
+ * `{ body, statusCode, ... }` wrappers around every API response (the
17
+ * shape inherited from the old `elasticsearch-js` 7.x line). The
18
+ * Elasticsearch 9 client we use for `@crowi/plugin-search-elasticsearch`
19
+ * collapsed those wrappers — so every call site here unwraps `body`
20
+ * explicitly. Bulk requests likewise take `{ body: operations }`, not
21
+ * the ES 9 `{ operations }` keyword.
22
+ */
23
+
24
+ type Analyzer = 'default' | 'kuromoji' | 'sudachi'; // Mapping flavour chosen via the `analyzer` config field; same three values as the enum in OpenSearchConfigSchema.
25
+ interface OpenSearchDriverConfig { // Driver configuration accepted by applyConfig / applyConfigInPlace.
26
+ url: string; // Connection URL; an empty string yields a disabled state (`client: null`).
27
+ indexName: string; // Base index name; the runtime alias is `${indexName}-current`.
28
+ requestTimeout: number; // NOTE(review): unit is not stated in this declaration file — presumably milliseconds; confirm.
29
+ analyzer: Analyzer; // Mapping flavour: 'default' | 'kuromoji' | 'sudachi'.
30
+ }
31
+ interface OpenSearchDriverDeps { // Optional collaborators passed as the second argument of createOpenSearchDriver; every member may be omitted.
32
+ log?: PluginLogger; // Optional logger (`PluginLogger` from `@crowi/plugin-api`).
33
+ /**
34
+ * Iterate every page in the Mongo Page collection, cursor-style.
35
+ * The plugin can't import the Page model directly, so the manager wires
36
+ * this in from `ctx.model('Page')`. Each yielded doc is the lean
37
+ * shape produced by `Page.getStreamOfFindAll({ publicOnly: false })`.
38
+ */
39
+ iteratePages?: (handler: (page: PageStreamDoc) => Promise<void>) => Promise<void>;
40
+ /** Total page count, used for progress reporting. */
41
+ countAllPages?: () => Promise<number>;
42
+ /**
43
+ * Bulk-fetch bookmark counts for every page in one Mongo aggregate.
44
+ * Avoids the per-doc N+1 lookup the legacy rebuild used. Returns a
45
+ * `Map<pageId, count>`; pages without bookmarks may be absent
46
+ * (caller defaults to 0).
47
+ */
48
+ getBookmarkCountsBulk?: () => Promise<Map<string, number>>;
49
+ /** Total user count, used to scale the bookmark-count factor. */
50
+ countUsers?: () => Promise<number>;
51
+ }
52
+ /** The lean Page document shape we expect from the rebuild stream (the argument handed to the `iteratePages` handler). */
53
+ interface PageStreamDoc {
54
+ _id: { // Either an object exposing `toString()` or an already-stringified id.
55
+ toString: () => string;
56
+ } | string;
57
+ path: string;
58
+ redirectTo: string | null;
59
+ status: string;
60
+ grant: number; // NOTE(review): numeric grant level; the number-to-name mapping (public / RESTRICTED / SPECIFIED / OWNER) is not visible here — confirm.
61
+ grantedUsers?: Array<{ // Same id representation as `_id`: `toString()`-able object or plain string.
62
+ toString: () => string;
63
+ } | string>;
64
+ creator?: {
65
+ username?: string;
66
+ };
67
+ revision?: {
68
+ body?: string;
69
+ };
70
+ liker?: unknown[]; // Element shape is left unconstrained by this declaration.
71
+ commentCount?: number;
72
+ bookmarkCount?: number;
73
+ createdAt?: Date;
74
+ updatedAt?: Date;
75
+ }
76
+ interface OpenSearchDriver extends SearchDriver { // The `SearchDriver` contract plus four read-only introspection fields.
77
+ /** Currently-targeted alias name (`<indexName>-current`). Exposed for tests / admin UI. */
78
+ readonly aliasName: string;
79
+ /** OpenSearch node URI parsed out of `config.url`. */
80
+ readonly node: string;
81
+ /** Base index name (without timestamp / `-current` suffix). */
82
+ readonly baseIndexName: string;
83
+ /** Test-only handle to the underlying client. */
84
+ readonly client: Client; // NOTE(review): typed non-null, while `OSDriverState.client` is `Client | null` when `url` is empty — confirm what a disabled driver exposes here.
85
+ }
86
+ /**
87
+ * Mutable driver state. `createOpenSearchDriver` receives a ref to
88
+ * this; each driver method snapshots the fields it needs *once at the
89
+ * top* of the call, so a concurrent `reconfigure` cannot swap the
90
+ * client / index name mid-operation. `reconfigure` mutates the fields
91
+ * in place via {@link applyConfigInPlace}; the next call sees the new
92
+ * values. An empty `url` leaves `client` as `null` — the methods then
93
+ * throw a `Search not configured` error rather than touching a stale
94
+ * client.
95
+ */
96
+ interface OSDriverState {
97
+ /** `null` when `url` is empty (driver configured-but-disabled). */
98
+ client: Client | null;
99
+ /** OpenSearch node URI parsed out of `config.url`; empty string when `url` is empty. */
100
+ node: string;
101
+ /** Base index name (without timestamp / `-current` suffix). */
102
+ baseIndexName: string;
103
+ /** Runtime alias the driver reads / writes (`<baseIndexName>-current`). */
104
+ aliasName: string;
105
+ analyzer: Analyzer; // Mapping flavour currently in effect ('default' | 'kuromoji' | 'sudachi').
106
+ requestTimeout: number; // NOTE(review): unit is not stated in this declaration file — presumably milliseconds; confirm.
107
+ }
108
+ /**
109
+ * Build a fresh {@link OSDriverState} from a config. An empty `url`
110
+ * yields a disabled state (`client: null`) instead of throwing — the
111
+ * driver stays registered but every method rejects with a
112
+ * `Search not configured` error.
113
+ */
114
+ declare function applyConfig(config: OpenSearchDriverConfig): OSDriverState; // Returns a newly built state; how `config` fields map onto state fields is not visible in this declaration file.
115
+ /**
116
+ * Mutate `target` in place to reflect `config`. Used by `reconfigure`:
117
+ * the old client reference is returned so the caller can `close()` it
118
+ * (fire-and-forget) once the swap is done — inflight operations have
119
+ * already snapshotted the old client and will run to completion.
120
+ */
121
+ declare function applyConfigInPlace(target: OSDriverState, config: OpenSearchDriverConfig): { // `target` is mutated; the result only carries the replaced client.
122
+ oldClient: Client | null; // Client held by `target` before the swap; typed nullable because `OSDriverState.client` is `null` for a disabled driver.
123
+ };
124
+ /**
125
+ * Build the search driver around an {@link OSDriverState} ref. Methods
126
+ * snapshot `state` *once at the top* — a `reconfigure` running
127
+ * concurrently with an inflight call cannot swap the client mid-call;
128
+ * the next call sees the new client / index name.
129
+ */
130
+ declare function createOpenSearchDriver(state: OSDriverState, deps?: OpenSearchDriverDeps): OpenSearchDriver; // `deps` is optional; NOTE(review): which driver operations need which dependency is not visible here — confirm against the implementation.
131
+
132
+ /**
133
+ * Search-string parser for the OpenSearch driver.
134
+ *
135
+ * Splits a free-form query into positive / negative keywords and
136
+ * phrases. Lifted from the legacy `packages/api/src/service/query.ts`
137
+ * with no behaviour changes — preserved here as a plugin-private
138
+ * helper because the parser is currently OpenSearch / Elasticsearch
139
+ * specific (the +/- and `"phrase"` syntax maps directly to
140
+ * `multi_match` queries). When a future driver wants the same shape,
141
+ * factor it back into `@crowi/plugin-api`.
142
+ */
143
+ type PositiveAndNegative<T> = { // Pairs the positive and negative halves of one kind of search term.
144
+ positive: T; // NOTE(review): presumably the terms that must match (unprefixed / `+`) — confirm against the parser.
145
+ negative: T; // NOTE(review): presumably the terms to exclude (`-` prefix) — confirm against the parser.
146
+ };
147
+ type ParsedSearchQuery = { // Result shape of `parseQuery`: keywords and phrases, each split into positive / negative lists.
148
+ keywords: PositiveAndNegative<string[]>; // NOTE(review): presumably the bare (unquoted) words — confirm.
149
+ phrases: PositiveAndNegative<string[]>; // NOTE(review): presumably the `"quoted phrase"` terms — confirm.
150
+ };
151
+ declare const parseQuery: (query: string) => ParsedSearchQuery; // Splits a free-form search string into positive / negative keywords and phrases.
152
+
153
+ /**
154
+ * Build an OpenSearch search request body from the SearchQuery shape
155
+ * exposed by `@crowi/plugin-api`. The driver passes a parsed
156
+ * keyword/phrase tree plus the viewer + grants from the original
157
+ * SearchQuery; this module composes them into a single bool query.
158
+ *
159
+ * The wire shape is identical to Elasticsearch's query DSL (OpenSearch
160
+ * forked from ES 7.10.2 and has kept the search API surface compatible),
161
+ * so the builder is a 1:1 copy of `@crowi/plugin-search-elasticsearch`'s
162
+ * builder for now. Kept private here rather than shared because the two
163
+ * drivers may diverge in the future (e.g. OpenSearch's neural / k-NN
164
+ * extensions) and hard-coupling them would block that.
165
+ *
166
+ * Design notes:
167
+ * - All filters are composed at the top-level `bool`. We never nest
168
+ * a second `bool` for the same operator type (must / filter /
169
+ * should / must_not), so the generated body is small and easy to
170
+ * diff in tests.
171
+ * - The grant filter extends the legacy ES Searcher rule. Baseline:
172
+ * a non-public page (RESTRICTED / SPECIFIED / OWNER) is hidden
173
+ * unless its `username` field matches the viewer's username.
174
+ * Extension: any such non-public page is also let through if
175
+ * `granted_users` contains the viewer id. The legacy query only
176
+ * checked `username`; the new SearchableDoc indexes
177
+ * `granted_users` precisely, so "shared with me" can be
178
+ * expressed as well.
179
+ * - Type filter (portal / public / user) reproduces the legacy
180
+ * `path.raw` regex / prefix queries.
181
+ */
182
+
183
+ type FunctionScoreParams = { // Optional scoring tweak for buildSearchBody. NOTE(review): names look like camelCase forms of OpenSearch's `field_value_factor` / `boost_mode` — confirm they are forwarded 1:1.
184
+ fieldValueFactor: {
185
+ field: string; // Name of the document field whose value feeds the score.
186
+ factor?: number;
187
+ modifier?: 'log' | 'log1p' | 'log2p' | 'ln' | 'ln1p' | 'ln2p' | 'square' | 'sqrt' | 'reciprocal' | 'none';
188
+ missing: number; // Required here, unlike `factor` / `modifier`. NOTE(review): presumably the fallback value for docs lacking `field` — confirm.
189
+ };
190
+ boostMode: 'multiply' | 'replace' | 'sum' | 'avg' | 'max' | 'min';
191
+ };
192
+ interface BuildSearchBodyParams { // Input of buildSearchBody: the parsed query plus optional filters, scoring and paging.
193
+ parsed: ParsedSearchQuery; // Keyword / phrase tree, i.e. the `parseQuery` result shape.
194
+ pathPrefix?: string; // NOTE(review): presumably restricts hits to pages under this path — confirm against the builder.
195
+ viewer?: SearchQueryViewer; // Viewer taken from the original SearchQuery; feeds the grant filter.
196
+ grants?: SearchQueryGrants; // Grants taken from the original SearchQuery.
197
+ functionScore?: FunctionScoreParams;
198
+ from: number; // NOTE(review): presumably the paging offset copied into the body's `from` — confirm.
199
+ size: number; // NOTE(review): presumably the page size copied into the body's `size` — confirm.
200
+ }
201
+ /**
202
+ * Build the OpenSearch search request body. Returns an object suitable
203
+ * for `client.search({ index, body })`.
204
+ */
205
+ declare function buildSearchBody(params: BuildSearchBodyParams): { // Returns only the request body; the index is supplied separately in `client.search({ index, body })`.
206
+ from: number;
207
+ size: number;
208
+ sort: Array<Record<string, unknown>>;
209
+ highlight: Record<string, unknown>;
210
+ query: Record<string, unknown>; // Loosely typed; the module notes describe it as a single top-level `bool` query.
211
+ _source: string[];
212
+ };
213
+
214
+ /**
215
+ * @crowi/plugin-search-opensearch — search driver registering
216
+ * `'opensearch'` against the SearchRegistry.
217
+ *
218
+ * Activation: add this plugin to the runner's `crowi.config.json`
219
+ * `plugins` array and set `search.driver: 'opensearch'`. Configure
220
+ * via the Mongo Config namespace `plugin:@crowi/plugin-search-opensearch:*`
221
+ * — operators set the connection URL exclusively from the admin UI.
222
+ */
223
+
224
+ declare const OpenSearchConfigSchema: z.ZodObject<{
225
+ /**
226
+ * `https://[user:pass@]host[:port][/indexName]`. Empty string keeps
227
+ * the driver registered but disabled — `query()` will throw a
228
+ * helpful error and `index()` becomes a no-op. NOTE(review): the OSDriverState / applyConfig docs in this file say every method rejects with `Search not configured` — confirm which holds for `index()`.
229
+ *
230
+ * Marked `@sensitive` because the URL embeds the cluster password
231
+ * (Bonsai-style `https://USER:PASS@HOST/INDEX`); we don't want
232
+ * Mongo to keep it in plaintext.
233
+ */
234
+ url: z.ZodDefault<z.ZodString>;
235
+ /**
236
+ * Base index name. Used when the URL path does not provide an
237
+ * index name. The runtime alias `${indexName}-current` is what the
238
+ * driver actually targets for read / write.
239
+ */
240
+ indexName: z.ZodDefault<z.ZodString>;
241
+ requestTimeout: z.ZodDefault<z.ZodNumber>; // NOTE(review): undocumented here; unit presumably milliseconds and default value not visible — confirm against the schema source.
242
+ /**
243
+ * Mapping flavour. Cluster requirements:
244
+ * - `default`: no extra OpenSearch plugin.
245
+ * - `kuromoji`: `analysis-kuromoji` plugin (Apache 2.0, a
246
+ * separate distribution from OpenSearch core — install via
247
+ * `bin/opensearch-plugin install analysis-kuromoji`).
248
+ * - `sudachi`: `analysis-sudachi` (OpenSearch-compatible fork
249
+ * from WorksApplications) + dictionary; operators must build
250
+ * a derived image. Picking this without the plugin makes
251
+ * `rebuild()` fail.
252
+ */
253
+ analyzer: z.ZodDefault<z.ZodEnum<["default", "kuromoji", "sudachi"]>>;
254
+ }, "strict", z.ZodTypeAny, { // "strict" is Zod's unknown-keys mode (unrecognised keys fail parsing); the object that follows is the parsed (output) shape.
255
+ url: string;
256
+ indexName: string;
257
+ requestTimeout: number;
258
+ analyzer: "default" | "kuromoji" | "sudachi";
259
+ }, { // Input shape: every field is optional because each one is wrapped in `ZodDefault`.
260
+ url?: string | undefined;
261
+ indexName?: string | undefined;
262
+ requestTimeout?: number | undefined;
263
+ analyzer?: "default" | "kuromoji" | "sudachi" | undefined;
264
+ }>;
265
+ type OpenSearchConfig = z.infer<typeof OpenSearchConfigSchema>; // Parsed (output) config shape: all four fields are present once defaults are applied.
266
+ declare const plugin: CrowiPlugin; // Exported as the module's default export; per the module header it registers the `'opensearch'` search driver.
267
+
268
+ export { type Analyzer, type OSDriverState, type OpenSearchConfig, OpenSearchConfigSchema, type OpenSearchDriver, type OpenSearchDriverConfig, type OpenSearchDriverDeps, type PageStreamDoc, applyConfig, applyConfigInPlace, buildSearchBody, createOpenSearchDriver, plugin as default, parseQuery }; // Public surface: `type`-marked names are type-only exports; `plugin` is the default export; all other names are value exports.