@crowi/plugin-search-opensearch 0.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Sotaro KARASAWA <sotaro.k@gmail.com>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,165 @@
1
+ # @crowi/plugin-search-opensearch
2
+
3
+ OpenSearch search driver for Crowi 2.0. Indexes pages on create /
4
+ update / delete, serves the wiki search box, and rebuilds the whole
5
+ index from scratch on demand. Targets a `<indexName>-current` alias so
6
+ a rebuild can swap the underlying index atomically.
7
+
8
+ The plugin is a sibling of [`@crowi/plugin-search-elasticsearch`](../plugin-search-elasticsearch):
9
+ the wire-level document shape, alias name, mapping JSONs and query DSL
10
+ are identical, so a cluster migration between the two backends is a
11
+ re-point + rebuild rather than a mapping rewrite.
12
+
13
+ ## Install
14
+
15
+ ```bash
16
+ crowi-admin plugin add @crowi/plugin-search-opensearch
17
+ ```
18
+
19
+ (or, in dev: `pnpm --filter @crowi/api add -D @crowi/plugin-search-opensearch`)
20
+
21
+ ## Configure
22
+
23
+ ### 1. Activate the driver in `crowi.config.json`
24
+
25
+ ```jsonc
26
+ {
27
+ "plugins": ["@crowi/plugin-search-opensearch"],
28
+ "search": { "driver": "opensearch" }
29
+ }
30
+ ```
31
+
32
+ A server restart is required when `search.driver` changes — Crowi
33
+ reads this file once at boot.
34
+
35
+ ### 2. Fill in connection settings in the admin UI
36
+
37
+ Open `/admin/plugins` and edit `@crowi/plugin-search-opensearch`:
38
+
39
+ - **`url`** — `https://[user:pass@]host[:port][/indexName]`. The URL
40
+ embeds the cluster password, so it is encrypted at rest with
41
+ `CROWI_ENCRYPTION_KEY`. Only Basic Auth via the URL is supported
42
+ (AWS SigV4 / IAM auth is intentionally out of scope; a managed
43
+ OpenSearch deployment using fine-grained access control still works
44
+ with a Basic Auth user).
45
+ - **`indexName`** — base index name (default `crowi`), used when the `url`
46
+ path does not name an index. The driver reads / writes the `<indexName>-current` alias.
47
+ - **`requestTimeout`** — per-request timeout in ms (default `5000`).
48
+ - **`analyzer`** — `default` / `kuromoji` / `sudachi` (see below).
49
+
50
+ The admin UI is the single source of truth for these settings — there
51
+ is no env-variable fallback.
52
+
53
+ ### 3. Build the initial index
54
+
55
+ ```bash
56
+ crowi-admin search rebuild
57
+ ```
58
+
59
+ This creates a fresh `<indexName>-<timestamp>-<rand>` index, indexes
60
+ all pages in 2000-document bulk batches with pre-fetched bookmark
61
+ counts, and atomically swaps `<indexName>-current` to the new index.
62
+
63
+ ## Hot-reload (no restart needed)
64
+
65
+ This plugin implements `reconfigure`, so **saving connection settings
66
+ in the admin UI applies without a server restart**. When you save:
67
+
68
+ - the `url` / `indexName` / `requestTimeout` / `analyzer` changes are
69
+ picked up by the live driver,
70
+ - a fresh OpenSearch client is built and the previous one is closed
71
+ in the background (its HTTP keep-alive pool drains),
72
+ - the admin UI shows a "saved — applied immediately" toast.
73
+
74
+ Mechanics: the driver holds a module-scope state ref; each operation
75
+ (`query` / `index` / `remove` / `rebuild`) snapshots the state once at
76
+ the top of the call, so a save that lands mid-request cannot retarget
77
+ an inflight operation onto a different cluster. The next request sees
78
+ the new settings.
79
+
80
+ ### Caveats
81
+
82
+ - **Analyzer changes need a manual rebuild.** Switching `analyzer`
83
+ (`default` / `kuromoji` / `sudachi`) updates the setting immediately,
84
+ but the **existing index keeps its old analyzer** — analyzers are
85
+ fixed at index-creation time. Run `crowi-admin search rebuild` to
86
+ create a new index with the new analyzer and swap the alias to it.
87
+ - **Empty `url` → configured `url` is restart-only.** If `url` was
88
+ empty at boot, the driver is not registered and there is nothing for
89
+ `reconfigure` to mutate; configure a `url` and restart once. After
90
+ that, all further changes hot-reload. Clearing a configured `url`
91
+ (configured → empty) *is* handled live — search requests then fail
92
+ with a clear `Search not configured` error until a `url` is set again.
93
+ - A rebuild that is already running when you reconfigure runs to
94
+ completion against the cluster / index name it started with.
95
+
96
+ ## Analyzer flavours
97
+
98
+ | Analyzer | Cluster requirement |
99
+ |---|---|
100
+ | `default` | No extra OpenSearch plugin. |
101
+ | `kuromoji` | `analysis-kuromoji` (Apache 2.0, from `opensearch-project/analysis-kuromoji`). Unlike Elasticsearch, this is a **separate distribution** from OpenSearch core — install it on every cluster node with `bin/opensearch-plugin install analysis-kuromoji` and restart. |
102
+ | `sudachi` | Third-party `analysis-sudachi` (OpenSearch-compatible fork from WorksApplications) + a dictionary. Operators must bundle these into a custom image. Picking this without the plugin makes `rebuild()` fail. |
103
+
104
+ For a wiki with mostly Japanese content, `kuromoji` is the typical
105
+ choice. The mapping JSON (`src/mappings/kuromoji.json`) names the
106
+ analyzer as `kuromoji`, which is the identifier both the Elastic and
107
+ OpenSearch distributions install under, so no per-engine tweak is
108
+ needed in the mapping itself.
109
+
110
+ ## Trying it locally
111
+
112
+ The default `docker compose` stack ships an **Elasticsearch** service
113
+ for the sibling plugin, not OpenSearch. To exercise this plugin
114
+ locally, add an override file:
115
+
116
+ ```yaml
117
+ # compose.override.yml
118
+ services:
119
+ opensearch:
120
+ image: opensearchproject/opensearch:2.18.0
121
+ environment:
122
+ - discovery.type=single-node
123
+ - plugins.security.disabled=true
124
+ - OPENSEARCH_INITIAL_ADMIN_PASSWORD=Crowi-Dev-Passw0rd!
125
+ ports:
126
+ - "9201:9200"
127
+ ```
128
+
129
+ Then:
130
+
131
+ 1. `docker compose up -d opensearch` — brings up OpenSearch on host port `:9201`
132
+ (`:9200` is taken by the dev Elasticsearch service if you left it
133
+ running).
134
+ 2. In `/admin/plugins`, set `@crowi/plugin-search-opensearch`'s `url`
135
+ to `http://opensearch:9201/crowi` (or `http://localhost:9201/crowi`
136
+ from outside the compose network) and save; restart once if the
137
+ driver was previously unconfigured.
138
+ 3. From `/admin/search`, run a rebuild to populate the index.
139
+ 4. Change `requestTimeout` (or point `indexName` at a freshly rebuilt
140
+ index) and save **without restarting**. The next search query uses
141
+ the new settings — confirmed by the api log line
142
+ `reconfigured opensearch search driver (...)`.
143
+
144
+ ## Why a separate plugin from `@crowi/plugin-search-elasticsearch`?
145
+
146
+ Both backends speak the same query DSL today, but the SDKs differ
147
+ (`@opensearch-project/opensearch` vs `@elastic/elasticsearch`) and the
148
+ backends are intentionally allowed to diverge — OpenSearch's neural /
149
+ k-NN extensions, the Elastic-licensed features in newer ES versions,
150
+ and the analyzer-plugin distribution stories are all separate concerns.
151
+ Keeping the drivers in distinct npm packages means each can pin its own
152
+ SDK / mapping fork without forcing the other to follow.
153
+
154
+ The parse-query / query-builder / mapping JSON files are currently 1:1
155
+ copies of the ES plugin's. The duplication is **on purpose** — we will
156
+ revisit a shared core when a third driver arrives.
157
+
158
+ ## See also
159
+
160
+ - [`@crowi/plugin-search-elasticsearch`](../plugin-search-elasticsearch) —
161
+ the sibling Elasticsearch driver.
162
+ - RFC-0001 §"Search" for the search-driver plugin architecture.
163
+ - [`@crowi/plugin-storage-aws-s3`](../plugin-storage-aws-s3) — the
164
+ reference implementation of the same state-ref + snapshot hot-reload
165
+ pattern.
@@ -0,0 +1,268 @@
1
+ import { z } from 'zod/v3';
2
+ import { SearchDriver, PluginLogger, SearchQueryViewer, SearchQueryGrants, CrowiPlugin } from '@crowi/plugin-api';
3
+ import { Client } from '@opensearch-project/opensearch';
4
+
5
+ /**
6
+ * OpenSearch driver implementing the `SearchDriver` contract. Owns
7
+ * the Client, the `${indexName}-current` alias (legacy ops compat),
8
+ * single-doc index / remove, query against the alias, and rebuild-
9
+ * from-scratch in 2k-doc bulk batches with bookmark counts pre-fetched
10
+ * in one aggregate. Document field shape (path / body / username /
11
+ * grant / granted_users / *_count / *_at) matches the ES plugin's
12
+ * shape so a cluster migration is a re-point + rebuild rather than a
13
+ * mapping rewrite.
14
+ *
15
+ * SDK note: `@opensearch-project/opensearch` 3.x returns
16
+ * `{ body, statusCode, ... }` wrappers around every API response (the
17
+ * shape inherited from the old `elasticsearch-js` 7.x line). The
18
+ * Elasticsearch 9 client we use for `@crowi/plugin-search-elasticsearch`
19
+ * collapsed those wrappers — so every call site here unwraps `body`
20
+ * explicitly. Bulk requests likewise take `{ body: operations }`, not
21
+ * the ES 9 `{ operations }` keyword.
22
+ */
23
+
24
+ type Analyzer = 'default' | 'kuromoji' | 'sudachi'; // mapping flavour; `kuromoji` / `sudachi` require the matching analysis plugin on the cluster
25
+ interface OpenSearchDriverConfig { // settings consumed by `applyConfig` / `applyConfigInPlace`
26
+ url: string; // `https://[user:pass@]host[:port][/indexName]`; an empty string yields a disabled state (`client: null`)
27
+ indexName: string; // base index name; the driver targets the `<indexName>-current` alias
28
+ requestTimeout: number; // per-request timeout in ms
29
+ analyzer: Analyzer; // mapping flavour
30
+ }
31
+ interface OpenSearchDriverDeps { // optional collaborators handed to `createOpenSearchDriver`; every member may be omitted
32
+ log?: PluginLogger; // optional logger typed by `@crowi/plugin-api`
33
+ /**
34
+ * Iterate every page in the Mongo Page collection in cursor-style.
35
+ * Plugin can't import the Page model directly, so the manager wires
36
+ * this in from `ctx.model('Page')`. Each yielded doc is the lean
37
+ * shape produced by `Page.getStreamOfFindAll({ publicOnly: false })`.
38
+ */
39
+ iteratePages?: (handler: (page: PageStreamDoc) => Promise<void>) => Promise<void>;
40
+ /** Total page count, used for progress reporting. */
41
+ countAllPages?: () => Promise<number>;
42
+ /**
43
+ * Bulk-fetch bookmark counts for every page in one Mongo aggregate.
44
+ * Avoids the per-doc N+1 lookup the legacy rebuild used. Returns a
45
+ * `Map<pageId, count>`; pages without bookmarks may be absent
46
+ * (caller defaults to 0).
47
+ */
48
+ getBookmarkCountsBulk?: () => Promise<Map<string, number>>;
49
+ /** Total user count, used to scale the bookmark-count factor. */
50
+ countUsers?: () => Promise<number>;
51
+ }
52
+ /** The lean Page document shape we expect from the rebuild stream. */
53
+ interface PageStreamDoc {
54
+ _id: { // page id: either an object exposing `toString()` or a plain string
55
+ toString: () => string;
56
+ } | string;
57
+ path: string;
58
+ redirectTo: string | null;
59
+ status: string;
60
+ grant: number; // numeric grant level; NOTE(review): the value-to-name mapping is not visible in this file
61
+ grantedUsers?: Array<{ // each entry is an object exposing `toString()` or a plain string (presumably user ids; confirm)
62
+ toString: () => string;
63
+ } | string>;
64
+ creator?: {
65
+ username?: string;
66
+ };
67
+ revision?: {
68
+ body?: string;
69
+ };
70
+ liker?: unknown[]; // element type is opaque here; presumably only the length is used (TODO confirm)
71
+ commentCount?: number;
72
+ bookmarkCount?: number;
73
+ createdAt?: Date;
74
+ updatedAt?: Date;
75
+ }
76
+ interface OpenSearchDriver extends SearchDriver { // `SearchDriver` plus read-only introspection fields
77
+ /** Currently-targeted alias name (`<indexName>-current`). Exposed for tests / admin UI. */
78
+ readonly aliasName: string;
79
+ /** OpenSearch node URI parsed out of `config.url`. */
80
+ readonly node: string;
81
+ /** Base index name (without timestamp / `-current` suffix). */
82
+ readonly baseIndexName: string;
83
+ /** Test-only handle to the underlying client. NOTE(review): typed non-null although `OSDriverState.client` can be `null`; confirm what this yields while the driver is disabled. */
84
+ readonly client: Client;
85
+ }
86
+ /**
87
+ * Mutable driver state. `createOpenSearchDriver` receives a ref to
88
+ * this; each driver method snapshots the fields it needs *once at the
89
+ * top* of the call, so a concurrent `reconfigure` cannot swap the
90
+ * client / index name mid-operation. `reconfigure` mutates the fields
91
+ * in place via {@link applyConfigInPlace}; the next call sees the new
92
+ * values. An empty `url` leaves `client` as `null` — the methods then
93
+ * throw a `Search not configured` error rather than touching a stale
94
+ * client.
95
+ */
96
+ interface OSDriverState {
97
+ /** `null` when `url` is empty (driver configured-but-disabled). */
98
+ client: Client | null;
99
+ /** OpenSearch node URI parsed out of `config.url`; empty string when `url` is empty. */
100
+ node: string;
101
+ /** Base index name (without timestamp / `-current` suffix). */
102
+ baseIndexName: string;
103
+ /** Runtime alias the driver reads / writes (`<baseIndexName>-current`). */
104
+ aliasName: string;
105
+ analyzer: Analyzer; // mapping flavour carried over from the config
106
+ requestTimeout: number; // per-request timeout carried over from the config (ms)
107
+ }
108
+ /**
109
+ * Build a fresh {@link OSDriverState} from a config. An empty `url`
110
+ * yields a disabled state (`client: null`) instead of throwing — the
111
+ * driver stays registered but every method rejects with a
112
+ * `Search not configured` error.
113
+ */
114
+ declare function applyConfig(config: OpenSearchDriverConfig): OSDriverState; // returns a new state object; contrast `applyConfigInPlace`, which mutates an existing one
115
+ /**
116
+ * Mutate `target` in place to reflect `config`. Used by `reconfigure`:
117
+ * the old client reference is returned so the caller can `close()` it
118
+ * (fire-and-forget) once the swap is done — inflight operations have
119
+ * already snapshotted the old client and will run to completion.
120
+ */
121
+ declare function applyConfigInPlace(target: OSDriverState, config: OpenSearchDriverConfig): {
122
+ oldClient: Client | null; // presumably `null` when the previous state was disabled (nothing to close); confirm against the implementation
123
+ };
124
+ /**
125
+ * Build the search driver around an {@link OSDriverState} ref. Methods
126
+ * snapshot `state` *once at the top* — a `reconfigure` running
127
+ * concurrently with an inflight call cannot swap the client mid-call;
128
+ * the next call sees the new client / index name.
129
+ */
130
+ declare function createOpenSearchDriver(state: OSDriverState, deps?: OpenSearchDriverDeps): OpenSearchDriver; // `deps` may be omitted entirely; every member of `OpenSearchDriverDeps` is optional
131
+
132
+ /**
133
+ * Search-string parser for the OpenSearch driver.
134
+ *
135
+ * Splits a free-form query into positive / negative keywords and
136
+ * phrases. Lifted from the legacy `packages/api/src/service/query.ts`
137
+ * with no behaviour changes — preserved here as a plugin-private
138
+ * helper because the parser is currently OpenSearch / Elasticsearch
139
+ * specific (the +/- and `"phrase"` syntax maps directly to
140
+ * `multi_match` queries). When a future driver wants the same shape,
141
+ * factor it back into `@crowi/plugin-api`.
142
+ */
143
+ type PositiveAndNegative<T> = { // pairs the included and excluded halves of a parsed query
144
+ positive: T; // terms the query asks for
145
+ negative: T; // terms the query excludes (presumably the `-` syntax; confirm exact prefix rules in the parser)
146
+ };
147
+ type ParsedSearchQuery = { // result of `parseQuery`: a free-form query split into keywords and phrases
148
+ keywords: PositiveAndNegative<string[]>; // positive / negative keywords
149
+ phrases: PositiveAndNegative<string[]>; // positive / negative phrases (the `"phrase"` syntax)
150
+ };
151
+ declare const parseQuery: (query: string) => ParsedSearchQuery; // splits a free-form search string into positive / negative keywords and phrases
152
+
153
+ /**
154
+ * Build an OpenSearch search request body from the SearchQuery shape
155
+ * exposed by `@crowi/plugin-api`. The driver passes a parsed
156
+ * keyword/phrase tree plus the viewer + grants from the original
157
+ * SearchQuery; this module composes them into a single bool query.
158
+ *
159
+ * The wire shape is identical to Elasticsearch's query DSL (OpenSearch
160
+ * forked from ES 7.10.2 and has kept the search API surface compatible),
161
+ * so the builder is a 1:1 copy of `@crowi/plugin-search-elasticsearch`'s
162
+ * builder for now. Kept private here rather than shared because the two
163
+ * drivers may diverge in the future (e.g. OpenSearch's neural / k-NN
164
+ * extensions) and hard-coupling them would block that.
165
+ *
166
+ * Design notes:
167
+ * - All filters are composed at the top-level `bool`. We never nest
168
+ * a second `bool` for the same operator type (must / filter /
169
+ * should / must_not), so the generated body is small and easy to
170
+ * diff in tests.
171
+ * - The grant filter mirrors the legacy ES Searcher precisely:
172
+ * a non-public page (RESTRICTED / SPECIFIED / OWNER) is hidden
173
+ * unless its `username` field matches the viewer's username.
174
+ * For SPECIFIED / OWNER / RESTRICTED pages, we additionally allow
175
+ * the page through if `granted_users` contains the viewer id —
176
+ * the legacy query only checked `username`, but the new
177
+ * SearchableDoc lets us index `granted_users` precisely so we
178
+ * can express "shared with me" as well.
179
+ * - Type filter (portal / public / user) reproduces the legacy
180
+ * `path.raw` regex / prefix queries.
181
+ */
182
+
183
+ type FunctionScoreParams = { // score-boost settings; names mirror the `function_score` / `field_value_factor` query options in camelCase (presumably serialised to them; confirm in the builder)
184
+ fieldValueFactor: {
185
+ field: string; // document field whose value feeds the score
186
+ factor?: number; // optional multiplier for the field value
187
+ modifier?: 'log' | 'log1p' | 'log2p' | 'ln' | 'ln1p' | 'ln2p' | 'square' | 'sqrt' | 'reciprocal' | 'none'; // optional function applied to the field value
188
+ missing: number; // value used when a document lacks `field`
189
+ };
190
+ boostMode: 'multiply' | 'replace' | 'sum' | 'avg' | 'max' | 'min'; // how the computed value combines with the query score
191
+ };
192
+ interface BuildSearchBodyParams { // input to `buildSearchBody`
193
+ parsed: ParsedSearchQuery; // parsed keyword / phrase tree
194
+ pathPrefix?: string; // presumably restricts results to paths under this prefix (TODO confirm)
195
+ viewer?: SearchQueryViewer; // viewer from the original SearchQuery
196
+ grants?: SearchQueryGrants; // grants from the original SearchQuery
197
+ functionScore?: FunctionScoreParams; // optional score-boost settings
198
+ from: number; // result offset
199
+ size: number; // page size
200
+ }
201
+ /**
202
+ * Build the OpenSearch search request body. Returns an object suitable
203
+ * for `client.search({ index, body })`.
204
+ */
205
+ declare function buildSearchBody(params: BuildSearchBodyParams): {
206
+ from: number;
207
+ size: number;
208
+ sort: Array<Record<string, unknown>>;
209
+ highlight: Record<string, unknown>;
210
+ query: Record<string, unknown>; // the composed query; NOTE(review): whether `functionScore` wraps the bool query is not visible from this declaration
211
+ _source: string[];
212
+ };
213
+
214
+ /**
215
+ * @crowi/plugin-search-opensearch — search driver registering
216
+ * `'opensearch'` against the SearchRegistry.
217
+ *
218
+ * Activation: add this plugin to the runner's `crowi.config.json`
219
+ * `plugins` array and set `search.driver: 'opensearch'`. Configure
220
+ * via the Mongo Config namespace `plugin:@crowi/plugin-search-opensearch:*`
221
+ * — operators set the connection URL exclusively from the admin UI.
222
+ */
223
+
224
+ declare const OpenSearchConfigSchema: z.ZodObject<{
225
+ /**
226
+ * `https://[user:pass@]host[:port][/indexName]`. Empty string keeps
227
+ * the driver registered but disabled — `query()` will throw a
228
+ * helpful error and `index()` becomes a no-op. NOTE(review): the `OSDriverState` / `applyConfig` comments in this file say every method rejects with `Search not configured` when `url` is empty — confirm which behaviour `index()` actually has.
229
+ *
230
+ * Marked `@sensitive` because the URL embeds the cluster password
231
+ * (Bonsai-style `https://USER:PASS@HOST/INDEX`); we don't want
232
+ * Mongo to keep it in plaintext.
233
+ */
234
+ url: z.ZodDefault<z.ZodString>;
235
+ /**
236
+ * Base index name. Used as the `indexName` if not provided in the
237
+ * URL path. The runtime alias `${indexName}-current` is what the
238
+ * driver actually targets for read / write.
239
+ */
240
+ indexName: z.ZodDefault<z.ZodString>;
241
+ requestTimeout: z.ZodDefault<z.ZodNumber>; // per-request timeout in ms
242
+ /**
243
+ * Mapping flavour. Cluster requirements:
244
+ * - `default`: no extra OpenSearch plugin.
245
+ * - `kuromoji`: `analysis-kuromoji` plugin (Apache 2.0, a
246
+ * separate distribution from OpenSearch core — install via
247
+ * `bin/opensearch-plugin install analysis-kuromoji`).
248
+ * - `sudachi`: `analysis-sudachi` (OpenSearch-compatible fork
249
+ * from WorksApplications) + dictionary; operators must build
250
+ * a derived image. Picking this without the plugin makes
251
+ * `rebuild()` fail.
252
+ */
253
+ analyzer: z.ZodDefault<z.ZodEnum<["default", "kuromoji", "sudachi"]>>;
254
+ }, "strict", z.ZodTypeAny, { // "strict": unknown keys are rejected; the object that follows is the parsed (output) shape
255
+ url: string;
256
+ indexName: string;
257
+ requestTimeout: number;
258
+ analyzer: "default" | "kuromoji" | "sudachi";
259
+ }, { // input shape: every key is optional because each field carries a default
260
+ url?: string | undefined;
261
+ indexName?: string | undefined;
262
+ requestTimeout?: number | undefined;
263
+ analyzer?: "default" | "kuromoji" | "sudachi" | undefined;
264
+ }>;
265
+ type OpenSearchConfig = z.infer<typeof OpenSearchConfigSchema>; // parsed (output) shape: all four fields present once defaults are applied
266
+ declare const plugin: CrowiPlugin; // plugin entry point (the default export); registers the `'opensearch'` search driver
267
+
268
+ export { type Analyzer, type OSDriverState, type OpenSearchConfig, OpenSearchConfigSchema, type OpenSearchDriver, type OpenSearchDriverConfig, type OpenSearchDriverDeps, type PageStreamDoc, applyConfig, applyConfigInPlace, buildSearchBody, createOpenSearchDriver, plugin as default, parseQuery }; // public surface: runtime values plus type-only exports; `plugin` is the default export