@crowi/plugin-search-elasticsearch 0.1.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +105 -0
- package/dist/index.d.mts +250 -0
- package/dist/index.d.ts +250 -0
- package/dist/index.js +831 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +801 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +46 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,831 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Bundler-generated CommonJS interop helpers.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Defines one enumerable getter on `target` per enumerable key of `all`;
 * each value of `all` is used directly as the getter function.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Copies the own properties of `from` onto `to` as live getters, skipping
 * keys `to` already owns and the single key named by `except`.
 * Enumerability follows the source property descriptor.
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (key === except || __hasOwnProp.call(to, key)) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/** Wraps a module namespace object in a fresh object flagged `__esModule`. */
var __toCommonJS = (mod) => {
  const shell = __defProp({}, "__esModule", { value: true });
  return __copyProps(shell, mod);
};
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
var index_exports = {};
|
|
22
|
+
__export(index_exports, {
|
|
23
|
+
ElasticsearchConfigSchema: () => ElasticsearchConfigSchema,
|
|
24
|
+
applyConfig: () => applyConfig,
|
|
25
|
+
applyConfigInPlace: () => applyConfigInPlace,
|
|
26
|
+
buildSearchBody: () => buildSearchBody,
|
|
27
|
+
createElasticsearchDriver: () => createElasticsearchDriver,
|
|
28
|
+
default: () => index_default,
|
|
29
|
+
parseQuery: () => parseQuery
|
|
30
|
+
});
|
|
31
|
+
module.exports = __toCommonJS(index_exports);
|
|
32
|
+
var import_v3 = require("zod/v3");
|
|
33
|
+
|
|
34
|
+
// src/driver.ts
|
|
35
|
+
var import_elasticsearch = require("@elastic/elasticsearch");
|
|
36
|
+
|
|
37
|
+
// src/parse-query.ts
|
|
38
|
+
/**
 * Trims the query and collapses every run of whitespace into a single space.
 * @param {string} query
 * @returns {string}
 */
function normalize(query) {
  const trimmed = query.trim();
  return trimmed.replace(/\s+/g, " ");
}
|
|
41
|
+
/**
 * Separates double-quoted phrases (optionally prefixed with "-") from the
 * remaining whitespace-delimited keywords.
 * @param {string} query
 * @returns {{ keywords: string[], phrases: string[] }} phrases keep their
 *   quotes and any leading "-".
 */
function splitKeywordsAndPhrases(query) {
  const quotedRun = /(-?"[^"]*")/g;
  const matched = query.match(quotedRun) ?? [];
  const phrases = matched.map((phrase) => normalize(phrase));
  const withoutPhrases = query.replace(quotedRun, "");
  const keywords = withoutPhrases.split(/\s+/g).filter((word) => word.length > 0);
  return { keywords, phrases };
}
|
|
47
|
+
/**
 * Partitions terms by a leading "-": negated terms have the "-" stripped.
 * Terms that are empty after stripping are dropped.
 * @param {string[]} queries
 * @returns {{ positive: string[], negative: string[] }}
 */
function splitPositiveAndNegative(queries) {
  const positive = [];
  const negative = [];
  queries.forEach((term) => {
    if (term.startsWith("-")) {
      const stripped = term.substring(1);
      if (stripped) {
        negative.push(stripped);
      }
    } else if (term) {
      positive.push(term);
    }
  });
  return { positive, negative };
}
|
|
60
|
+
/**
 * Drops the first and last character of the string (the surrounding quotes
 * of a phrase).
 * @param {string} query
 * @returns {string}
 */
function unquote(query) {
  const inner = query.slice(1, -1);
  return inner;
}
|
|
63
|
+
/**
 * Parses a raw search string into positive/negative keywords and phrases.
 *
 * A leading "-" negates a keyword or a quoted phrase. Phrases are returned
 * without their quotes, and empty phrases are dropped.
 *
 * @param {string | null | undefined} query Raw user input. `null` and
 *   `undefined` are treated as an empty query instead of throwing; other
 *   non-string values are stringified.
 * @returns {{
 *   keywords: { positive: string[], negative: string[] },
 *   phrases: { positive: string[], negative: string[] }
 * }}
 */
function parseQuery(query) {
  // Guard against a missing `q` so callers get an empty result rather than a
  // TypeError from `.trim()`.
  const text = typeof query === "string" ? query : String(query ?? "");
  const { keywords, phrases } = splitKeywordsAndPhrases(normalize(text));
  const keywordSplit = splitPositiveAndNegative(keywords);
  const phraseSplit = splitPositiveAndNegative(phrases);
  const stripQuotes = (list) => list.map(unquote).filter(Boolean);
  return {
    keywords: {
      positive: keywordSplit.positive,
      negative: keywordSplit.negative,
    },
    phrases: {
      positive: stripQuotes(phraseSplit.positive),
      negative: stripQuotes(phraseSplit.negative),
    },
  };
}
|
|
78
|
+
|
|
79
|
+
// src/query-builder.ts
|
|
80
|
+
// `grant` value the query builder matches when restricting to public pages.
var GRANT_PUBLIC = 1;
// Fields (with boosts) searched by keyword clauses.
var defaultKeywordQueryFields = ["path.ja^2", "body.ja", "path.en^1.2", "body.en"];
// Fields (with boosts) searched by phrase clauses.
var defaultPhraseQueryFields = ["path.raw^2", "body"];
// Matches paths ending in "/".
var portalQuery = { regexp: { "path.raw": ".*/" } };
// Matches paths under "/user/".
var userPathQuery = { prefix: { "path.raw": "/user/" } };

/**
 * Creates a fresh set of empty bool-query clause lists.
 * @returns {{ must: object[], filter: object[], should: object[], must_not: object[] }}
 */
function emptyBuckets() {
  return {
    must: [],
    filter: [],
    should: [],
    must_not: [],
  };
}
|
|
86
|
+
/**
 * Adds one `multi_match` clause covering all keywords to `buckets[kind]`.
 * Does nothing when there are no keywords.
 * @param {object} buckets Clause lists keyed by bool-query section.
 * @param {string[]} keywords
 * @param {string} operator Passed through as the `multi_match` operator.
 * @param {string} kind Name of the bucket to append to.
 */
function appendKeywords(buckets, keywords, operator, kind) {
  if (keywords.length > 0) {
    const clause = {
      multi_match: {
        query: keywords.join(" "),
        fields: defaultKeywordQueryFields,
        operator,
      },
    };
    buckets[kind].push(clause);
  }
}
|
|
96
|
+
/**
 * Adds one phrase-type `multi_match` clause per phrase to `buckets[kind]`.
 * @param {object} buckets Clause lists keyed by bool-query section.
 * @param {string[]} phrases
 * @param {string} operator Passed through as the `multi_match` operator.
 * @param {string} kind Name of the bucket to append to.
 */
function appendPhrases(buckets, phrases, operator, kind) {
  phrases.forEach((phrase) => {
    const clause = {
      multi_match: {
        type: "phrase",
        query: phrase,
        fields: defaultPhraseQueryFields,
        operator,
      },
    };
    buckets[kind].push(clause);
  });
}
|
|
108
|
+
/**
 * Adds path-based clauses for a page type:
 * - "portal": not under /user/, and path matches the portal pattern.
 * - "public": neither under /user/ nor a portal path.
 * - "user": under /user/.
 * Any other type leaves the buckets untouched.
 * @param {object} buckets Clause lists keyed by bool-query section.
 * @param {string} type
 */
function appendTypeFilter(buckets, type) {
  if (type === "portal") {
    buckets.must_not.push(userPathQuery);
    buckets.filter.push(portalQuery);
  } else if (type === "public") {
    buckets.must_not.push(userPathQuery);
    buckets.must_not.push(portalQuery);
  } else if (type === "user") {
    buckets.filter.push(userPathQuery);
  }
}
|
|
123
|
+
/**
 * Restricts results to pages below `pathPrefix` by adding a `wildcard`
 * filter of the form `<prefix>/*` on `path.raw`.
 *
 * The prefix comes from the search request, so the wildcard metacharacters
 * (`*`, `?`) and the escape character (`\`) are escaped; otherwise a path
 * containing them would be interpreted as a pattern instead of literal text.
 *
 * @param {{ filter: object[] }} buckets Clause lists; only `filter` is used.
 * @param {string} pathPrefix Path prefix; one trailing "/" is ignored.
 */
function appendPathPrefix(buckets, pathPrefix) {
  const trimmed = pathPrefix.endsWith("/") ? pathPrefix.slice(0, -1) : pathPrefix;
  const escaped = trimmed.replace(/[\\*?]/g, "\\$&");
  buckets.filter.push({
    wildcard: {
      "path.raw": `${escaped}/*`,
    },
  });
}
|
|
131
|
+
/**
 * Adds the visibility filter for the requesting viewer:
 * - no viewer: only documents whose `grant` matches GRANT_PUBLIC;
 * - admin viewer: no filter;
 * - other viewers: documents that are public, whose `username` equals the
 *   viewer's username, or whose `granted_users` contains the viewer's id.
 * @param {{ filter: object[] }} buckets
 * @param {{ isAdmin?: boolean, username?: string, id?: string } | null | undefined} viewer
 */
function appendGrantFilter(buckets, viewer) {
  if (!viewer) {
    buckets.filter.push({ match: { grant: GRANT_PUBLIC } });
    return;
  }
  if (!viewer.isAdmin) {
    const visibleToViewer = [
      { term: { grant: GRANT_PUBLIC } },
      { term: { username: viewer.username } },
      { term: { granted_users: viewer.id } },
    ];
    buckets.filter.push({
      bool: {
        should: visibleToViewer,
        minimum_should_match: 1,
      },
    });
  }
}
|
|
146
|
+
/**
 * Builds the Elasticsearch search request body for a parsed query.
 *
 * @param {object} params
 * @param {object} params.parsed Output of `parseQuery`.
 * @param {string} [params.pathPrefix] Restrict results to paths below this prefix.
 * @param {object} [params.viewer] Requesting user; drives the grant filter.
 * @param {{ types?: string[] }} [params.grants] Page types to include.
 * @param {{ fieldValueFactor: object, boostMode: string }} [params.functionScore]
 *   When set, the bool query is wrapped in a `function_score` query.
 * @param {number} params.from
 * @param {number} params.size
 * @returns {object} Body with `from`, `size`, `sort`, `highlight`, `query`, `_source`.
 */
function buildSearchBody(params) {
  const { parsed, pathPrefix, viewer, grants, functionScore, from, size } = params;
  const buckets = emptyBuckets();

  // Positive terms must all match; any negative term excludes a document.
  appendKeywords(buckets, parsed.keywords.positive, "and", "must");
  appendKeywords(buckets, parsed.keywords.negative, "or", "must_not");
  appendPhrases(buckets, parsed.phrases.positive, "and", "must");
  appendPhrases(buckets, parsed.phrases.negative, "or", "must_not");

  if (pathPrefix) {
    appendPathPrefix(buckets, pathPrefix);
  }

  const types = grants?.types;
  if (types && types.length > 0) {
    if (types.length === 1) {
      // A single type can be merged straight into the top-level buckets.
      appendTypeFilter(buckets, types[0]);
    } else {
      // Several types: each becomes its own bool, OR-ed together.
      const perType = types.map((t) => {
        const scoped = emptyBuckets();
        appendTypeFilter(scoped, t);
        return { bool: pruneBool(scoped) };
      });
      buckets.filter.push({
        bool: { should: perType, minimum_should_match: 1 },
      });
    }
  }

  appendGrantFilter(buckets, viewer);

  const baseQuery = { bool: pruneBool(buckets) };
  let query = baseQuery;
  if (functionScore) {
    query = {
      function_score: {
        query: baseQuery,
        field_value_factor: functionScore.fieldValueFactor,
        boost_mode: functionScore.boostMode,
      },
    };
  }

  const highlight = {
    pre_tags: ["<mark>"],
    post_tags: ["</mark>"],
    fields: {
      "path.ja": {},
      "body.ja": {},
      body: {},
    },
  };

  return {
    from,
    size,
    sort: [{ _score: "desc" }],
    highlight,
    query,
    _source: ["path", "bookmark_count", "username", "grant"],
  };
}
|
|
196
|
+
/**
 * Returns a bool-query object containing only the non-empty clause lists,
 * in the order must, filter, should, must_not.
 * @param {{ must: object[], filter: object[], should: object[], must_not: object[] }} buckets
 * @returns {object}
 */
function pruneBool(buckets) {
  const out = {};
  for (const section of ["must", "filter", "should", "must_not"]) {
    const clauses = buckets[section];
    if (clauses.length > 0) {
      out[section] = clauses;
    }
  }
  return out;
}
|
|
204
|
+
|
|
205
|
+
// src/mappings/default.json
|
|
206
|
+
var default_default = {
|
|
207
|
+
settings: {
|
|
208
|
+
analysis: {
|
|
209
|
+
filter: {
|
|
210
|
+
english_stop: {
|
|
211
|
+
type: "stop",
|
|
212
|
+
stopwords: "_english_"
|
|
213
|
+
},
|
|
214
|
+
english_stemmer: {
|
|
215
|
+
type: "stemmer",
|
|
216
|
+
language: "english"
|
|
217
|
+
},
|
|
218
|
+
english_possessive_stemmer: {
|
|
219
|
+
type: "stemmer",
|
|
220
|
+
language: "possessive_english"
|
|
221
|
+
}
|
|
222
|
+
},
|
|
223
|
+
tokenizer: {
|
|
224
|
+
ngram_tokenizer: {
|
|
225
|
+
type: "ngram",
|
|
226
|
+
min_gram: 2,
|
|
227
|
+
max_gram: 3,
|
|
228
|
+
token_chars: ["letter", "digit"]
|
|
229
|
+
}
|
|
230
|
+
},
|
|
231
|
+
analyzer: {
|
|
232
|
+
english: {
|
|
233
|
+
tokenizer: "ngram_tokenizer",
|
|
234
|
+
filter: ["english_possessive_stemmer", "lowercase", "english_stop", "english_stemmer"]
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
},
|
|
239
|
+
mappings: {
|
|
240
|
+
properties: {
|
|
241
|
+
path: {
|
|
242
|
+
type: "text",
|
|
243
|
+
fields: {
|
|
244
|
+
raw: {
|
|
245
|
+
type: "text",
|
|
246
|
+
analyzer: "keyword"
|
|
247
|
+
},
|
|
248
|
+
en: {
|
|
249
|
+
type: "text",
|
|
250
|
+
analyzer: "english"
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
},
|
|
254
|
+
body: {
|
|
255
|
+
type: "text",
|
|
256
|
+
fields: {
|
|
257
|
+
en: {
|
|
258
|
+
type: "text",
|
|
259
|
+
analyzer: "english"
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
},
|
|
263
|
+
username: {
|
|
264
|
+
type: "keyword"
|
|
265
|
+
},
|
|
266
|
+
grant: {
|
|
267
|
+
type: "integer"
|
|
268
|
+
},
|
|
269
|
+
granted_users: {
|
|
270
|
+
type: "keyword"
|
|
271
|
+
},
|
|
272
|
+
comment_count: {
|
|
273
|
+
type: "integer"
|
|
274
|
+
},
|
|
275
|
+
bookmark_count: {
|
|
276
|
+
type: "integer"
|
|
277
|
+
},
|
|
278
|
+
like_count: {
|
|
279
|
+
type: "integer"
|
|
280
|
+
},
|
|
281
|
+
created_at: {
|
|
282
|
+
type: "date",
|
|
283
|
+
format: "strict_date_optional_time"
|
|
284
|
+
},
|
|
285
|
+
updated_at: {
|
|
286
|
+
type: "date",
|
|
287
|
+
format: "strict_date_optional_time"
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
};
|
|
292
|
+
|
|
293
|
+
// src/mappings/kuromoji.json
|
|
294
|
+
var kuromoji_default = {
|
|
295
|
+
mappings: {
|
|
296
|
+
properties: {
|
|
297
|
+
path: {
|
|
298
|
+
fields: {
|
|
299
|
+
ja: {
|
|
300
|
+
type: "text",
|
|
301
|
+
analyzer: "kuromoji"
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
},
|
|
305
|
+
body: {
|
|
306
|
+
fields: {
|
|
307
|
+
ja: {
|
|
308
|
+
type: "text",
|
|
309
|
+
analyzer: "kuromoji"
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
};
|
|
316
|
+
|
|
317
|
+
// src/mappings/sudachi.json
|
|
318
|
+
var sudachi_default = {
|
|
319
|
+
settings: {
|
|
320
|
+
analysis: {
|
|
321
|
+
tokenizer: {
|
|
322
|
+
sudachi_tokenizer: {
|
|
323
|
+
type: "sudachi_tokenizer",
|
|
324
|
+
mode: "search"
|
|
325
|
+
}
|
|
326
|
+
},
|
|
327
|
+
analyzer: {
|
|
328
|
+
sudachi_analyzer: {
|
|
329
|
+
filter: [],
|
|
330
|
+
tokenizer: "sudachi_tokenizer",
|
|
331
|
+
type: "custom"
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
},
|
|
336
|
+
mappings: {
|
|
337
|
+
properties: {
|
|
338
|
+
path: {
|
|
339
|
+
fields: {
|
|
340
|
+
ja: {
|
|
341
|
+
type: "text",
|
|
342
|
+
analyzer: "sudachi_analyzer"
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
},
|
|
346
|
+
body: {
|
|
347
|
+
fields: {
|
|
348
|
+
ja: {
|
|
349
|
+
type: "text",
|
|
350
|
+
analyzer: "sudachi_analyzer"
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
};
|
|
357
|
+
|
|
358
|
+
// src/driver.ts
|
|
359
|
+
/**
 * Turns plugin configuration into driver state.
 *
 * With an empty `url` the state carries no client (`client: null`,
 * `node: ""`) and uses `config.indexName` as the base index name.
 *
 * With a `url`, the node address and index name are taken from it. When the
 * URL has no index path segment, the configured `config.indexName` is used
 * as the base index name (previously it was silently ignored in favour of
 * the hard-coded default returned by `parseUri`).
 *
 * @param {{ url: string, indexName: string, requestTimeout: number, analyzer: string }} config
 * @returns {{ client: object | null, node: string, baseIndexName: string,
 *   aliasName: string, analyzer: string, requestTimeout: number }}
 * @throws {Error} If `url` is set but rejected by `parseUri`.
 */
function applyConfig(config) {
  if (!config.url) {
    return {
      client: null,
      node: "",
      baseIndexName: config.indexName,
      aliasName: `${config.indexName}-current`,
      analyzer: config.analyzer,
      requestTimeout: config.requestTimeout
    };
  }
  const { node, indexName: uriIndexName } = parseUri(config.url);
  // parseUri has already parsed this URL successfully, so parsing it again
  // here cannot throw.
  const urlNamesIndex = new URL(config.url).pathname.replace(/\//g, "") !== "";
  const indexName = urlNamesIndex ? uriIndexName : config.indexName || uriIndexName;
  const clientOpts = {
    node,
    requestTimeout: config.requestTimeout
  };
  return {
    client: new import_elasticsearch.Client(clientOpts),
    node,
    baseIndexName: indexName,
    aliasName: `${indexName}-current`,
    analyzer: config.analyzer,
    requestTimeout: config.requestTimeout
  };
}
|
|
384
|
+
/**
 * Recomputes driver state from `config` and writes it onto `target`,
 * keeping the same object identity.
 * @param {object} target Existing driver state, mutated in place.
 * @param {object} config Same shape as accepted by `applyConfig`.
 * @returns {{ oldClient: object | null }} The client that was on `target`
 *   before the update, so the caller can dispose of it.
 */
function applyConfigInPlace(target, config) {
  const previousClient = target.client;
  const nextState = applyConfig(config);
  Object.assign(target, nextState);
  return { oldClient: previousClient };
}
|
|
389
|
+
// How long a user count obtained from `deps.countUsers` is reused (5 minutes).
var USER_COUNT_TTL_MS = 5 * 60 * 1e3;

/**
 * Creates the search driver bound to a mutable state object.
 *
 * @param {object} state2 Driver state as produced by `applyConfig`; it is read
 *   on every call so in-place reconfiguration is picked up.
 * @param {object} [deps]
 * @param {object} [deps.log] Logger with `info`/`warn`/`error`.
 * @param {Function} [deps.countUsers] Async user counter; enables bookmark-based score boosting.
 * @param {Function} [deps.iteratePages] Required by `rebuild()`.
 * @param {Function} [deps.countAllPages] Required by `rebuild()`.
 * @param {Function} [deps.getBookmarkCountsBulk] Required by `rebuild()`.
 * @returns {object} Driver exposing `aliasName`, `node`, `baseIndexName`,
 *   `client`, `index`, `remove`, `query` and `rebuild`.
 */
function createElasticsearchDriver(state2, deps = {}) {
  const { log } = deps;
  let userCountCache = null;

  // Returns the user count, refreshing it once the cached value is older
  // than the TTL; null when no counter was supplied.
  const getCachedUserCount = async () => {
    if (!deps.countUsers) return null;
    const now = Date.now();
    const isFresh = userCountCache && now - userCountCache.at < USER_COUNT_TTL_MS;
    if (isFresh) {
      return userCountCache.value;
    }
    const value = await deps.countUsers();
    userCountCache = { value, at: now };
    return value;
  };

  // Bookmark-count boost whose factor shrinks as the user count grows.
  const toFunctionScore = (userCount) => {
    if (userCount === null) return void 0;
    const factor = 1e4 / (userCount || 1);
    return {
      fieldValueFactor: { field: "bookmark_count", modifier: "log1p", factor, missing: 0 },
      boostMode: "sum"
    };
  };

  // Maps one raw Elasticsearch hit to the driver's hit shape.
  const toHit = (h) => {
    const source = h._source ?? {};
    const hit = {
      id: String(h._id),
      path: source.path ?? "",
      score: typeof h._score === "number" ? h._score : void 0
    };
    const snippet = pickSnippet(h.highlight);
    if (snippet) {
      hit.snippet = snippet;
    }
    return hit;
  };

  return {
    // Getters off the state ref: `reconfigure` makes these mutable, so
    // they must always reflect the *current* state, not a boot-time
    // literal. Tests read `driver.client` to install fakes — since the
    // getter returns the same object reference, mutating its methods
    // still works.
    get aliasName() {
      return state2.aliasName;
    },
    get node() {
      return state2.node;
    },
    get baseIndexName() {
      return state2.baseIndexName;
    },
    get client() {
      return requireClient(state2.client);
    },

    /** Indexes (or replaces) a single document through the alias. */
    async index(doc) {
      const current = snapshot(state2);
      const document = docToEsSource(doc);
      await current.client.index({
        index: current.aliasName,
        id: doc.id,
        document
      });
    },

    /** Deletes a document by id; a not-found response is ignored. */
    async remove(id) {
      const current = snapshot(state2);
      try {
        await current.client.delete({ index: current.aliasName, id });
      } catch (err) {
        if (!isNotFoundError(err)) {
          throw err;
        }
      }
    },

    /** Runs a search and returns `{ total, hits }`. */
    async query(q) {
      const { client, aliasName } = snapshot(state2);
      const page = q.page && q.page > 0 ? q.page : 1;
      const limit = clampLimit(q.limit);
      const from = (page - 1) * limit;
      const functionScore = toFunctionScore(await getCachedUserCount());
      const body = buildSearchBody({
        parsed: parseQuery(q.q),
        pathPrefix: q.pathPrefix,
        viewer: q.viewer,
        grants: q.grants,
        functionScore,
        from,
        size: limit
      });
      const response = await client.search({ index: aliasName, ...body });
      // `hits.total` may be a plain number or an object carrying `value`.
      const totalRaw = response.hits?.total;
      const total = typeof totalRaw === "number" ? totalRaw : totalRaw?.value ?? 0;
      const rawHits = response.hits?.hits ?? [];
      const hits = rawHits.map((h) => toHit(h));
      return { total, hits };
    },

    /**
     * Builds a fresh timestamped index from all pages, points the alias at
     * it, then deletes older generated indices.
     */
    async rebuild() {
      const { client, aliasName, baseIndexName, analyzer } = snapshot(state2);
      const hasRebuildDeps = Boolean(deps.iteratePages && deps.countAllPages && deps.getBookmarkCountsBulk);
      if (!hasRebuildDeps) {
        throw new Error("@crowi/plugin-search-elasticsearch: rebuild() requires iteratePages / countAllPages / getBookmarkCountsBulk deps.");
      }

      const newIndexName = createTimestampedIndexName(baseIndexName);
      log?.info("rebuild: creating index %s", newIndexName);
      const mapping = loadMapping(analyzer);
      await client.indices.create({ index: newIndexName, ...mapping });

      log?.info("rebuild: prefetching bookmark counts");
      const bookmarkCounts = await deps.getBookmarkCountsBulk();

      log?.info("rebuild: indexing all pages");
      await indexAllPages({
        client,
        indexTarget: newIndexName,
        iteratePages: deps.iteratePages,
        countAllPages: deps.countAllPages,
        bookmarkCounts,
        log
      });

      log?.info("rebuild: switching alias %s -> %s", aliasName, newIndexName);
      await switchAlias(client, aliasName, newIndexName);

      log?.info("rebuild: cleaning up old indices");
      await deleteOldIndices(client, baseIndexName, newIndexName);
    }
  };
}
|
|
509
|
+
// Error message used when a driver call is made without a client.
var SEARCH_NOT_CONFIGURED = "@crowi/plugin-search-elasticsearch: Search not configured (Elasticsearch url is empty).";

/**
 * Returns the given client, or throws when it is missing.
 * @param {object | null | undefined} client
 * @returns {object}
 * @throws {Error} With the SEARCH_NOT_CONFIGURED message when `client` is falsy.
 */
function requireClient(client) {
  if (client) {
    return client;
  }
  throw new Error(SEARCH_NOT_CONFIGURED);
}
|
|
516
|
+
/**
 * Captures the state fields an operation needs, so a multi-step operation
 * keeps using the same values even if the state object is mutated meanwhile.
 * @param {object} state2 Driver state.
 * @returns {{ client: object, aliasName: string, baseIndexName: string, analyzer: string }}
 * @throws {Error} When the state has no client.
 */
function snapshot(state2) {
  const client = requireClient(state2.client);
  const { aliasName, baseIndexName, analyzer } = state2;
  return { client, aliasName, baseIndexName, analyzer };
}
|
|
524
|
+
// Page size used when the caller gives no usable limit.
var DEFAULT_LIMIT = 50;
// Largest page size a caller may request.
var MAX_LIMIT = 200;

/**
 * Normalises a requested page size to a whole number in [1, MAX_LIMIT].
 *
 * Missing, zero, negative and non-numeric values fall back to DEFAULT_LIMIT.
 * Fractional values are rounded down (but never below 1) because the result
 * is used both as the search `size` and to compute the `from` offset, which
 * need to be integers.
 *
 * @param {number | string | null | undefined} limit
 * @returns {number}
 */
function clampLimit(limit) {
  const requested = Number(limit);
  if (Number.isNaN(requested) || requested <= 0) return DEFAULT_LIMIT;
  const capped = Math.min(requested, MAX_LIMIT);
  return Math.max(1, Math.floor(capped));
}
|
|
530
|
+
/**
 * Splits an Elasticsearch connection URL of the form
 * `http(s)://[user:pass@]host[:port][/indexName]` into the node address and
 * the index name.
 *
 * - Only the `http:` and `https:` schemes are accepted; the check is made on
 *   the parsed protocol, so look-alike schemes such as `httpx://` are
 *   rejected and the scheme is matched case-insensitively.
 * - Unparseable input is reported with the same error as a wrong scheme
 *   rather than a bare "Invalid URL".
 * - Credentials are kept only when both user name and password are present.
 * - Leading and trailing slashes are stripped from the path; an empty path
 *   yields the default index name "crowi".
 *
 * @param {string} uri
 * @returns {{ node: string, indexName: string }}
 * @throws {Error} If `uri` is not a valid http/https URL.
 */
function parseUri(uri) {
  const invalid = () => new Error("URL for Elasticsearch should starts with http/https");
  let esUrl;
  try {
    esUrl = new URL(uri);
  } catch {
    throw invalid();
  }
  if (esUrl.protocol !== "http:" && esUrl.protocol !== "https:") {
    throw invalid();
  }
  const auth = esUrl.username && esUrl.password ? `${esUrl.username}:${esUrl.password}@` : "";
  const node = `${esUrl.protocol}//${auth}${esUrl.host}`;
  const indexName = esUrl.pathname.replace(/^\/+|\/+$/g, "") || "crowi";
  return { node, indexName };
}
|
|
540
|
+
/**
 * Builds `<base>-<UTC timestamp>-<4 random base-36 chars>`, where the
 * timestamp is year, month, day, hour, minute, second and a three-digit
 * millisecond field concatenated.
 * @param {string} base Base index name.
 * @returns {string}
 */
function createTimestampedIndexName(base) {
  const now = /* @__PURE__ */ new Date();
  const twoDigits = (n) => String(n).padStart(2, "0");
  const stamp = [
    now.getUTCFullYear(),
    twoDigits(now.getUTCMonth() + 1),
    twoDigits(now.getUTCDate()),
    twoDigits(now.getUTCHours()),
    twoDigits(now.getUTCMinutes()),
    twoDigits(now.getUTCSeconds()),
    String(now.getUTCMilliseconds()).padStart(3, "0")
  ].join("");
  const suffix = Math.random().toString(36).slice(2, 6).padEnd(4, "0");
  return `${base}-${stamp}-${suffix}`;
}
// Shape of names produced by createTimestampedIndexName (any base).
var TS_INDEX_RE = /^.+-\d{17}-[a-z0-9]{4}$/;
|
|
548
|
+
/**
 * Returns the index settings/mappings for an analyzer flavour.
 * "default" yields the base mapping as-is; "kuromoji" overlays the kuromoji
 * mapping; any other value overlays the sudachi mapping.
 * @param {string} analyzer
 * @returns {object}
 */
function loadMapping(analyzer) {
  switch (analyzer) {
    case "default":
      return default_default;
    case "kuromoji":
      return deepMergeMappings(default_default, kuromoji_default);
    default:
      return deepMergeMappings(default_default, sudachi_default);
  }
}
|
|
554
|
+
/**
 * True for any non-null, non-array value whose `typeof` is "object".
 * @param {unknown} v
 * @returns {boolean}
 */
function isPlainObject(v) {
  if (v === null || Array.isArray(v)) {
    return false;
  }
  return typeof v === "object";
}
|
|
557
|
+
/**
 * Recursively merges `b` over `a` without mutating either input. Where both
 * sides hold a plain object the merge recurses; otherwise the value from `b`
 * replaces the one from `a` (arrays are replaced, not concatenated).
 * @param {object} a Base object.
 * @param {object} b Overlay object.
 * @returns {object} A new merged object.
 */
function deepMergeMappings(a, b) {
  const merged = { ...a };
  for (const [key, incoming] of Object.entries(b)) {
    const existing = a[key];
    const bothObjects = isPlainObject(existing) && isPlainObject(incoming);
    merged[key] = bothObjects ? deepMergeMappings(existing, incoming) : incoming;
  }
  return merged;
}
|
|
570
|
+
/**
 * Streams every page from `ctx.iteratePages` into `ctx.indexTarget` using
 * bulk requests, skipping pages that lack a creator or revision or that
 * `shouldIndex` rejects.
 *
 * Bulk failures remain best-effort (they are logged and indexing carries
 * on), but they are now counted: a closing warning reports how many
 * documents did not make it into the index, so an incomplete rebuild is
 * visible instead of being hidden behind a successful-looking summary.
 *
 * @param {object} ctx
 * @param {object} ctx.client Elasticsearch client (uses `bulk`).
 * @param {string} ctx.indexTarget Index that receives the documents.
 * @param {Function} ctx.iteratePages Calls the given async handler per page.
 * @param {Function} ctx.countAllPages Async page counter (used for logging).
 * @param {Map<string, number>} ctx.bookmarkCounts Bookmark count per page id.
 * @param {object} [ctx.log] Optional logger.
 * @returns {Promise<void>}
 */
async function indexAllPages(ctx) {
  const allPageCount = await ctx.countAllPages();
  let operations = [];
  let total = 0;
  let skipped = 0;
  let failed = 0;

  const flush = async () => {
    if (operations.length === 0) return;
    // Each document contributes two entries: the action line and the source.
    const docsInBatch = operations.length / 2;
    try {
      const response = await ctx.client.bulk({
        operations,
        timeout: "1d"
      });
      if (response.errors) {
        ctx.log?.warn("rebuild: bulk had item-level errors (took=%dms)", response.took);
        const items = Array.isArray(response.items) ? response.items : [];
        failed += items.filter((item) => Object.values(item ?? {}).some((result) => result?.error)).length;
      }
    } catch (err) {
      ctx.log?.error("rebuild: bulk failed: %o", err);
      failed += docsInBatch;
    }
    operations = [];
  };

  await ctx.iteratePages(async (doc) => {
    if (!doc.creator || !doc.revision || !shouldIndex(doc)) {
      skipped++;
      return;
    }
    total++;
    const id = typeof doc._id === "string" ? doc._id : doc._id.toString();
    const bookmarkCount = ctx.bookmarkCounts.get(id) ?? 0;
    const source = pageStreamDocToEsSource(doc, bookmarkCount);
    operations.push({ index: { _index: ctx.indexTarget, _id: id } });
    operations.push(source);
    // Flush every 2000 documents (4000 bulk entries).
    if (operations.length >= 4e3) {
      await flush();
    }
  });
  await flush();

  ctx.log?.info("rebuild: indexed total=%d skipped=%d (allPageCount=%d)", total, skipped, allPageCount);
  if (failed > 0) {
    ctx.log?.warn("rebuild: %d document(s) failed to index; the new index is incomplete", failed);
  }
}
|
|
608
|
+
/**
 * Decides whether a page belongs in the search index: pages with a
 * `redirectTo` value, and pages whose status is "deleted" or "draft", are
 * excluded.
 * @param {{ redirectTo?: unknown, status?: string }} doc
 * @returns {boolean}
 */
function shouldIndex(doc) {
  const isRedirect = doc.redirectTo != null;
  const isHiddenStatus = doc.status === "deleted" || doc.status === "draft";
  return !isRedirect && !isHiddenStatus;
}
|
|
614
|
+
/**
 * Points `aliasName` at `newIndex` in a single `updateAliases` call,
 * removing the alias from the index reported by `getCurrentAliasInfo`
 * when there is one.
 * @param {object} client Elasticsearch client.
 * @param {string} aliasName
 * @param {string} newIndex
 * @returns {Promise<void>}
 */
async function switchAlias(client, aliasName, newIndex) {
  const current = await getCurrentAliasInfo(client, aliasName);
  const addNew = { add: { index: newIndex, alias: aliasName } };
  const actions = current
    ? [addNew, { remove: { index: current.index, alias: aliasName } }]
    : [addNew];
  await client.indices.updateAliases({ actions });
}
|
|
622
|
+
/**
 * Looks up which index an alias currently points to.
 * @param {object} client Elasticsearch client (uses `indices.existsAlias`
 *   and `cat.aliases`).
 * @param {string} aliasName
 * @returns {Promise<{ alias: string, index: string } | null>} The first
 *   alias row, or null when the alias does not exist, the existence check
 *   throws, or no rows are returned.
 */
async function getCurrentAliasInfo(client, aliasName) {
  let aliasExists;
  try {
    aliasExists = await client.indices.existsAlias({ name: aliasName });
  } catch {
    // Any failure of the existence check is treated as "no alias".
    aliasExists = false;
  }
  if (!aliasExists) {
    return null;
  }
  const rows = await client.cat.aliases({ name: aliasName, format: "json" });
  if (rows.length > 0) {
    const first = rows[0];
    return { alias: first.alias, index: first.index };
  }
  return null;
}
|
|
633
|
+
/**
 * Deletes previously generated indices for `baseIndexName`, keeping
 * `keepIndexName`.
 *
 * An index is deleted only when its name is exactly
 * `<baseIndexName>-<17 digits>-<4 chars of a-z0-9>`. The suffix is matched
 * against the text that follows the `<baseIndexName>-` prefix, so indices
 * of another base that merely shares the prefix (for example
 * `crowi-staging-...` when the base is `crowi`) are left alone.
 *
 * @param {object} client Elasticsearch client (uses `cat.indices` and
 *   `indices.delete`).
 * @param {string} baseIndexName
 * @param {string} keepIndexName Index that must survive.
 * @returns {Promise<void>}
 */
async function deleteOldIndices(client, baseIndexName, keepIndexName) {
  const prefix = `${baseIndexName}-`;
  const generatedSuffix = /^\d{17}-[a-z0-9]{4}$/;
  const rows = await client.cat.indices({ index: `${prefix}*`, format: "json" });
  const toDelete = rows
    .map((row) => row.index)
    .filter(
      (name) =>
        typeof name === "string" &&
        name !== keepIndexName &&
        name.startsWith(prefix) &&
        generatedSuffix.test(name.slice(prefix.length)),
    );
  if (toDelete.length === 0) return;
  await client.indices.delete({ index: toDelete });
}
|
|
640
|
+
/**
 * True when `err` is an object carrying HTTP status 404, either as
 * `statusCode` or as `meta.statusCode`.
 * @param {unknown} err
 * @returns {boolean}
 */
function isNotFoundError(err) {
  if (typeof err !== "object" || err === null) {
    return false;
  }
  const statusCodes = [err.statusCode, err.meta?.statusCode];
  return statusCodes.includes(404);
}
|
|
645
|
+
/**
 * Converts a searchable document into the Elasticsearch `_source` object.
 * `path` and `body` are always present. Metadata fields are included only
 * when they pass their reader, and are accepted in either snake_case or
 * camelCase (snake_case wins when both are set).
 * @param {{ path: string, body: string, meta?: object }} doc
 * @returns {object}
 */
function docToEsSource(doc) {
  const meta = doc.meta ?? {};
  const source = {
    path: doc.path,
    body: doc.body
  };
  // Order matters only for the key order of the resulting object.
  const optionalFields = [
    ["username", readString(meta.username)],
    ["grant", readNumber(meta.grant)],
    ["granted_users", readStringArray(meta.granted_users ?? meta.grantedUsers)],
    ["comment_count", readNumber(meta.comment_count ?? meta.commentCount)],
    ["bookmark_count", readNumber(meta.bookmark_count ?? meta.bookmarkCount)],
    ["like_count", readNumber(meta.like_count ?? meta.likeCount)],
    ["created_at", readDateLike(meta.created_at ?? meta.createdAt)],
    ["updated_at", readDateLike(meta.updated_at ?? meta.updatedAt)]
  ];
  for (const [field, value] of optionalFields) {
    if (value !== void 0) {
      source[field] = value;
    }
  }
  return source;
}
|
|
669
|
+
/**
 * Converts a page document from the rebuild stream into an Elasticsearch
 * `_source` object by first shaping it as a searchable document and then
 * delegating to `docToEsSource`.
 * @param {object} doc Page document; reads `_id`, `path`, `revision.body`,
 *   `creator.username`, `grant`, `grantedUsers`, `commentCount`, `liker`,
 *   `createdAt` and `updatedAt`.
 * @param {number} bookmarkCount Precomputed bookmark count for the page.
 * @returns {object}
 */
function pageStreamDocToEsSource(doc, bookmarkCount) {
  const asString = (value) => (typeof value === "string" ? value : value.toString());
  const grantedUsers = (doc.grantedUsers ?? []).map((u) => asString(u));
  const meta = {
    username: doc.creator?.username,
    grant: doc.grant,
    granted_users: grantedUsers,
    comment_count: doc.commentCount ?? 0,
    bookmark_count: bookmarkCount,
    like_count: doc.liker?.length ?? 0,
    created_at: doc.createdAt,
    updated_at: doc.updatedAt
  };
  return docToEsSource({
    id: asString(doc._id),
    path: doc.path,
    body: doc.revision?.body ?? "",
    meta
  });
}
|
|
688
|
+
/**
 * Picks the first highlight fragment, trying fields in the order
 * body.ja, body, path.ja, body.en, path.en.
 * @param {Record<string, string[]> | undefined} highlight
 * @returns {string | undefined}
 */
function pickSnippet(highlight) {
  if (!highlight) return void 0;
  const preference = ["body.ja", "body", "path.ja", "body.en", "path.en"];
  const chosen = preference.find((field) => {
    const fragments = highlight[field];
    return Boolean(fragments && fragments.length > 0);
  });
  return chosen === void 0 ? void 0 : highlight[chosen][0];
}
|
|
696
|
+
/**
 * Returns `value` when it is a non-empty string, otherwise undefined.
 * @param {unknown} value
 * @returns {string | undefined}
 */
function readString(value) {
  if (typeof value !== "string" || value.length === 0) {
    return void 0;
  }
  return value;
}
|
|
699
|
+
/**
 * Returns `value` when it is a finite number, otherwise undefined.
 * @param {unknown} value
 * @returns {number | undefined}
 */
function readNumber(value) {
  const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
  return isFiniteNumber ? value : void 0;
}
|
|
703
|
+
/**
 * Converts an array into an array of strings: string items are kept,
 * non-null objects with a `toString` method are stringified, and every
 * other item is dropped. Non-array input yields undefined.
 * @param {unknown} value
 * @returns {string[] | undefined}
 */
function readStringArray(value) {
  if (!Array.isArray(value)) return void 0;
  return value.reduce((acc, item) => {
    if (typeof item === "string") {
      acc.push(item);
    } else if (item && typeof item === "object" && typeof item.toString === "function") {
      acc.push(item.toString());
    }
    return acc;
  }, []);
}
|
|
714
|
+
/**
 * Passes through `Date` instances and strings unchanged; anything else
 * yields undefined.
 * @param {unknown} value
 * @returns {Date | string | undefined}
 */
function readDateLike(value) {
  const accepted = value instanceof Date || typeof value === "string";
  return accepted ? value : void 0;
}
|
|
719
|
+
|
|
720
|
+
// src/index.ts
|
|
721
|
+
// Validation schema for the plugin's configuration. Unknown keys are
// rejected (`.strict()`), and every field has a default.
var ElasticsearchConfigSchema = import_v3.z.object({
  /**
   * `https://[user:pass@]host[:port][/indexName]`. While this is empty the
   * driver is unusable: `registerSearch` skips registration when the url
   * is empty at boot, and if the url is cleared later through
   * `reconfigure`, driver calls that need the client throw a
   * "Search not configured" error.
   *
   * Marked `@sensitive` because the URL embeds the cluster password
   * (Bonsai-style `https://USER:PASS@HOST/INDEX`); we don't want
   * Mongo to keep it in plaintext.
   */
  url: import_v3.z.string().describe("@sensitive Elasticsearch endpoint (https://USER:PASS@HOST/INDEX format).").default(""),
  /**
   * Base index name. Used as the `indexName` if not provided in the
   * URL path. The runtime alias `${indexName}-current` is what the
   * driver actually targets for read / write.
   */
  indexName: import_v3.z.string().default("crowi"),
  // Positive integer handed to the Elasticsearch client as `requestTimeout`.
  requestTimeout: import_v3.z.number().int().positive().default(5e3),
  /**
   * Mapping flavour. Cluster requirements:
   * - `default`: no extra ES plugin.
   * - `kuromoji`: `analysis-kuromoji` plugin (Elastic-distributed).
   * The dev image (`elasticsearch.Dockerfile`) preinstalls it.
   * - `sudachi`: third-party `analysis-sudachi` plugin + dictionary.
   * NOT bundled in the dev image; operators must build a derived
   * image. Picking this without the plugin makes `rebuild()` fail.
   */
  analyzer: import_v3.z.enum(["default", "kuromoji", "sudachi"]).describe("default / kuromoji (analysis-kuromoji plugin) / sudachi (analysis-sudachi plugin + dictionary, custom image required)").default("default")
}).strict();
|
|
750
|
+
// Name under which the plugin identifies itself.
var PLUGIN_NAME = "@crowi/plugin-search-elasticsearch";
// Driver state shared by registerSearch and reconfigure; null until a
// driver has been registered.
var state = null;

/**
 * Picks the fields of the validated plugin config that the driver needs.
 * @param {{ url: string, indexName: string, requestTimeout: number, analyzer: string }} config
 * @returns {{ url: string, indexName: string, requestTimeout: number, analyzer: string }}
 */
function toDriverConfig(config) {
  const { url, indexName, requestTimeout, analyzer } = config;
  return { url, indexName, requestTimeout, analyzer };
}
|
|
760
|
+
/**
 * Plugin definition: registers the Elasticsearch search driver at boot and
 * applies configuration changes to the running driver.
 */
var plugin = {
  name: PLUGIN_NAME,
  version: "0.1.0-dev",
  configSchema: ElasticsearchConfigSchema,
  adminPlacement: {
    label: "Elasticsearch",
    icon: "search"
    // section omitted: derived from registerSearch -> 'search'
  },

  /**
   * Registers the driver as "elasticsearch" when a url is configured;
   * otherwise logs a warning and registers nothing.
   */
  registerSearch: (registry, ctx) => {
    const config = ctx.config();
    const hasUrl = Boolean(config.url);
    if (!hasUrl) {
      ctx.log.warn("url is empty; the elasticsearch search driver is disabled until configured.");
      return;
    }
    state = applyConfig(toDriverConfig(config));
    const driver = buildDriver(state, ctx);
    registry.register("elasticsearch", driver);
    ctx.log.debug("registered elasticsearch search driver (node=%s, indexName=%s, analyzer=%s)", driver.node, driver.baseIndexName, config.analyzer);
  },

  /**
   * Re-applies the current configuration to the existing driver state and
   * closes the previous client. Has no effect (beyond a warning) when no
   * driver was registered at boot.
   */
  reconfigure: (ctx) => {
    if (state === null || !state) {
      ctx.log.warn("reconfigure: driver was not registered at boot (url was empty); a server restart is required to enable Elasticsearch search.");
      return;
    }
    const config = ctx.config();
    if (!config.url) {
      ctx.log.warn('reconfigure: url cleared; search requests will fail with a "Search not configured" error until a url is set.');
    }
    const { oldClient: previousClient } = applyConfigInPlace(state, toDriverConfig(config));
    if (previousClient) {
      // Fire-and-forget: a failed close is only logged.
      const onCloseFailure = (err) => {
        ctx.log.warn("reconfigure: closing the previous Elasticsearch client failed: %o", err);
      };
      void previousClient.close().catch(onCloseFailure);
    }
    ctx.log.debug("reconfigured elasticsearch search driver (node=%s, index=%s, analyzer=%s)", state.node || "<unset>", state.baseIndexName, config.analyzer);
  }
};
var index_default = plugin;
|
|
799
|
+
/**
 * Wires the Elasticsearch driver to the host application's models.
 * @param {object} driverState Driver state from `applyConfig`.
 * @param {object} ctx Plugin context providing `model(name)` and `log`.
 * @returns {object} The driver created by `createElasticsearchDriver`.
 */
function buildDriver(driverState, ctx) {
  const Page = ctx.model("Page");
  const Bookmark = ctx.model("Bookmark");
  const User = ctx.model("User");

  const idToString = (id) => (typeof id === "string" ? id : id.toString());

  // Streams every page (not only public ones) through the handler.
  const iteratePages = async (handler) => {
    await Page.getStreamOfFindAll({ publicOnly: false }).eachAsync(handler);
  };

  // Builds a page-id -> bookmark-count map with one aggregation.
  const getBookmarkCountsBulk = async () => {
    const rows = await Bookmark.aggregate([{ $group: { _id: "$page", n: { $sum: 1 } } }]);
    return new Map(rows.map((row) => [idToString(row._id), row.n]));
  };

  return createElasticsearchDriver(driverState, {
    log: ctx.log,
    iteratePages,
    countAllPages: () => Page.allPageCount(),
    getBookmarkCountsBulk,
    countUsers: () => User.countDocuments({}).exec()
  });
}
|
|
822
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
823
|
+
0 && (module.exports = {
|
|
824
|
+
ElasticsearchConfigSchema,
|
|
825
|
+
applyConfig,
|
|
826
|
+
applyConfigInPlace,
|
|
827
|
+
buildSearchBody,
|
|
828
|
+
createElasticsearchDriver,
|
|
829
|
+
parseQuery
|
|
830
|
+
});
|
|
831
|
+
//# sourceMappingURL=index.js.map
|