@tinacms/search 0.0.0-bf8b9b7-20251204000148 → 0.0.0-c19d29e-20251224001156

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -0,0 +1,55 @@
1
+ # @tinacms/search
2
+
3
+ Full-text search for TinaCMS powered by [search-index](https://github.com/fergiemcdowall/search-index).
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pnpm add @tinacms/search
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```typescript
14
+ import { LocalSearchIndexClient } from "@tinacms/search";
15
+
16
+ const client = new LocalSearchIndexClient({
17
+ stopwordLanguages: ["eng"],
18
+ });
19
+
20
+ await client.onStartIndexing();
21
+
22
+ await client.put([
23
+ {
24
+ _id: "1",
25
+ title: "Getting Started",
26
+ body: "TinaCMS is a Git-backed headless CMS",
27
+ },
28
+ {
29
+ _id: "2",
30
+ title: "React Tutorial",
31
+ body: "Learn how to build React applications",
32
+ },
33
+ ]);
34
+
35
+ // Basic search
36
+ const results = await client.query("TinaCMS", { limit: 10 });
37
+
38
+ // Fuzzy search (handles typos)
39
+ const fuzzyResults = await client.query("TinCMS tutrial", {
40
+ fuzzy: true,
41
+ limit: 10,
42
+ });
43
+ ```
44
+
45
+ ## API
46
+
47
+ - `client.onStartIndexing()` - Initialize the index
48
+ - `client.put(documents)` - Index documents
49
+ - `client.query(query, options)` - Search the index
50
+ - `client.del(ids)` - Delete documents
51
+ - `client.export(filename)` - Export index to SQLite
52
+
53
+ ## License
54
+
55
+ Apache 2.0
@@ -1,5 +1,6 @@
1
- import type { SearchClient } from '../types';
1
+ import type { SearchClient, SearchOptions, SearchQueryResponse, IndexableDocument, SearchIndex } from '../types';
2
2
  import { MemoryLevel } from 'memory-level';
3
+ import { FuzzySearchWrapper } from '../fuzzy-search-wrapper';
3
4
  type TinaSearchIndexerClientOptions = {
4
5
  stopwordLanguages?: string[];
5
6
  tokenSplitRegex?: string;
@@ -10,23 +11,16 @@ type TinaCloudSearchIndexerClientOptions = {
10
11
  indexerToken: string;
11
12
  } & TinaSearchIndexerClientOptions;
12
13
  export declare class LocalSearchIndexClient implements SearchClient {
13
- searchIndex: any;
14
+ searchIndex?: SearchIndex;
14
15
  protected readonly memoryLevel: MemoryLevel;
15
16
  private readonly stopwords;
16
17
  private readonly tokenSplitRegex;
18
+ fuzzySearchWrapper?: FuzzySearchWrapper;
17
19
  constructor(options: TinaSearchIndexerClientOptions);
18
20
  onStartIndexing(): Promise<void>;
19
- put(docs: any[]): Promise<any>;
20
- del(ids: string[]): Promise<any>;
21
- query(query: string, options: {
22
- cursor?: string;
23
- limit?: number;
24
- } | undefined): Promise<{
25
- results: any[];
26
- total: number;
27
- nextCursor: string | null;
28
- prevCursor: string | null;
29
- }>;
21
+ put(docs: IndexableDocument[]): Promise<void>;
22
+ del(ids: string[]): Promise<void>;
23
+ query(query: string, options?: SearchOptions): Promise<SearchQueryResponse>;
30
24
  export(filename: string): Promise<void>;
31
25
  }
32
26
  export declare class TinaCMSSearchIndexClient extends LocalSearchIndexClient {
@@ -34,6 +28,9 @@ export declare class TinaCMSSearchIndexClient extends LocalSearchIndexClient {
34
28
  private readonly branch;
35
29
  private readonly indexerToken;
36
30
  constructor(options: TinaCloudSearchIndexerClientOptions);
31
+ private getUploadUrl;
32
+ private serializeIndex;
33
+ private uploadIndex;
37
34
  onFinishIndexing(): Promise<void>;
38
35
  }
39
36
  export {};
@@ -0,0 +1,11 @@
1
+ import type { FuzzyMatch, FuzzySearchOptions } from './types';
2
+ export declare class FuzzyCache {
3
+ private cache;
4
+ private maxSize;
5
+ constructor(maxSize?: number);
6
+ private getCacheKey;
7
+ get(query: string, options: Partial<FuzzySearchOptions>): FuzzyMatch[] | undefined;
8
+ set(query: string, options: Partial<FuzzySearchOptions>, results: FuzzyMatch[]): void;
9
+ clear(): void;
10
+ get size(): number;
11
+ }
@@ -0,0 +1,8 @@
1
+ import type { FuzzySearchOptions, FuzzyMatch } from './types';
2
+ export declare const PREFIX_MATCH_MIN_SIMILARITY = 0.8;
3
+ export declare function levenshteinDistance(str1: string, str2: string): number;
4
+ export declare function similarityScore(str1: string, str2: string, useTranspositions?: boolean): number;
5
+ export declare function damerauLevenshteinDistance(str1: string, str2: string): number;
6
+ export declare function getNgrams(str: string, n?: number): Set<string>;
7
+ export declare function ngramOverlap(ngrams1: Set<string>, ngrams2: Set<string>): number;
8
+ export declare function findSimilarTerms(query: string, dictionary: string[], options?: FuzzySearchOptions): FuzzyMatch[];
@@ -0,0 +1,4 @@
1
+ export type { FuzzySearchOptions, FuzzyMatch } from './types';
2
+ export { DEFAULT_FUZZY_OPTIONS } from './types';
3
+ export { FuzzyCache } from './cache';
4
+ export { levenshteinDistance, similarityScore, damerauLevenshteinDistance, findSimilarTerms, getNgrams, ngramOverlap, PREFIX_MATCH_MIN_SIMILARITY, } from './distance';
@@ -0,0 +1,19 @@
1
+ export interface FuzzySearchOptions {
2
+ maxDistance?: number;
3
+ minSimilarity?: number;
4
+ maxResults?: number;
5
+ useTranspositions?: boolean;
6
+ caseSensitive?: boolean;
7
+ /** Use n-gram filtering for candidate selection (supports transpositions) */
8
+ useNgramFilter?: boolean;
9
+ /** Size of n-grams for filtering (default: 2) */
10
+ ngramSize?: number;
11
+ /** Minimum n-gram overlap ratio to consider a candidate (0-1, default: 0.2) */
12
+ minNgramOverlap?: number;
13
+ }
14
+ export interface FuzzyMatch {
15
+ term: string;
16
+ distance: number;
17
+ similarity: number;
18
+ }
19
+ export declare const DEFAULT_FUZZY_OPTIONS: Required<FuzzySearchOptions>;
@@ -0,0 +1,23 @@
1
+ import type { FuzzySearchOptions, FuzzyMatch } from './fuzzy';
2
+ import type { SearchQueryResponse, SearchIndex } from './types';
3
+ interface QueryOptions {
4
+ limit?: number;
5
+ cursor?: string;
6
+ fuzzyOptions?: FuzzySearchOptions;
7
+ }
8
+ export declare class FuzzySearchWrapper {
9
+ private cache;
10
+ private searchIndex;
11
+ constructor(searchIndex: SearchIndex, cacheSize?: number);
12
+ getDictionary(field?: string): Promise<string[]>;
13
+ findSimilar(query: string, field?: string, options?: FuzzySearchOptions): Promise<FuzzyMatch[]>;
14
+ expandQuery(query: string, options?: FuzzySearchOptions): Promise<{
15
+ original: string[];
16
+ expanded: string[];
17
+ matches: Record<string, FuzzyMatch[]>;
18
+ }>;
19
+ query(query: string, options?: QueryOptions): Promise<SearchQueryResponse>;
20
+ clearCache(): void;
21
+ getCacheSize(): number;
22
+ }
23
+ export {};
@@ -1,18 +1,28 @@
1
- export type { SearchClient } from './types';
1
+ export type { SearchClient, SearchResult, SearchQueryResponse, IndexableDocument, SearchOptions, } from './types';
2
+ export type { FuzzySearchOptions, FuzzyMatch } from './fuzzy';
2
3
  export { processDocumentForIndexing } from './indexer/utils';
3
- export declare const queryToSearchIndexQuery: (query: string, stopwordLanguages?: string[]) => any;
4
- export declare const optionsToSearchIndexOptions: (options?: {
4
+ import type { FuzzyMatch } from './fuzzy';
5
+ import type { SearchResult, SearchQueryResponse } from './types';
6
+ interface SearchQuery {
7
+ AND: string[];
8
+ }
9
+ interface PaginationOptions {
5
10
  limit?: number;
6
11
  cursor?: string;
7
- }) => {
8
- PAGE?: {};
9
- };
10
- export declare const parseSearchIndexResponse: (data: any, options?: {
11
- limit?: number;
12
- cursor?: string;
13
- }) => {
14
- results: any;
15
- total: any;
16
- prevCursor: any;
17
- nextCursor: string;
18
- };
12
+ }
13
+ interface PageOptions {
14
+ PAGE?: {
15
+ SIZE: number;
16
+ NUMBER: number;
17
+ };
18
+ }
19
+ interface SearchIndexResponse {
20
+ RESULT: SearchResult[];
21
+ RESULT_LENGTH: number;
22
+ FUZZY_MATCHES?: Record<string, FuzzyMatch[]>;
23
+ NEXT_CURSOR?: string | null;
24
+ PREV_CURSOR?: string | null;
25
+ }
26
+ export declare const queryToSearchIndexQuery: (query: string, stopwordLanguages?: string[]) => SearchQuery;
27
+ export declare const optionsToSearchIndexOptions: (options?: PaginationOptions) => PageOptions;
28
+ export declare const parseSearchIndexResponse: (data: SearchIndexResponse, options?: PaginationOptions) => SearchQueryResponse;
@@ -5,195 +5,202 @@ var __publicField = (obj, key, value) => {
5
5
  return value;
6
6
  };
7
7
  import * as sw from "stopword";
8
+ const INDEXABLE_NODE_TYPES = ["text", "code_block", "html"];
8
9
  class StringBuilder {
9
10
  constructor(limit) {
10
- __publicField(this, "buffer");
11
- __publicField(this, "length", 0);
11
+ __publicField(this, "buffer", []);
12
12
  __publicField(this, "limit");
13
- this.buffer = [];
13
+ __publicField(this, "length", 0);
14
14
  this.limit = limit;
15
15
  }
16
16
  append(str) {
17
- if (this.length + str.length > this.limit) {
17
+ if (this.length + str.length > this.limit)
18
18
  return true;
19
- } else {
20
- this.buffer.push(str);
21
- this.length += str.length;
22
- if (this.length > this.limit) {
23
- return true;
24
- }
25
- return false;
26
- }
19
+ this.buffer.push(str);
20
+ this.length += str.length;
21
+ return this.length > this.limit;
27
22
  }
28
23
  toString() {
29
24
  return this.buffer.join(" ");
30
25
  }
31
26
  }
32
- const extractText = (data, acc, indexableNodeTypes) => {
33
- var _a, _b;
34
- if (data) {
35
- if (indexableNodeTypes.indexOf(data.type) !== -1 && (data.text || data.value)) {
36
- const tokens = tokenizeString(data.text || data.value);
37
- for (const token of tokens) {
38
- if (acc.append(token)) {
39
- return;
40
- }
41
- }
27
+ const tokenizeString = (str) => {
28
+ return str.split(/[\s\.,]+/).map((s) => s.toLowerCase()).filter((s) => s);
29
+ };
30
+ const extractText = (data, builder, nodeTypes) => {
31
+ var _a;
32
+ if (!data)
33
+ return;
34
+ if (nodeTypes.includes(data.type ?? "") && (data.text || data.value)) {
35
+ const tokens = tokenizeString(data.text || data.value || "");
36
+ for (const token of tokens) {
37
+ if (builder.append(token))
38
+ return;
42
39
  }
43
- (_b = (_a = data.children) == null ? void 0 : _a.forEach) == null ? void 0 : _b.call(
44
- _a,
45
- (child) => extractText(child, acc, indexableNodeTypes)
46
- );
47
40
  }
41
+ (_a = data.children) == null ? void 0 : _a.forEach((child) => extractText(child, builder, nodeTypes));
48
42
  };
49
- const relativePath = (path, collection) => {
43
+ const getRelativePath = (path, collection) => {
50
44
  return path.replace(/\\/g, "/").replace(collection.path, "").replace(/^\/|\/$/g, "");
51
45
  };
52
- const tokenizeString = (str) => {
53
- return str.split(/[\s\.,]+/).map((s) => s.toLowerCase()).filter((s) => s);
54
- };
55
- const processTextFieldValue = (value, maxLen) => {
46
+ const processTextField = (value, maxLength) => {
56
47
  const tokens = tokenizeString(value);
57
- const builder = new StringBuilder(maxLen);
48
+ const builder = new StringBuilder(maxLength);
58
49
  for (const part of tokens) {
59
- if (builder.append(part)) {
50
+ if (builder.append(part))
60
51
  break;
61
- }
62
52
  }
63
53
  return builder.toString();
64
54
  };
55
+ const processRichTextField = (value, maxLength) => {
56
+ const builder = new StringBuilder(maxLength);
57
+ extractText(value, builder, INDEXABLE_NODE_TYPES);
58
+ return builder.toString();
59
+ };
60
+ const processObjectField = (data, path, collection, textIndexLength, field) => {
61
+ if (field.list) {
62
+ return data.map(
63
+ (obj) => processDocumentForIndexing(obj, path, collection, textIndexLength, field)
64
+ );
65
+ }
66
+ return processDocumentForIndexing(
67
+ data,
68
+ path,
69
+ collection,
70
+ textIndexLength,
71
+ field
72
+ );
73
+ };
74
+ const processStringField = (data, maxLength, isList) => {
75
+ if (isList) {
76
+ return data.map(
77
+ (value) => processTextField(value, maxLength)
78
+ );
79
+ }
80
+ return processTextField(data, maxLength);
81
+ };
82
+ const processRichTextFieldData = (data, maxLength, isList) => {
83
+ if (isList) {
84
+ return data.map(
85
+ (value) => processRichTextField(value, maxLength)
86
+ );
87
+ }
88
+ return processRichTextField(data, maxLength);
89
+ };
65
90
  const processDocumentForIndexing = (data, path, collection, textIndexLength, field) => {
66
91
  if (!field) {
67
- const relPath = relativePath(path, collection);
68
- data["_id"] = `${collection.name}:${relPath}`;
69
- data["_relativePath"] = relPath;
92
+ const relativePath = getRelativePath(path, collection);
93
+ data["_id"] = `${collection.name}:${relativePath}`;
94
+ data["_relativePath"] = relativePath;
70
95
  }
71
- for (const f of (field == null ? void 0 : field.fields) || collection.fields || []) {
96
+ const fields = (field == null ? void 0 : field.fields) || collection.fields || [];
97
+ for (const f of fields) {
72
98
  if (!f.searchable) {
73
99
  delete data[f.name];
74
100
  continue;
75
101
  }
76
- const isList = f.list;
77
- if (data[f.name]) {
78
- if (f.type === "object") {
79
- if (isList) {
80
- data[f.name] = data[f.name].map(
81
- (obj) => processDocumentForIndexing(
82
- obj,
83
- path,
84
- collection,
85
- textIndexLength,
86
- f
87
- )
88
- );
89
- } else {
90
- data[f.name] = processDocumentForIndexing(
91
- data[f.name],
92
- path,
93
- collection,
94
- textIndexLength,
95
- f
96
- );
97
- }
98
- } else if (f.type === "string") {
99
- const fieldTextIndexLength = f.maxSearchIndexFieldLength || textIndexLength;
100
- if (isList) {
101
- data[f.name] = data[f.name].map(
102
- (value) => processTextFieldValue(value, fieldTextIndexLength)
103
- );
104
- } else {
105
- data[f.name] = processTextFieldValue(
106
- data[f.name],
107
- fieldTextIndexLength
108
- );
109
- }
110
- } else if (f.type === "rich-text") {
111
- const fieldTextIndexLength = f.maxSearchIndexFieldLength || textIndexLength;
112
- if (isList) {
113
- data[f.name] = data[f.name].map((value) => {
114
- const acc = new StringBuilder(fieldTextIndexLength);
115
- extractText(value, acc, ["text", "code_block", "html"]);
116
- return acc.toString();
117
- });
118
- } else {
119
- const acc = new StringBuilder(fieldTextIndexLength);
120
- extractText(data[f.name], acc, ["text", "code_block", "html"]);
121
- data[f.name] = acc.toString();
122
- }
123
- }
102
+ if (!data[f.name])
103
+ continue;
104
+ const fieldMaxLength = f.maxSearchIndexFieldLength || textIndexLength;
105
+ const isList = Boolean(f.list);
106
+ switch (f.type) {
107
+ case "object":
108
+ data[f.name] = processObjectField(
109
+ data[f.name],
110
+ path,
111
+ collection,
112
+ textIndexLength,
113
+ f
114
+ );
115
+ break;
116
+ case "string":
117
+ data[f.name] = processStringField(
118
+ data[f.name],
119
+ fieldMaxLength,
120
+ isList
121
+ );
122
+ break;
123
+ case "rich-text":
124
+ data[f.name] = processRichTextFieldData(
125
+ data[f.name],
126
+ fieldMaxLength,
127
+ isList
128
+ );
129
+ break;
124
130
  }
125
131
  }
126
132
  return data;
127
133
  };
128
- const memo = {};
134
+ const stopwordCache = {};
129
135
  const lookupStopwords = (keys, defaultStopWords = sw.eng) => {
130
- let stopwords = defaultStopWords;
131
- if (keys) {
132
- if (memo[keys.join(",")]) {
133
- return memo[keys.join(",")];
134
- }
135
- stopwords = [];
136
- for (const key of keys) {
137
- stopwords.push(...sw[key]);
138
- }
139
- memo[keys.join(",")] = stopwords;
136
+ if (!keys) {
137
+ return defaultStopWords;
140
138
  }
139
+ const cacheKey = keys.join(",");
140
+ if (stopwordCache[cacheKey]) {
141
+ return stopwordCache[cacheKey];
142
+ }
143
+ const stopwords = keys.flatMap((key) => sw[key] || []);
144
+ stopwordCache[cacheKey] = stopwords;
141
145
  return stopwords;
142
146
  };
143
147
  const queryToSearchIndexQuery = (query, stopwordLanguages) => {
144
- let q;
145
148
  const parts = query.split(" ");
146
149
  const stopwords = lookupStopwords(stopwordLanguages);
147
150
  if (parts.length === 1) {
148
- q = { AND: [parts[0]] };
149
- } else {
150
- q = {
151
- AND: parts.filter(
152
- (part) => part.toLowerCase() !== "and" && stopwords.indexOf(part.toLowerCase()) === -1
153
- )
154
- };
151
+ return { AND: [parts[0]] };
155
152
  }
156
- return q;
153
+ const filteredParts = parts.filter(
154
+ (part) => part.toLowerCase() !== "and" && !stopwords.includes(part.toLowerCase())
155
+ );
156
+ return { AND: filteredParts };
157
157
  };
158
158
  const optionsToSearchIndexOptions = (options) => {
159
- const opt = {};
160
- if (options == null ? void 0 : options.limit) {
161
- opt["PAGE"] = {
159
+ if (!(options == null ? void 0 : options.limit))
160
+ return {};
161
+ return {
162
+ PAGE: {
162
163
  SIZE: options.limit,
163
- NUMBER: (options == null ? void 0 : options.cursor) ? parseInt(options.cursor) : 0
164
- };
165
- }
166
- return opt;
164
+ NUMBER: options.cursor ? parseInt(options.cursor) : 0
165
+ }
166
+ };
167
167
  };
168
168
  const parseSearchIndexResponse = (data, options) => {
169
- const results = data["RESULT"];
170
- const total = data["RESULT_LENGTH"];
171
- if ((options == null ? void 0 : options.cursor) && (options == null ? void 0 : options.limit)) {
172
- const prevCursor = options.cursor === "0" ? null : (parseInt(options.cursor) - 1).toString();
173
- const nextCursor = total <= (parseInt(options.cursor) + 1) * options.limit ? null : (parseInt(options.cursor) + 1).toString();
169
+ const resultArray = (data == null ? void 0 : data.RESULT) ?? (data == null ? void 0 : data.results);
170
+ if (!data || !Array.isArray(resultArray)) {
174
171
  return {
175
- results,
176
- total,
177
- prevCursor,
178
- nextCursor
179
- };
180
- } else if (!(options == null ? void 0 : options.cursor) && (options == null ? void 0 : options.limit)) {
181
- const prevCursor = null;
182
- const nextCursor = total <= options.limit ? null : "1";
183
- return {
184
- results,
185
- total,
186
- prevCursor,
187
- nextCursor
172
+ results: [],
173
+ total: 0,
174
+ prevCursor: null,
175
+ nextCursor: null,
176
+ fuzzyMatches: void 0
188
177
  };
189
- } else {
178
+ }
179
+ const results = data.RESULT ?? data.results;
180
+ const total = data.RESULT_LENGTH ?? data.total ?? 0;
181
+ const fuzzyMatches = data.FUZZY_MATCHES ?? data.fuzzyMatches;
182
+ const nextCursor = data.NEXT_CURSOR ?? data.nextCursor;
183
+ const prevCursor = data.PREV_CURSOR ?? data.prevCursor;
184
+ if (nextCursor !== void 0 || prevCursor !== void 0) {
190
185
  return {
191
186
  results,
192
187
  total,
193
- prevCursor: null,
194
- nextCursor: null
188
+ prevCursor: prevCursor ?? null,
189
+ nextCursor: nextCursor ?? null,
190
+ fuzzyMatches
195
191
  };
196
192
  }
193
+ const currentPage = (options == null ? void 0 : options.cursor) ? parseInt(options.cursor) : 0;
194
+ const pageSize = options == null ? void 0 : options.limit;
195
+ const hasPreviousPage = currentPage > 0;
196
+ const hasNextPage = pageSize ? total > (currentPage + 1) * pageSize : false;
197
+ return {
198
+ results,
199
+ total,
200
+ prevCursor: hasPreviousPage ? (currentPage - 1).toString() : null,
201
+ nextCursor: hasNextPage ? (currentPage + 1).toString() : null,
202
+ fuzzyMatches
203
+ };
197
204
  };
198
205
  export {
199
206
  optionsToSearchIndexOptions,
package/dist/index.d.ts CHANGED
@@ -1,5 +1,10 @@
1
- import si from 'search-index';
1
+ import createSearchIndex from 'search-index';
2
2
  export { SearchIndexer } from './indexer';
3
3
  export { LocalSearchIndexClient, TinaCMSSearchIndexClient } from './client';
4
- export type { SearchClient } from './types';
5
- export { si };
4
+ export type { SearchClient, SearchOptions, SearchResult, SearchQueryResponse, IndexableDocument, SearchIndexResult, SearchIndex, } from './types';
5
+ export type { FuzzySearchOptions, FuzzyMatch } from './fuzzy';
6
+ export { levenshteinDistance, similarityScore, damerauLevenshteinDistance, findSimilarTerms, FuzzyCache, DEFAULT_FUZZY_OPTIONS, } from './fuzzy';
7
+ export { FuzzySearchWrapper } from './fuzzy-search-wrapper';
8
+ export { buildPageOptions, buildPaginationCursors } from './pagination';
9
+ export type { PaginationOptions, PageOptions, PaginationCursors, } from './pagination';
10
+ export { createSearchIndex };