pdf-search-highlight 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # pdf-search-highlight
2
2
 
3
- PDF viewer with text search and highlight. Render PDF, search text with flexible whitespace matching, and navigate between highlighted results. Zoom in/out and download PDF files.
3
+ PDF viewer with text search and highlight. Render PDF, search text with flexible whitespace matching or fuzzy (approximate) matching, and navigate between highlighted results. Supports multi-context search with different highlight colors. Zoom in/out and download PDF files.
4
4
 
5
5
  Built on [pdf.js](https://mozilla.github.io/pdf.js/). Works with Vanilla JS and React.
6
6
 
@@ -14,6 +14,8 @@ npm install pdf-search-highlight pdfjs-dist
14
14
 
15
15
  - Render PDF pages (canvas + text layer)
16
16
  - Search with flexible whitespace matching — handles inconsistent PDF text splitting
17
+ - Fuzzy (approximate) search — find text even with typos or OCR errors
18
+ - **Multi-context search** — search multiple queries simultaneously, each highlighted with a different color
17
19
  - Cross-span highlight using `<mark>` elements
18
20
  - Navigate between matches (next/prev, auto-scroll)
19
21
  - Zoom in/out with configurable scale
@@ -46,10 +48,24 @@ search.onChange = ({ current, total }) => {
46
48
  };
47
49
 
48
50
  search.search('hello world');
51
+ search.search('helo wrld', { fuzzy: true, fuzzyThreshold: 0.6 }); // approximate match
49
52
  search.next();
50
53
  search.prev();
51
54
  search.clear();
52
55
 
56
+ // Multi-context search — each query gets a different highlight color
57
+ search.searchMultiple([
58
+ { query: 'contract' },
59
+ { query: 'payment' },
60
+ { query: 'deadline' },
61
+ ]);
62
+
63
+ // With per-context options
64
+ search.searchMultiple([
65
+ { query: 'contract' },
66
+ { query: 'payement', options: { fuzzy: true, fuzzyThreshold: 0.7 } },
67
+ ]);
68
+
53
69
  // Zoom
54
70
  renderer.setScale(1.5);
55
71
  const newPages = await renderer.renderAllPages();
@@ -74,6 +90,14 @@ await viewer.loadPDF(file);
74
90
  viewer.search('query');
75
91
  viewer.nextMatch();
76
92
 
93
+ // Multi-context search
94
+ viewer.searchMultiple([
95
+ { query: 'contract' },
96
+ { query: 'payment' },
97
+ { query: 'deadline' },
98
+ ]);
99
+ viewer.nextMatch(); // navigates through ALL matches in document order
100
+
77
101
  // Zoom
78
102
  await viewer.zoomIn();
79
103
  await viewer.zoomOut();
@@ -85,6 +109,9 @@ await viewer.download('document.pdf');
85
109
  // Events
86
110
  viewer.on('load', ({ pageCount }) => console.log('Pages:', pageCount));
87
111
  viewer.on('search', ({ query, total }) => console.log('Found:', total));
112
+ viewer.on('searchmultiple', ({ contexts, total, totalsPerContext }) => {
113
+ console.log('Multi-search:', total, 'total matches');
114
+ });
88
115
  viewer.on('matchchange', ({ current, total }) => console.log(`${current + 1}/${total}`));
89
116
  viewer.on('zoom', ({ scale }) => console.log('Scale:', scale));
90
117
  viewer.on('error', ({ error, context }) => console.error(context, error));
@@ -99,16 +126,25 @@ import 'pdf-search-highlight/styles.css';
99
126
  function App() {
100
127
  const { containerRef, pages, loadPDF, zoomIn, zoomOut, download, scale } =
101
128
  usePDFRenderer(pdfjsLib);
102
- const { search, next, prev, current, total } = useSearchController(pages);
129
+ const { search, searchMultiple, next, prev, current, total } =
130
+ useSearchController(pages);
103
131
 
104
132
  return (
105
133
  <>
106
- {/* Search UI — anywhere you want */}
134
+ {/* Single search */}
107
135
  <input onChange={e => search(e.target.value)} />
108
136
  <span>{total > 0 ? `${current + 1}/${total}` : ''}</span>
109
137
  <button onClick={prev}>Prev</button>
110
138
  <button onClick={next}>Next</button>
111
139
 
140
+ {/* Multi-context search */}
141
+ <button onClick={() => searchMultiple([
142
+ { query: 'contract' },
143
+ { query: 'payment' },
144
+ ])}>
145
+ Search Multiple
146
+ </button>
147
+
112
148
  {/* Zoom & download */}
113
149
  <button onClick={zoomOut}>-</button>
114
150
  <button onClick={zoomIn}>+</button>
@@ -137,8 +173,11 @@ function App() {
137
173
  pdfjsLib={pdfjsLib}
138
174
  source={file}
139
175
  searchQuery={query}
176
+ // OR multi-context search:
177
+ // searchContexts={[{ query: 'contract' }, { query: 'payment' }]}
140
178
  onLoad={({ pageCount }) => console.log('Pages:', pageCount)}
141
179
  onSearch={({ query, total }) => console.log('Found:', total)}
180
+ onSearchMultiple={({ contexts, total }) => console.log('Multi:', total)}
142
181
  onMatchChange={({ current, total }) => console.log(`${current + 1}/${total}`)}
143
182
  onZoom={({ scale }) => console.log('Scale:', scale)}
144
183
  style={{ height: '80vh', overflow: 'auto' }}
@@ -147,6 +186,7 @@ function App() {
147
186
 
148
187
  // Imperative access via ref
149
188
  // ref.current.nextMatch()
189
+ // ref.current.searchMultiple([{ query: 'a' }, { query: 'b' }])
150
190
  // ref.current.zoomIn()
151
191
  // ref.current.download('doc.pdf')
152
192
  }
@@ -169,7 +209,7 @@ function App() {
169
209
  | Export | Description |
170
210
  |---|---|
171
211
  | `usePDFRenderer(pdfjsLib, options?)` | Hook: render PDF, returns `{ containerRef, pages, loadPDF, scale, setScale, zoomIn, zoomOut, download, ... }` |
172
- | `useSearchController(pages, options?)` | Hook: search + highlight, returns `{ search, next, prev, goTo, clear, current, total }` |
212
+ | `useSearchController(pages, options?)` | Hook: search + highlight, returns `{ search, searchMultiple, next, prev, goTo, clear, current, total }` |
173
213
  | `PDFSearchViewer` | All-in-one component with ref handle for imperative control |
174
214
 
175
215
  ### PDFRenderer
@@ -198,16 +238,28 @@ const search = new SearchController({
198
238
  });
199
239
 
200
240
  search.setPages(pages);
241
+
242
+ // Single search
201
243
  search.search('query', { caseSensitive: false, flexibleWhitespace: true });
244
+ search.search('query', { fuzzy: true, fuzzyThreshold: 0.6 });
245
+
246
+ // Multi-context search
247
+ search.searchMultiple([
248
+ { query: 'contract' },
249
+ { query: 'payment' },
250
+ { query: 'deadline', options: { fuzzy: true } },
251
+ ]);
252
+
202
253
  search.next();
203
254
  search.prev();
204
255
  search.goTo(5);
205
256
  search.clear();
206
257
  search.onChange = ({ current, total, query }) => {};
207
258
 
208
- search.current // current match index
209
- search.total // total matches
210
- search.query // last query
259
+ search.current // current match index
260
+ search.total // total matches
261
+ search.query // last single query
262
+ search.contexts // last multi-context queries
211
263
  ```
212
264
 
213
265
  ### PDFSearchViewer (Core)
@@ -217,6 +269,13 @@ const viewer = new PDFSearchViewer(container, pdfjsLib, options);
217
269
 
218
270
  await viewer.loadPDF(source);
219
271
  viewer.search('query', { caseSensitive: true });
272
+
273
+ // Multi-context search
274
+ viewer.searchMultiple([
275
+ { query: 'contract' },
276
+ { query: 'payment' },
277
+ ]);
278
+
220
279
  viewer.nextMatch();
221
280
  viewer.prevMatch();
222
281
  viewer.clearSearch();
@@ -230,6 +289,7 @@ await viewer.download('file.pdf'); // Download PDF
230
289
 
231
290
  viewer.on('load', (data) => {}); // { pageCount }
232
291
  viewer.on('search', (data) => {}); // { query, total }
292
+ viewer.on('searchmultiple', (data) => {}); // { contexts, total, totalsPerContext }
233
293
  viewer.on('matchchange', (data) => {}); // { current, total }
234
294
  viewer.on('zoom', (data) => {}); // { scale }
235
295
  viewer.on('error', (data) => {}); // { error, context }
@@ -249,10 +309,46 @@ interface PDFSearchViewerOptions {
249
309
 
250
310
  interface SearchOptions {
251
311
  caseSensitive?: boolean; // Default: false
252
- flexibleWhitespace?: boolean; // Default: true
312
+ flexibleWhitespace?: boolean; // Default: true (ignored when fuzzy is true)
313
+ fuzzy?: boolean; // Default: false — enable approximate matching
314
+ fuzzyThreshold?: number; // Default: 0.6 — similarity 0.0–1.0
315
+ }
316
+
317
+ interface SearchContext {
318
+ query: string; // The search query
319
+ options?: SearchOptions; // Optional per-context overrides
253
320
  }
254
321
  ```
255
322
 
323
+ ### Multi-Context Search
324
+
325
+ Search for multiple terms simultaneously, each highlighted with a different color:
326
+
327
+ ```js
328
+ // Each context gets an auto-assigned color (highlight-0 through highlight-7, cycles)
329
+ search.searchMultiple([
330
+ { query: 'contract' }, // Yellow
331
+ { query: 'payment' }, // Cyan
332
+ { query: 'deadline' }, // Green
333
+ { query: 'penalty' }, // Orange
334
+ ]);
335
+
336
+ // Per-context options override shared options
337
+ search.searchMultiple(
338
+ [
339
+ { query: 'contract' },
340
+ { query: 'payement', options: { fuzzy: true, fuzzyThreshold: 0.7 } },
341
+ ],
342
+ { caseSensitive: false } // shared options
343
+ );
344
+
345
+ // Navigate through ALL matches in document order
346
+ search.next(); // goes to next match regardless of which context
347
+ search.prev(); // goes to previous match
348
+ ```
349
+
350
+ 8 colors are provided by default (CSS classes `highlight-0` through `highlight-7`). Colors cycle for more than 8 contexts.
351
+
256
352
  ### Custom CSS
257
353
 
258
354
  Override any class name:
@@ -274,12 +370,23 @@ const renderer = new PDFRenderer(container, {
274
370
  Default styles:
275
371
 
276
372
  ```css
373
+ /* Single search */
277
374
  .highlight {
278
375
  background: rgba(255, 230, 0, 0.45) !important;
279
376
  }
280
377
  .highlight.active {
281
378
  background: rgba(233, 69, 96, 0.55) !important;
282
379
  }
380
+
381
+ /* Multi-context search (8 colors) */
382
+ .highlight-0 { /* Yellow */ }
383
+ .highlight-1 { /* Cyan */ }
384
+ .highlight-2 { /* Green */ }
385
+ .highlight-3 { /* Orange */ }
386
+ .highlight-4 { /* Purple */ }
387
+ .highlight-5 { /* Pink */ }
388
+ .highlight-6 { /* Blue */ }
389
+ .highlight-7 { /* Lime */ }
283
390
  ```
284
391
 
285
392
  ## How it works
@@ -287,9 +394,11 @@ Default styles:
287
394
  1. **Render**: PDF.js renders each page as `<canvas>` + transparent `<span>` text layer overlay
288
395
  2. **Search**: Concatenate all span texts into one string per page, build a `charMap` mapping each character back to its source span
289
396
  3. **Flexible whitespace**: Query `"and expensive"` becomes regex `a\s*n\s*d\s*e\s*x\s*p\s*e\s*n\s*s\s*i\s*v\s*e` — matches regardless of whitespace differences in PDF text
290
- 4. **Highlight**: Regex matches on concatenated text → charMap maps back to spans → split span DOM into text nodes + `<mark>` elements
291
- 5. **Navigate**: Prev/next with wrap-around, auto-scroll to active match
292
- 6. **Zoom**: Re-renders all pages at new scale, search highlights are automatically re-applied
397
+ 4. **Fuzzy search**: Semi-global Levenshtein alignment finds substrings within edit distance `queryLength × (1 - threshold)` — handles typos, OCR errors, and garbled text extraction
398
+ 5. **Highlight**: Regex/fuzzy matches on concatenated text → charMap maps back to spans → split span DOM into text nodes + `<mark>` elements
399
+ 6. **Multi-context**: Each context runs independently, matches are sorted by document position, and each context's `<mark>` elements receive a distinct CSS class (`highlight-0`, `highlight-1`, ...)
400
+ 7. **Navigate**: Prev/next with wrap-around, auto-scroll to active match — in multi-context mode, navigation cycles through all matches across all contexts
401
+ 8. **Zoom**: Re-renders all pages at new scale, search highlights are automatically re-applied
293
402
 
294
403
  ## License
295
404
 
@@ -57,36 +57,19 @@ interface SearchOptions {
57
57
  * Flexible whitespace matching: insert \s* between every character.
58
58
  * Handles PDF text split inconsistencies. Defaults to true.
59
59
  * Only applies for queries < 200 chars (performance).
60
+ * Ignored when `fuzzy` is true.
60
61
  */
61
62
  flexibleWhitespace?: boolean;
63
+ /** Enable approximate (fuzzy) matching. Defaults to false. */
64
+ fuzzy?: boolean;
65
+ /**
66
+ * Similarity threshold for fuzzy matching: 0.0–1.0.
67
+ * similarity = 1 - (editDistance / queryLength).
68
+ * Higher values require closer matches. Defaults to 0.6.
69
+ */
70
+ fuzzyThreshold?: number;
62
71
  }
63
72
 
64
- type PDFSearchViewerEventMap = {
65
- /** Fired when PDF finishes loading. */
66
- load: {
67
- pageCount: number;
68
- };
69
- /** Fired when a search completes. */
70
- search: {
71
- query: string;
72
- total: number;
73
- };
74
- /** Fired when active match changes (via next/prev). */
75
- matchchange: {
76
- current: number;
77
- total: number;
78
- };
79
- /** Fired when zoom/scale changes. */
80
- zoom: {
81
- scale: number;
82
- };
83
- /** Fired on error. */
84
- error: {
85
- error: Error;
86
- context: string;
87
- };
88
- };
89
-
90
73
  /**
91
74
  * A single search match, potentially spanning multiple spans.
92
75
  * Each match contains an array of <mark> elements that highlight
@@ -116,6 +99,48 @@ interface PageData {
116
99
  /** All text spans on this page. */
117
100
  spans: SpanData[];
118
101
  }
102
+ /**
103
+ * A single search context for multi-context search.
104
+ * Each context represents a separate query highlighted with a distinct color.
105
+ */
106
+ interface SearchContext {
107
+ /** The query string to search for. */
108
+ query: string;
109
+ /** Optional per-context search options (overrides shared options). */
110
+ options?: SearchOptions;
111
+ }
112
+
113
+ type PDFSearchViewerEventMap = {
114
+ /** Fired when PDF finishes loading. */
115
+ load: {
116
+ pageCount: number;
117
+ };
118
+ /** Fired when a search completes. */
119
+ search: {
120
+ query: string;
121
+ total: number;
122
+ };
123
+ /** Fired when a multi-context search completes. */
124
+ searchmultiple: {
125
+ contexts: SearchContext[];
126
+ total: number;
127
+ totalsPerContext: number[];
128
+ };
129
+ /** Fired when active match changes (via next/prev). */
130
+ matchchange: {
131
+ current: number;
132
+ total: number;
133
+ };
134
+ /** Fired when zoom/scale changes. */
135
+ zoom: {
136
+ scale: number;
137
+ };
138
+ /** Fired on error. */
139
+ error: {
140
+ error: Error;
141
+ context: string;
142
+ };
143
+ };
119
144
 
120
145
  type PDFSource = File | ArrayBuffer | Uint8Array | string;
121
146
  /**
@@ -144,6 +169,8 @@ declare class PDFSearchViewer extends EventEmitter<PDFSearchViewerEventMap> {
144
169
  private pageData;
145
170
  private lastQuery;
146
171
  private lastSearchOptions;
172
+ private lastContexts;
173
+ private lastIsMultiContext;
147
174
  private destroyed;
148
175
  constructor(container: HTMLElement, pdfjsLib: any, options?: PDFSearchViewerOptions);
149
176
  /**
@@ -155,6 +182,13 @@ declare class PDFSearchViewer extends EventEmitter<PDFSearchViewerEventMap> {
155
182
  * Clears previous highlights and creates new ones.
156
183
  */
157
184
  search(query: string, options?: SearchOptions): number;
185
+ /**
186
+ * Search for multiple query contexts across all pages.
187
+ * Each context is highlighted with a different color (highlight-0, highlight-1, ...).
188
+ * Navigation (nextMatch/prevMatch) cycles through ALL matches in document order.
189
+ * Returns total number of matches across all contexts.
190
+ */
191
+ searchMultiple(contexts: SearchContext[], sharedOptions?: SearchOptions): number;
158
192
  /**
159
193
  * Navigate to next match (wraps around).
160
194
  */
@@ -197,4 +231,4 @@ declare class PDFSearchViewer extends EventEmitter<PDFSearchViewerEventMap> {
197
231
  destroy(): void;
198
232
  }
199
233
 
200
- export { type ClassNames as C, EventEmitter as E, type PageData as P, type SearchOptions as S, type PDFSearchViewerOptions as a, type SpanData as b, type SearchMatch as c, PDFSearchViewer as d, type PDFSearchViewerEventMap as e, type PDFSource as f };
234
+ export { type ClassNames as C, EventEmitter as E, type PageData as P, type SearchOptions as S, type SearchContext as a, type PDFSearchViewerOptions as b, type SpanData as c, type SearchMatch as d, PDFSearchViewer as e, type PDFSearchViewerEventMap as f, type PDFSource as g };
@@ -57,36 +57,19 @@ interface SearchOptions {
57
57
  * Flexible whitespace matching: insert \s* between every character.
58
58
  * Handles PDF text split inconsistencies. Defaults to true.
59
59
  * Only applies for queries < 200 chars (performance).
60
+ * Ignored when `fuzzy` is true.
60
61
  */
61
62
  flexibleWhitespace?: boolean;
63
+ /** Enable approximate (fuzzy) matching. Defaults to false. */
64
+ fuzzy?: boolean;
65
+ /**
66
+ * Similarity threshold for fuzzy matching: 0.0–1.0.
67
+ * similarity = 1 - (editDistance / queryLength).
68
+ * Higher values require closer matches. Defaults to 0.6.
69
+ */
70
+ fuzzyThreshold?: number;
62
71
  }
63
72
 
64
- type PDFSearchViewerEventMap = {
65
- /** Fired when PDF finishes loading. */
66
- load: {
67
- pageCount: number;
68
- };
69
- /** Fired when a search completes. */
70
- search: {
71
- query: string;
72
- total: number;
73
- };
74
- /** Fired when active match changes (via next/prev). */
75
- matchchange: {
76
- current: number;
77
- total: number;
78
- };
79
- /** Fired when zoom/scale changes. */
80
- zoom: {
81
- scale: number;
82
- };
83
- /** Fired on error. */
84
- error: {
85
- error: Error;
86
- context: string;
87
- };
88
- };
89
-
90
73
  /**
91
74
  * A single search match, potentially spanning multiple spans.
92
75
  * Each match contains an array of <mark> elements that highlight
@@ -116,6 +99,48 @@ interface PageData {
116
99
  /** All text spans on this page. */
117
100
  spans: SpanData[];
118
101
  }
102
+ /**
103
+ * A single search context for multi-context search.
104
+ * Each context represents a separate query highlighted with a distinct color.
105
+ */
106
+ interface SearchContext {
107
+ /** The query string to search for. */
108
+ query: string;
109
+ /** Optional per-context search options (overrides shared options). */
110
+ options?: SearchOptions;
111
+ }
112
+
113
+ type PDFSearchViewerEventMap = {
114
+ /** Fired when PDF finishes loading. */
115
+ load: {
116
+ pageCount: number;
117
+ };
118
+ /** Fired when a search completes. */
119
+ search: {
120
+ query: string;
121
+ total: number;
122
+ };
123
+ /** Fired when a multi-context search completes. */
124
+ searchmultiple: {
125
+ contexts: SearchContext[];
126
+ total: number;
127
+ totalsPerContext: number[];
128
+ };
129
+ /** Fired when active match changes (via next/prev). */
130
+ matchchange: {
131
+ current: number;
132
+ total: number;
133
+ };
134
+ /** Fired when zoom/scale changes. */
135
+ zoom: {
136
+ scale: number;
137
+ };
138
+ /** Fired on error. */
139
+ error: {
140
+ error: Error;
141
+ context: string;
142
+ };
143
+ };
119
144
 
120
145
  type PDFSource = File | ArrayBuffer | Uint8Array | string;
121
146
  /**
@@ -144,6 +169,8 @@ declare class PDFSearchViewer extends EventEmitter<PDFSearchViewerEventMap> {
144
169
  private pageData;
145
170
  private lastQuery;
146
171
  private lastSearchOptions;
172
+ private lastContexts;
173
+ private lastIsMultiContext;
147
174
  private destroyed;
148
175
  constructor(container: HTMLElement, pdfjsLib: any, options?: PDFSearchViewerOptions);
149
176
  /**
@@ -155,6 +182,13 @@ declare class PDFSearchViewer extends EventEmitter<PDFSearchViewerEventMap> {
155
182
  * Clears previous highlights and creates new ones.
156
183
  */
157
184
  search(query: string, options?: SearchOptions): number;
185
+ /**
186
+ * Search for multiple query contexts across all pages.
187
+ * Each context is highlighted with a different color (highlight-0, highlight-1, ...).
188
+ * Navigation (nextMatch/prevMatch) cycles through ALL matches in document order.
189
+ * Returns total number of matches across all contexts.
190
+ */
191
+ searchMultiple(contexts: SearchContext[], sharedOptions?: SearchOptions): number;
158
192
  /**
159
193
  * Navigate to next match (wraps around).
160
194
  */
@@ -197,4 +231,4 @@ declare class PDFSearchViewer extends EventEmitter<PDFSearchViewerEventMap> {
197
231
  destroy(): void;
198
232
  }
199
233
 
200
- export { type ClassNames as C, EventEmitter as E, type PageData as P, type SearchOptions as S, type PDFSearchViewerOptions as a, type SpanData as b, type SearchMatch as c, PDFSearchViewer as d, type PDFSearchViewerEventMap as e, type PDFSource as f };
234
+ export { type ClassNames as C, EventEmitter as E, type PageData as P, type SearchOptions as S, type SearchContext as a, type PDFSearchViewerOptions as b, type SpanData as c, type SearchMatch as d, PDFSearchViewer as e, type PDFSearchViewerEventMap as f, type PDFSource as g };