@prose-reader/enhancer-search 1.130.0 → 1.132.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -7
- package/src/index.ts +36 -149
- package/src/report.ts +5 -0
- package/src/search.ts +61 -0
- package/src/types.ts +5 -0
- package/{vite.config.js → vite.config.ts} +2 -1
package/package.json
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@prose-reader/enhancer-search",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.132.0",
|
|
4
4
|
"type": "module",
|
|
5
|
-
"main": "./dist/
|
|
6
|
-
"module": "./dist/
|
|
5
|
+
"main": "./dist/index.umd.cjs",
|
|
6
|
+
"module": "./dist/index.js",
|
|
7
7
|
"types": "./dist/index.d.ts",
|
|
8
8
|
"exports": {
|
|
9
9
|
".": {
|
|
10
|
-
"import": "./dist/
|
|
11
|
-
"require": "./dist/
|
|
10
|
+
"import": "./dist/index.js",
|
|
11
|
+
"require": "./dist/index.umd.cjs"
|
|
12
12
|
}
|
|
13
13
|
},
|
|
14
14
|
"scripts": {
|
|
@@ -19,10 +19,10 @@
|
|
|
19
19
|
"test": "vitest run --coverage"
|
|
20
20
|
},
|
|
21
21
|
"dependencies": {
|
|
22
|
-
"@prose-reader/core": "^1.
|
|
22
|
+
"@prose-reader/core": "^1.132.0"
|
|
23
23
|
},
|
|
24
24
|
"peerDependencies": {
|
|
25
25
|
"rxjs": "*"
|
|
26
26
|
},
|
|
27
|
-
"gitHead": "
|
|
27
|
+
"gitHead": "c090e2540deebddc68038275846e7f01730593a8"
|
|
28
28
|
}
|
package/src/index.ts
CHANGED
|
@@ -1,34 +1,17 @@
|
|
|
1
1
|
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
2
|
-
import { Reader } from "@prose-reader/core"
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
`application/xhtml+xml`,
|
|
8
|
-
`application/xml`,
|
|
9
|
-
`image/svg+xml`,
|
|
10
|
-
`text/html`,
|
|
11
|
-
`text/xml`,
|
|
12
|
-
]
|
|
13
|
-
|
|
14
|
-
const isSupportedContentType = (contentType: string): contentType is DOMParserSupportedType => {
|
|
15
|
-
return supportedContentType.includes(contentType as DOMParserSupportedType)
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
type ResultItem = {
|
|
19
|
-
spineItemIndex: number
|
|
20
|
-
startCfi: string
|
|
21
|
-
endCfi: string
|
|
22
|
-
pageIndex?: number
|
|
23
|
-
contextText: string
|
|
24
|
-
startOffset: number
|
|
25
|
-
endOffset: number
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
export type SearchResult = ResultItem[]
|
|
2
|
+
import { deferIdle, Reader } from "@prose-reader/core"
|
|
3
|
+
import { defer, forkJoin, Observable, of } from "rxjs"
|
|
4
|
+
import { catchError, finalize, map, switchMap } from "rxjs/operators"
|
|
5
|
+
import { searchInDocument, SearchResult } from "./search"
|
|
6
|
+
import { report } from "./report"
|
|
29
7
|
|
|
30
8
|
/**
|
|
9
|
+
* Contract of search enhancer.
|
|
31
10
|
*
|
|
11
|
+
* - At best a result match should be navigable. It means the search needs to
|
|
12
|
+
* be done on a rendered document. This is because rendering can differ from the original
|
|
13
|
+
* item resource. A resource can be something opaque and very format-specific (e.g. a .pdf). The search
|
|
14
|
+
* enhancer is agnostic and can only search into documents.
|
|
32
15
|
*/
|
|
33
16
|
export const searchEnhancer =
|
|
34
17
|
<InheritOptions, InheritOutput extends Reader>(next: (options: InheritOptions) => InheritOutput) =>
|
|
@@ -36,64 +19,11 @@ export const searchEnhancer =
|
|
|
36
19
|
options: InheritOptions,
|
|
37
20
|
): InheritOutput & {
|
|
38
21
|
search: {
|
|
39
|
-
search: (text: string) =>
|
|
40
|
-
$: {
|
|
41
|
-
search$: Observable<{ type: `start` } | { type: `end`; data: SearchResult }>
|
|
42
|
-
}
|
|
22
|
+
search: (text: string) => Observable<SearchResult>
|
|
43
23
|
}
|
|
44
24
|
} => {
|
|
45
25
|
const reader = next(options)
|
|
46
26
|
|
|
47
|
-
const searchSubject$ = new Subject<string>()
|
|
48
|
-
|
|
49
|
-
const searchNodeContainingText = (node: Node, text: string) => {
|
|
50
|
-
const nodeList = node.childNodes
|
|
51
|
-
|
|
52
|
-
if (node.nodeName === `head`) return []
|
|
53
|
-
|
|
54
|
-
const rangeList: {
|
|
55
|
-
startNode: Node
|
|
56
|
-
start: number
|
|
57
|
-
endNode: Node
|
|
58
|
-
end: number
|
|
59
|
-
}[] = []
|
|
60
|
-
for (let i = 0; i < nodeList.length; i++) {
|
|
61
|
-
const subNode = nodeList[i]
|
|
62
|
-
|
|
63
|
-
if (!subNode) {
|
|
64
|
-
continue
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
if (subNode?.hasChildNodes()) {
|
|
68
|
-
rangeList.push(...searchNodeContainingText(subNode, text))
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
if (subNode.nodeType === 3) {
|
|
72
|
-
const content = (subNode as Text).data.toLowerCase()
|
|
73
|
-
if (content) {
|
|
74
|
-
let match
|
|
75
|
-
const regexp = RegExp(`(${text})`, `g`)
|
|
76
|
-
|
|
77
|
-
while ((match = regexp.exec(content)) !== null) {
|
|
78
|
-
if (match.index >= 0 && subNode.ownerDocument) {
|
|
79
|
-
const range = subNode.ownerDocument.createRange()
|
|
80
|
-
range.setStart(subNode, match.index)
|
|
81
|
-
range.setEnd(subNode, match.index + text.length)
|
|
82
|
-
rangeList.push({
|
|
83
|
-
startNode: subNode,
|
|
84
|
-
start: match.index,
|
|
85
|
-
endNode: subNode,
|
|
86
|
-
end: match.index + text.length,
|
|
87
|
-
})
|
|
88
|
-
}
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
return rangeList
|
|
95
|
-
}
|
|
96
|
-
|
|
97
27
|
const searchForItem = (index: number, text: string) => {
|
|
98
28
|
const item = reader.spineItemsManager.get(index)
|
|
99
29
|
|
|
@@ -101,92 +31,49 @@ export const searchEnhancer =
|
|
|
101
31
|
return of([])
|
|
102
32
|
}
|
|
103
33
|
|
|
104
|
-
return
|
|
105
|
-
switchMap((
|
|
106
|
-
|
|
107
|
-
return of([])
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
const contentType = response?.headers.get(`Content-Type`) ?? ``
|
|
34
|
+
return deferIdle(() => item.renderer.renderHeadless()).pipe(
|
|
35
|
+
switchMap((result) => {
|
|
36
|
+
const { doc, release } = result || {}
|
|
111
37
|
|
|
112
|
-
|
|
113
|
-
// the reader returns us a valid HTML document anyway so it is not ultimately necessary.
|
|
114
|
-
// however we can still avoid doing unnecessary HTML generation for images resources, etc.
|
|
115
|
-
if (!isSupportedContentType(contentType)) return of([])
|
|
38
|
+
if (!doc) return of([])
|
|
116
39
|
|
|
117
|
-
return
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
const newResults = ranges.map((range) => {
|
|
125
|
-
const { end, start } = reader.cfi.generateCfiFromRange(range, item.item)
|
|
126
|
-
const { node, offset, spineItem } = reader.cfi.resolveCfi({ cfi: start }) || {}
|
|
127
|
-
const pageIndex =
|
|
128
|
-
node && spineItem !== undefined
|
|
129
|
-
? reader.spine.locator.getSpineItemPageIndexFromNode(node, offset, spineItem.item.index)
|
|
130
|
-
: undefined
|
|
131
|
-
|
|
132
|
-
return {
|
|
133
|
-
spineItemIndex: index,
|
|
134
|
-
startCfi: start,
|
|
135
|
-
endCfi: end,
|
|
136
|
-
pageIndex,
|
|
137
|
-
contextText: range.startNode.parentElement?.textContent || ``,
|
|
138
|
-
startOffset: range.start,
|
|
139
|
-
endOffset: range.end,
|
|
140
|
-
}
|
|
141
|
-
})
|
|
40
|
+
return deferIdle(() => searchInDocument(reader, item, doc, text)).pipe(
|
|
41
|
+
finalize(() => {
|
|
42
|
+
release?.()
|
|
43
|
+
}),
|
|
44
|
+
catchError((e) => {
|
|
45
|
+
report.error(e)
|
|
142
46
|
|
|
143
|
-
return
|
|
47
|
+
return of([])
|
|
144
48
|
}),
|
|
145
49
|
)
|
|
146
50
|
}),
|
|
147
51
|
)
|
|
148
52
|
}
|
|
149
53
|
|
|
150
|
-
const search = (text: string) =>
|
|
151
|
-
|
|
152
|
-
|
|
54
|
+
const search = (text: string) =>
|
|
55
|
+
defer(() => {
|
|
56
|
+
if (text === ``) {
|
|
57
|
+
return of([])
|
|
58
|
+
}
|
|
153
59
|
|
|
154
|
-
|
|
155
|
-
* Main search process stream
|
|
156
|
-
*/
|
|
157
|
-
const search$ = merge(
|
|
158
|
-
searchSubject$.asObservable().pipe(map(() => ({ type: `start` as const }))),
|
|
159
|
-
searchSubject$.asObservable().pipe(
|
|
160
|
-
switchMap((text) => {
|
|
161
|
-
if (text === ``) {
|
|
162
|
-
return of([])
|
|
163
|
-
}
|
|
60
|
+
const searches$ = reader.context.manifest?.spineItems.map((_, index) => searchForItem(index, text)) || []
|
|
164
61
|
|
|
165
|
-
|
|
62
|
+
return forkJoin([...searches$, of([])])
|
|
63
|
+
}).pipe(
|
|
64
|
+
map((results) => {
|
|
65
|
+
const flattenedResults = results.flat()
|
|
166
66
|
|
|
167
|
-
|
|
168
|
-
map((results) => {
|
|
169
|
-
return results.reduce((acc, value) => [...acc, ...value], [])
|
|
170
|
-
}),
|
|
171
|
-
)
|
|
172
|
-
}),
|
|
173
|
-
map((data) => ({ type: `end` as const, data })),
|
|
174
|
-
),
|
|
175
|
-
).pipe(share(), takeUntil(reader.$.destroy$))
|
|
67
|
+
report.debug("results", flattenedResults)
|
|
176
68
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
}
|
|
69
|
+
return flattenedResults
|
|
70
|
+
}),
|
|
71
|
+
)
|
|
181
72
|
|
|
182
73
|
return {
|
|
183
74
|
...reader,
|
|
184
|
-
destroy,
|
|
185
75
|
search: {
|
|
186
76
|
search,
|
|
187
|
-
$: {
|
|
188
|
-
search$,
|
|
189
|
-
},
|
|
190
77
|
},
|
|
191
78
|
}
|
|
192
79
|
}
|
package/src/report.ts
ADDED
package/src/search.ts
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { Reader, SpineItem } from "@prose-reader/core"
|
|
2
|
+
import { Observable, of } from "rxjs"
|
|
3
|
+
import { ResultItem } from "./types"
|
|
4
|
+
|
|
5
|
+
export type SearchResult = ResultItem[]
|
|
6
|
+
|
|
7
|
+
const searchNodeContainingText = (node: Node, text: string) => {
|
|
8
|
+
const nodeList = node.childNodes
|
|
9
|
+
|
|
10
|
+
if (node.nodeName === `head`) return []
|
|
11
|
+
|
|
12
|
+
const rangeList: Range[] = []
|
|
13
|
+
|
|
14
|
+
for (let i = 0; i < nodeList.length; i++) {
|
|
15
|
+
const subNode = nodeList[i]
|
|
16
|
+
|
|
17
|
+
if (!subNode) {
|
|
18
|
+
continue
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
if (subNode?.hasChildNodes()) {
|
|
22
|
+
rangeList.push(...searchNodeContainingText(subNode, text))
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
if (subNode.nodeType === 3) {
|
|
26
|
+
const content = (subNode as Text).data.toLowerCase()
|
|
27
|
+
if (content) {
|
|
28
|
+
let match
|
|
29
|
+
const regexp = RegExp(`(${text})`, `g`)
|
|
30
|
+
|
|
31
|
+
while ((match = regexp.exec(content)) !== null) {
|
|
32
|
+
if (match.index >= 0 && subNode.ownerDocument) {
|
|
33
|
+
const range = subNode.ownerDocument.createRange()
|
|
34
|
+
range.setStart(subNode, match.index)
|
|
35
|
+
range.setEnd(subNode, match.index + text.length)
|
|
36
|
+
|
|
37
|
+
rangeList.push(range)
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
return rangeList
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
export const searchInDocument = (reader: Reader, item: SpineItem, doc: Document, text: string): Observable<SearchResult> => {
|
|
48
|
+
const ranges = searchNodeContainingText(doc, text)
|
|
49
|
+
|
|
50
|
+
const newResults = ranges.map((range) => {
|
|
51
|
+
const { end, start } = reader.cfi.generateCfiFromRange(range, item.item)
|
|
52
|
+
|
|
53
|
+
return {
|
|
54
|
+
cfi: start,
|
|
55
|
+
startCfi: start,
|
|
56
|
+
endCfi: end,
|
|
57
|
+
} satisfies ResultItem
|
|
58
|
+
})
|
|
59
|
+
|
|
60
|
+
return of(newResults)
|
|
61
|
+
}
|
package/src/types.ts
ADDED
|
@@ -9,10 +9,11 @@ const libName = name.replace(`@`, ``).replace(`/`, `-`)
|
|
|
9
9
|
export default defineConfig(({ mode }) => ({
|
|
10
10
|
build: {
|
|
11
11
|
minify: false,
|
|
12
|
+
target: "esnext",
|
|
12
13
|
lib: {
|
|
13
14
|
entry: resolve(__dirname, `src/index.ts`),
|
|
14
15
|
name: libName,
|
|
15
|
-
fileName:
|
|
16
|
+
fileName: `index`,
|
|
16
17
|
},
|
|
17
18
|
emptyOutDir: mode !== `development`,
|
|
18
19
|
sourcemap: true,
|