@kreuzberg/kreuzcrawl 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +5 -1
- package/index.d.ts +17 -33
- package/index.js +52 -53
- package/npm/darwin-arm64/kreuzcrawl-node.darwin-arm64.node +0 -0
- package/npm/darwin-arm64/package.json +1 -1
- package/npm/kreuzberg-kreuzcrawl-darwin-arm64-0.2.0.tgz +0 -0
- package/npm/kreuzberg-kreuzcrawl-linux-arm64-gnu-0.2.0.tgz +0 -0
- package/npm/kreuzberg-kreuzcrawl-linux-x64-gnu-0.2.0.tgz +0 -0
- package/npm/kreuzberg-kreuzcrawl-win32-x64-msvc-0.2.0.tgz +0 -0
- package/npm/linux-arm64-gnu/kreuzcrawl-node.linux-arm64-gnu.node +0 -0
- package/npm/linux-arm64-gnu/package.json +1 -1
- package/npm/linux-x64-gnu/kreuzcrawl-node.linux-x64-gnu.node +0 -0
- package/npm/linux-x64-gnu/package.json +1 -1
- package/npm/win32-x64-msvc/kreuzcrawl-node.win32-x64-msvc.node +0 -0
- package/npm/win32-x64-msvc/package.json +1 -1
- package/package.json +5 -5
- package/src/lib.rs +82 -113
- package/npm/kreuzberg-kreuzcrawl-darwin-arm64-0.1.1.tgz +0 -0
- package/npm/kreuzberg-kreuzcrawl-linux-arm64-gnu-0.1.1.tgz +0 -0
- package/npm/kreuzberg-kreuzcrawl-linux-x64-gnu-0.1.1.tgz +0 -0
- package/npm/kreuzberg-kreuzcrawl-win32-x64-msvc-0.1.1.tgz +0 -0
package/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "kreuzcrawl-node"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.2.0"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
license = "Elastic-2.0"
|
|
6
6
|
|
|
@@ -16,3 +16,7 @@ serde_json = "1"
|
|
|
16
16
|
|
|
17
17
|
[build-dependencies]
|
|
18
18
|
napi-build = "2"
|
|
19
|
+
|
|
20
|
+
[lints.clippy]
|
|
21
|
+
duplicated_attributes = "allow"
|
|
22
|
+
unwrap_used = "allow"
|
package/index.d.ts
CHANGED
|
@@ -12,14 +12,6 @@ export declare function crawl(engine: JsCrawlEngineHandle, url: string): Promise
|
|
|
12
12
|
|
|
13
13
|
export declare function createEngine(config?: JsCrawlConfig | undefined | null): JsCrawlEngineHandle
|
|
14
14
|
|
|
15
|
-
export interface JsActionResult {
|
|
16
|
-
actionIndex?: number
|
|
17
|
-
actionType?: string
|
|
18
|
-
success?: boolean
|
|
19
|
-
data?: string
|
|
20
|
-
error?: string
|
|
21
|
-
}
|
|
22
|
-
|
|
23
15
|
export interface JsArticleMetadata {
|
|
24
16
|
publishedTime?: string
|
|
25
17
|
modifiedTime?: string
|
|
@@ -83,16 +75,6 @@ export declare const enum JsBrowserWait {
|
|
|
83
75
|
Fixed = 'fixed'
|
|
84
76
|
}
|
|
85
77
|
|
|
86
|
-
export interface JsCachedPage {
|
|
87
|
-
url?: string
|
|
88
|
-
statusCode?: number
|
|
89
|
-
contentType?: string
|
|
90
|
-
body?: string
|
|
91
|
-
etag?: string
|
|
92
|
-
lastModified?: string
|
|
93
|
-
cachedAt?: number
|
|
94
|
-
}
|
|
95
|
-
|
|
96
78
|
export interface JsCitationReference {
|
|
97
79
|
index?: number
|
|
98
80
|
url?: string
|
|
@@ -104,6 +86,21 @@ export interface JsCitationResult {
|
|
|
104
86
|
references?: Array<JsCitationReference>
|
|
105
87
|
}
|
|
106
88
|
|
|
89
|
+
export interface JsContentConfig {
|
|
90
|
+
outputFormat?: string
|
|
91
|
+
preprocessingPreset?: string
|
|
92
|
+
removeNavigation?: boolean
|
|
93
|
+
removeForms?: boolean
|
|
94
|
+
stripTags?: Array<string>
|
|
95
|
+
preserveTags?: Array<string>
|
|
96
|
+
excludeSelectors?: Array<string>
|
|
97
|
+
skipImages?: boolean
|
|
98
|
+
maxDepth?: number
|
|
99
|
+
wrap?: boolean
|
|
100
|
+
wrapWidth?: number
|
|
101
|
+
includeDocumentStructure?: boolean
|
|
102
|
+
}
|
|
103
|
+
|
|
107
104
|
export interface JsCookieInfo {
|
|
108
105
|
name?: string
|
|
109
106
|
value?: string
|
|
@@ -123,14 +120,15 @@ export interface JsCrawlConfig {
|
|
|
123
120
|
excludePaths?: Array<string>
|
|
124
121
|
customHeaders?: Record<string, string>
|
|
125
122
|
requestTimeout?: number
|
|
123
|
+
rateLimitMs?: number
|
|
126
124
|
maxRedirects?: number
|
|
127
125
|
retryCount?: number
|
|
128
126
|
retryCodes?: Array<number>
|
|
129
127
|
cookiesEnabled?: boolean
|
|
130
128
|
auth?: JsAuthConfig
|
|
131
129
|
maxBodySize?: number
|
|
132
|
-
mainContentOnly?: boolean
|
|
133
130
|
removeTags?: Array<string>
|
|
131
|
+
content?: JsContentConfig
|
|
134
132
|
mapLimit?: number
|
|
135
133
|
mapSearch?: string
|
|
136
134
|
downloadAssets?: boolean
|
|
@@ -148,12 +146,6 @@ export interface JsCrawlConfig {
|
|
|
148
146
|
saveBrowserProfile?: boolean
|
|
149
147
|
}
|
|
150
148
|
|
|
151
|
-
export declare const enum JsCrawlEvent {
|
|
152
|
-
Page = 'Page',
|
|
153
|
-
Error = 'Error',
|
|
154
|
-
Complete = 'Complete'
|
|
155
|
-
}
|
|
156
|
-
|
|
157
149
|
export interface JsCrawlPageResult {
|
|
158
150
|
url?: string
|
|
159
151
|
normalizedUrl?: string
|
|
@@ -258,13 +250,6 @@ export declare const enum JsImageSource {
|
|
|
258
250
|
TwitterImage = 'twitter_image'
|
|
259
251
|
}
|
|
260
252
|
|
|
261
|
-
export interface JsInteractionResult {
|
|
262
|
-
actionResults?: Array<JsActionResult>
|
|
263
|
-
finalHtml?: string
|
|
264
|
-
finalUrl?: string
|
|
265
|
-
screenshot?: Array<number>
|
|
266
|
-
}
|
|
267
|
-
|
|
268
253
|
export interface JsJsonLdEntry {
|
|
269
254
|
schemaType?: string
|
|
270
255
|
name?: string
|
|
@@ -379,7 +364,6 @@ export interface JsScrapeResult {
|
|
|
379
364
|
isPdf?: boolean
|
|
380
365
|
wasSkipped?: boolean
|
|
381
366
|
detectedCharset?: string
|
|
382
|
-
mainContentOnly?: boolean
|
|
383
367
|
authHeaderSent?: boolean
|
|
384
368
|
responseMeta?: JsResponseMeta
|
|
385
369
|
assets?: Array<JsDownloadedAsset>
|
package/index.js
CHANGED
|
@@ -77,8 +77,8 @@ function requireNative() {
|
|
|
77
77
|
try {
|
|
78
78
|
const binding = require('@kreuzberg/kreuzcrawl-android-arm64')
|
|
79
79
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-android-arm64/package.json').version
|
|
80
|
-
if (bindingPackageVersion !== '0.
|
|
81
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
80
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
81
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
82
82
|
}
|
|
83
83
|
return binding
|
|
84
84
|
} catch (e) {
|
|
@@ -93,8 +93,8 @@ function requireNative() {
|
|
|
93
93
|
try {
|
|
94
94
|
const binding = require('@kreuzberg/kreuzcrawl-android-arm-eabi')
|
|
95
95
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-android-arm-eabi/package.json').version
|
|
96
|
-
if (bindingPackageVersion !== '0.
|
|
97
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
96
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
97
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
98
98
|
}
|
|
99
99
|
return binding
|
|
100
100
|
} catch (e) {
|
|
@@ -114,8 +114,8 @@ function requireNative() {
|
|
|
114
114
|
try {
|
|
115
115
|
const binding = require('@kreuzberg/kreuzcrawl-win32-x64-gnu')
|
|
116
116
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-win32-x64-gnu/package.json').version
|
|
117
|
-
if (bindingPackageVersion !== '0.
|
|
118
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
117
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
118
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
119
119
|
}
|
|
120
120
|
return binding
|
|
121
121
|
} catch (e) {
|
|
@@ -130,8 +130,8 @@ function requireNative() {
|
|
|
130
130
|
try {
|
|
131
131
|
const binding = require('@kreuzberg/kreuzcrawl-win32-x64-msvc')
|
|
132
132
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-win32-x64-msvc/package.json').version
|
|
133
|
-
if (bindingPackageVersion !== '0.
|
|
134
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
133
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
134
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
135
135
|
}
|
|
136
136
|
return binding
|
|
137
137
|
} catch (e) {
|
|
@@ -147,8 +147,8 @@ function requireNative() {
|
|
|
147
147
|
try {
|
|
148
148
|
const binding = require('@kreuzberg/kreuzcrawl-win32-ia32-msvc')
|
|
149
149
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-win32-ia32-msvc/package.json').version
|
|
150
|
-
if (bindingPackageVersion !== '0.
|
|
151
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
150
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
151
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
152
152
|
}
|
|
153
153
|
return binding
|
|
154
154
|
} catch (e) {
|
|
@@ -163,8 +163,8 @@ function requireNative() {
|
|
|
163
163
|
try {
|
|
164
164
|
const binding = require('@kreuzberg/kreuzcrawl-win32-arm64-msvc')
|
|
165
165
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-win32-arm64-msvc/package.json').version
|
|
166
|
-
if (bindingPackageVersion !== '0.
|
|
167
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
166
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
167
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
168
168
|
}
|
|
169
169
|
return binding
|
|
170
170
|
} catch (e) {
|
|
@@ -182,8 +182,8 @@ function requireNative() {
|
|
|
182
182
|
try {
|
|
183
183
|
const binding = require('@kreuzberg/kreuzcrawl-darwin-universal')
|
|
184
184
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-darwin-universal/package.json').version
|
|
185
|
-
if (bindingPackageVersion !== '0.
|
|
186
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
185
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
186
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
187
187
|
}
|
|
188
188
|
return binding
|
|
189
189
|
} catch (e) {
|
|
@@ -198,8 +198,8 @@ function requireNative() {
|
|
|
198
198
|
try {
|
|
199
199
|
const binding = require('@kreuzberg/kreuzcrawl-darwin-x64')
|
|
200
200
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-darwin-x64/package.json').version
|
|
201
|
-
if (bindingPackageVersion !== '0.
|
|
202
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
201
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
202
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
203
203
|
}
|
|
204
204
|
return binding
|
|
205
205
|
} catch (e) {
|
|
@@ -214,8 +214,8 @@ function requireNative() {
|
|
|
214
214
|
try {
|
|
215
215
|
const binding = require('@kreuzberg/kreuzcrawl-darwin-arm64')
|
|
216
216
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-darwin-arm64/package.json').version
|
|
217
|
-
if (bindingPackageVersion !== '0.
|
|
218
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
217
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
218
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
219
219
|
}
|
|
220
220
|
return binding
|
|
221
221
|
} catch (e) {
|
|
@@ -234,8 +234,8 @@ function requireNative() {
|
|
|
234
234
|
try {
|
|
235
235
|
const binding = require('@kreuzberg/kreuzcrawl-freebsd-x64')
|
|
236
236
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-freebsd-x64/package.json').version
|
|
237
|
-
if (bindingPackageVersion !== '0.
|
|
238
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
237
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
238
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
239
239
|
}
|
|
240
240
|
return binding
|
|
241
241
|
} catch (e) {
|
|
@@ -250,8 +250,8 @@ function requireNative() {
|
|
|
250
250
|
try {
|
|
251
251
|
const binding = require('@kreuzberg/kreuzcrawl-freebsd-arm64')
|
|
252
252
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-freebsd-arm64/package.json').version
|
|
253
|
-
if (bindingPackageVersion !== '0.
|
|
254
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
253
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
254
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
255
255
|
}
|
|
256
256
|
return binding
|
|
257
257
|
} catch (e) {
|
|
@@ -271,8 +271,8 @@ function requireNative() {
|
|
|
271
271
|
try {
|
|
272
272
|
const binding = require('@kreuzberg/kreuzcrawl-linux-x64-musl')
|
|
273
273
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-x64-musl/package.json').version
|
|
274
|
-
if (bindingPackageVersion !== '0.
|
|
275
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
274
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
275
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
276
276
|
}
|
|
277
277
|
return binding
|
|
278
278
|
} catch (e) {
|
|
@@ -287,8 +287,8 @@ function requireNative() {
|
|
|
287
287
|
try {
|
|
288
288
|
const binding = require('@kreuzberg/kreuzcrawl-linux-x64-gnu')
|
|
289
289
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-x64-gnu/package.json').version
|
|
290
|
-
if (bindingPackageVersion !== '0.
|
|
291
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
290
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
291
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
292
292
|
}
|
|
293
293
|
return binding
|
|
294
294
|
} catch (e) {
|
|
@@ -305,8 +305,8 @@ function requireNative() {
|
|
|
305
305
|
try {
|
|
306
306
|
const binding = require('@kreuzberg/kreuzcrawl-linux-arm64-musl')
|
|
307
307
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-arm64-musl/package.json').version
|
|
308
|
-
if (bindingPackageVersion !== '0.
|
|
309
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
308
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
309
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
310
310
|
}
|
|
311
311
|
return binding
|
|
312
312
|
} catch (e) {
|
|
@@ -321,8 +321,8 @@ function requireNative() {
|
|
|
321
321
|
try {
|
|
322
322
|
const binding = require('@kreuzberg/kreuzcrawl-linux-arm64-gnu')
|
|
323
323
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-arm64-gnu/package.json').version
|
|
324
|
-
if (bindingPackageVersion !== '0.
|
|
325
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
324
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
325
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
326
326
|
}
|
|
327
327
|
return binding
|
|
328
328
|
} catch (e) {
|
|
@@ -339,8 +339,8 @@ function requireNative() {
|
|
|
339
339
|
try {
|
|
340
340
|
const binding = require('@kreuzberg/kreuzcrawl-linux-arm-musleabihf')
|
|
341
341
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-arm-musleabihf/package.json').version
|
|
342
|
-
if (bindingPackageVersion !== '0.
|
|
343
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
342
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
343
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
344
344
|
}
|
|
345
345
|
return binding
|
|
346
346
|
} catch (e) {
|
|
@@ -355,8 +355,8 @@ function requireNative() {
|
|
|
355
355
|
try {
|
|
356
356
|
const binding = require('@kreuzberg/kreuzcrawl-linux-arm-gnueabihf')
|
|
357
357
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-arm-gnueabihf/package.json').version
|
|
358
|
-
if (bindingPackageVersion !== '0.
|
|
359
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
358
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
359
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
360
360
|
}
|
|
361
361
|
return binding
|
|
362
362
|
} catch (e) {
|
|
@@ -373,8 +373,8 @@ function requireNative() {
|
|
|
373
373
|
try {
|
|
374
374
|
const binding = require('@kreuzberg/kreuzcrawl-linux-loong64-musl')
|
|
375
375
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-loong64-musl/package.json').version
|
|
376
|
-
if (bindingPackageVersion !== '0.
|
|
377
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
376
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
377
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
378
378
|
}
|
|
379
379
|
return binding
|
|
380
380
|
} catch (e) {
|
|
@@ -389,8 +389,8 @@ function requireNative() {
|
|
|
389
389
|
try {
|
|
390
390
|
const binding = require('@kreuzberg/kreuzcrawl-linux-loong64-gnu')
|
|
391
391
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-loong64-gnu/package.json').version
|
|
392
|
-
if (bindingPackageVersion !== '0.
|
|
393
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
392
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
393
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
394
394
|
}
|
|
395
395
|
return binding
|
|
396
396
|
} catch (e) {
|
|
@@ -407,8 +407,8 @@ function requireNative() {
|
|
|
407
407
|
try {
|
|
408
408
|
const binding = require('@kreuzberg/kreuzcrawl-linux-riscv64-musl')
|
|
409
409
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-riscv64-musl/package.json').version
|
|
410
|
-
if (bindingPackageVersion !== '0.
|
|
411
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
410
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
411
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
412
412
|
}
|
|
413
413
|
return binding
|
|
414
414
|
} catch (e) {
|
|
@@ -423,8 +423,8 @@ function requireNative() {
|
|
|
423
423
|
try {
|
|
424
424
|
const binding = require('@kreuzberg/kreuzcrawl-linux-riscv64-gnu')
|
|
425
425
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-riscv64-gnu/package.json').version
|
|
426
|
-
if (bindingPackageVersion !== '0.
|
|
427
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
426
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
427
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
428
428
|
}
|
|
429
429
|
return binding
|
|
430
430
|
} catch (e) {
|
|
@@ -440,8 +440,8 @@ function requireNative() {
|
|
|
440
440
|
try {
|
|
441
441
|
const binding = require('@kreuzberg/kreuzcrawl-linux-ppc64-gnu')
|
|
442
442
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-ppc64-gnu/package.json').version
|
|
443
|
-
if (bindingPackageVersion !== '0.
|
|
444
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
443
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
444
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
445
445
|
}
|
|
446
446
|
return binding
|
|
447
447
|
} catch (e) {
|
|
@@ -456,8 +456,8 @@ function requireNative() {
|
|
|
456
456
|
try {
|
|
457
457
|
const binding = require('@kreuzberg/kreuzcrawl-linux-s390x-gnu')
|
|
458
458
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-s390x-gnu/package.json').version
|
|
459
|
-
if (bindingPackageVersion !== '0.
|
|
460
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
459
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
460
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
461
461
|
}
|
|
462
462
|
return binding
|
|
463
463
|
} catch (e) {
|
|
@@ -476,8 +476,8 @@ function requireNative() {
|
|
|
476
476
|
try {
|
|
477
477
|
const binding = require('@kreuzberg/kreuzcrawl-openharmony-arm64')
|
|
478
478
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-openharmony-arm64/package.json').version
|
|
479
|
-
if (bindingPackageVersion !== '0.
|
|
480
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
479
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
480
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
481
481
|
}
|
|
482
482
|
return binding
|
|
483
483
|
} catch (e) {
|
|
@@ -492,8 +492,8 @@ function requireNative() {
|
|
|
492
492
|
try {
|
|
493
493
|
const binding = require('@kreuzberg/kreuzcrawl-openharmony-x64')
|
|
494
494
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-openharmony-x64/package.json').version
|
|
495
|
-
if (bindingPackageVersion !== '0.
|
|
496
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
495
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
496
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
497
497
|
}
|
|
498
498
|
return binding
|
|
499
499
|
} catch (e) {
|
|
@@ -508,8 +508,8 @@ function requireNative() {
|
|
|
508
508
|
try {
|
|
509
509
|
const binding = require('@kreuzberg/kreuzcrawl-openharmony-arm')
|
|
510
510
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-openharmony-arm/package.json').version
|
|
511
|
-
if (bindingPackageVersion !== '0.
|
|
512
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
511
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
512
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
513
513
|
}
|
|
514
514
|
return binding
|
|
515
515
|
} catch (e) {
|
|
@@ -584,7 +584,6 @@ module.exports.createEngine = nativeBinding.createEngine
|
|
|
584
584
|
module.exports.JsAssetCategory = nativeBinding.JsAssetCategory
|
|
585
585
|
module.exports.JsBrowserMode = nativeBinding.JsBrowserMode
|
|
586
586
|
module.exports.JsBrowserWait = nativeBinding.JsBrowserWait
|
|
587
|
-
module.exports.JsCrawlEvent = nativeBinding.JsCrawlEvent
|
|
588
587
|
module.exports.JsFeedType = nativeBinding.JsFeedType
|
|
589
588
|
module.exports.JsImageSource = nativeBinding.JsImageSource
|
|
590
589
|
module.exports.JsLinkType = nativeBinding.JsLinkType
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kreuzberg/kreuzcrawl",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"main": "index.js",
|
|
5
5
|
"types": "index.d.ts",
|
|
6
6
|
"repository": {
|
|
@@ -35,9 +35,9 @@
|
|
|
35
35
|
"@napi-rs/cli": "^3.6.2"
|
|
36
36
|
},
|
|
37
37
|
"optionalDependencies": {
|
|
38
|
-
"@kreuzberg/kreuzcrawl-darwin-arm64": "0.
|
|
39
|
-
"@kreuzberg/kreuzcrawl-linux-arm64-gnu": "0.
|
|
40
|
-
"@kreuzberg/kreuzcrawl-linux-x64-gnu": "0.
|
|
41
|
-
"@kreuzberg/kreuzcrawl-win32-x64-msvc": "0.
|
|
38
|
+
"@kreuzberg/kreuzcrawl-darwin-arm64": "0.2.0",
|
|
39
|
+
"@kreuzberg/kreuzcrawl-linux-arm64-gnu": "0.2.0",
|
|
40
|
+
"@kreuzberg/kreuzcrawl-linux-x64-gnu": "0.2.0",
|
|
41
|
+
"@kreuzberg/kreuzcrawl-win32-x64-msvc": "0.2.0"
|
|
42
42
|
}
|
|
43
43
|
}
|
package/src/lib.rs
CHANGED
|
@@ -6,7 +6,13 @@
|
|
|
6
6
|
clippy::let_unit_value,
|
|
7
7
|
clippy::needless_borrow,
|
|
8
8
|
clippy::map_identity,
|
|
9
|
-
clippy::just_underscores_and_digits
|
|
9
|
+
clippy::just_underscores_and_digits,
|
|
10
|
+
clippy::unnecessary_cast,
|
|
11
|
+
clippy::unused_unit,
|
|
12
|
+
clippy::unwrap_or_default,
|
|
13
|
+
clippy::derivable_impls,
|
|
14
|
+
clippy::needless_borrows_for_generic_args,
|
|
15
|
+
clippy::unnecessary_fallible_conversions
|
|
10
16
|
)]
|
|
11
17
|
|
|
12
18
|
use napi::*;
|
|
@@ -42,6 +48,34 @@ pub struct JsProxyConfig {
|
|
|
42
48
|
pub password: Option<String>,
|
|
43
49
|
}
|
|
44
50
|
|
|
51
|
+
#[derive(Clone, Default, serde::Serialize, serde::Deserialize)]
|
|
52
|
+
#[napi(object)]
|
|
53
|
+
pub struct JsContentConfig {
|
|
54
|
+
#[napi(js_name = "outputFormat")]
|
|
55
|
+
pub output_format: Option<String>,
|
|
56
|
+
#[napi(js_name = "preprocessingPreset")]
|
|
57
|
+
pub preprocessing_preset: Option<String>,
|
|
58
|
+
#[napi(js_name = "removeNavigation")]
|
|
59
|
+
pub remove_navigation: Option<bool>,
|
|
60
|
+
#[napi(js_name = "removeForms")]
|
|
61
|
+
pub remove_forms: Option<bool>,
|
|
62
|
+
#[napi(js_name = "stripTags")]
|
|
63
|
+
pub strip_tags: Option<Vec<String>>,
|
|
64
|
+
#[napi(js_name = "preserveTags")]
|
|
65
|
+
pub preserve_tags: Option<Vec<String>>,
|
|
66
|
+
#[napi(js_name = "excludeSelectors")]
|
|
67
|
+
pub exclude_selectors: Option<Vec<String>>,
|
|
68
|
+
#[napi(js_name = "skipImages")]
|
|
69
|
+
pub skip_images: Option<bool>,
|
|
70
|
+
#[napi(js_name = "maxDepth")]
|
|
71
|
+
pub max_depth: Option<i64>,
|
|
72
|
+
pub wrap: Option<bool>,
|
|
73
|
+
#[napi(js_name = "wrapWidth")]
|
|
74
|
+
pub wrap_width: Option<i64>,
|
|
75
|
+
#[napi(js_name = "includeDocumentStructure")]
|
|
76
|
+
pub include_document_structure: Option<bool>,
|
|
77
|
+
}
|
|
78
|
+
|
|
45
79
|
#[derive(Clone, Default, serde::Serialize, serde::Deserialize)]
|
|
46
80
|
#[napi(object)]
|
|
47
81
|
pub struct JsBrowserConfig {
|
|
@@ -80,6 +114,8 @@ pub struct JsCrawlConfig {
|
|
|
80
114
|
pub custom_headers: Option<HashMap<String, String>>,
|
|
81
115
|
#[napi(js_name = "requestTimeout")]
|
|
82
116
|
pub request_timeout: Option<i64>,
|
|
117
|
+
#[napi(js_name = "rateLimitMs")]
|
|
118
|
+
pub rate_limit_ms: Option<i64>,
|
|
83
119
|
#[napi(js_name = "maxRedirects")]
|
|
84
120
|
pub max_redirects: Option<i64>,
|
|
85
121
|
#[napi(js_name = "retryCount")]
|
|
@@ -91,10 +127,9 @@ pub struct JsCrawlConfig {
|
|
|
91
127
|
pub auth: Option<JsAuthConfig>,
|
|
92
128
|
#[napi(js_name = "maxBodySize")]
|
|
93
129
|
pub max_body_size: Option<i64>,
|
|
94
|
-
#[napi(js_name = "mainContentOnly")]
|
|
95
|
-
pub main_content_only: Option<bool>,
|
|
96
130
|
#[napi(js_name = "removeTags")]
|
|
97
131
|
pub remove_tags: Option<Vec<String>>,
|
|
132
|
+
pub content: Option<JsContentConfig>,
|
|
98
133
|
#[napi(js_name = "mapLimit")]
|
|
99
134
|
pub map_limit: Option<i64>,
|
|
100
135
|
#[napi(js_name = "mapSearch")]
|
|
@@ -139,30 +174,6 @@ pub struct JsDownloadedDocument {
|
|
|
139
174
|
pub headers: Option<HashMap<String, String>>,
|
|
140
175
|
}
|
|
141
176
|
|
|
142
|
-
#[derive(Clone, Default, serde::Serialize, serde::Deserialize)]
|
|
143
|
-
#[napi(object)]
|
|
144
|
-
pub struct JsInteractionResult {
|
|
145
|
-
#[napi(js_name = "actionResults")]
|
|
146
|
-
pub action_results: Option<Vec<JsActionResult>>,
|
|
147
|
-
#[napi(js_name = "finalHtml")]
|
|
148
|
-
pub final_html: Option<String>,
|
|
149
|
-
#[napi(js_name = "finalUrl")]
|
|
150
|
-
pub final_url: Option<String>,
|
|
151
|
-
pub screenshot: Option<Vec<u8>>,
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
#[derive(Clone, Default, serde::Serialize, serde::Deserialize)]
|
|
155
|
-
#[napi(object)]
|
|
156
|
-
pub struct JsActionResult {
|
|
157
|
-
#[napi(js_name = "actionIndex")]
|
|
158
|
-
pub action_index: Option<i64>,
|
|
159
|
-
#[napi(js_name = "actionType")]
|
|
160
|
-
pub action_type: Option<String>,
|
|
161
|
-
pub success: Option<bool>,
|
|
162
|
-
pub data: Option<String>,
|
|
163
|
-
pub error: Option<String>,
|
|
164
|
-
}
|
|
165
|
-
|
|
166
177
|
#[derive(Clone, Default, serde::Serialize, serde::Deserialize)]
|
|
167
178
|
#[napi(object)]
|
|
168
179
|
pub struct JsScrapeResult {
|
|
@@ -195,8 +206,6 @@ pub struct JsScrapeResult {
|
|
|
195
206
|
pub was_skipped: Option<bool>,
|
|
196
207
|
#[napi(js_name = "detectedCharset")]
|
|
197
208
|
pub detected_charset: Option<String>,
|
|
198
|
-
#[napi(js_name = "mainContentOnly")]
|
|
199
|
-
pub main_content_only: Option<bool>,
|
|
200
209
|
#[napi(js_name = "authHeaderSent")]
|
|
201
210
|
pub auth_header_sent: Option<bool>,
|
|
202
211
|
#[napi(js_name = "responseMeta")]
|
|
@@ -297,22 +306,6 @@ pub struct JsMarkdownResult {
|
|
|
297
306
|
pub fit_content: Option<String>,
|
|
298
307
|
}
|
|
299
308
|
|
|
300
|
-
#[derive(Clone, Default, serde::Serialize, serde::Deserialize)]
|
|
301
|
-
#[napi(object)]
|
|
302
|
-
pub struct JsCachedPage {
|
|
303
|
-
pub url: Option<String>,
|
|
304
|
-
#[napi(js_name = "statusCode")]
|
|
305
|
-
pub status_code: Option<u16>,
|
|
306
|
-
#[napi(js_name = "contentType")]
|
|
307
|
-
pub content_type: Option<String>,
|
|
308
|
-
pub body: Option<String>,
|
|
309
|
-
pub etag: Option<String>,
|
|
310
|
-
#[napi(js_name = "lastModified")]
|
|
311
|
-
pub last_modified: Option<String>,
|
|
312
|
-
#[napi(js_name = "cachedAt")]
|
|
313
|
-
pub cached_at: Option<i64>,
|
|
314
|
-
}
|
|
315
|
-
|
|
316
309
|
#[derive(Clone, Default, serde::Serialize, serde::Deserialize)]
|
|
317
310
|
#[napi(object)]
|
|
318
311
|
pub struct JsLinkInfo {
|
|
@@ -674,21 +667,6 @@ impl Default for JsAssetCategory {
|
|
|
674
667
|
}
|
|
675
668
|
}
|
|
676
669
|
|
|
677
|
-
#[napi(string_enum)]
|
|
678
|
-
#[derive(Clone, serde::Serialize, serde::Deserialize)]
|
|
679
|
-
pub enum JsCrawlEvent {
|
|
680
|
-
Page,
|
|
681
|
-
Error,
|
|
682
|
-
Complete,
|
|
683
|
-
}
|
|
684
|
-
|
|
685
|
-
#[allow(clippy::derivable_impls)]
|
|
686
|
-
impl Default for JsCrawlEvent {
|
|
687
|
-
fn default() -> Self {
|
|
688
|
-
Self::Page
|
|
689
|
-
}
|
|
690
|
-
}
|
|
691
|
-
|
|
692
670
|
#[allow(clippy::missing_errors_doc)]
|
|
693
671
|
#[napi(js_name = "createEngine")]
|
|
694
672
|
pub fn create_engine(config: Option<JsCrawlConfig>) -> Result<JsCrawlEngineHandle> {
|
|
@@ -781,6 +759,44 @@ impl From<kreuzcrawl::ProxyConfig> for JsProxyConfig {
|
|
|
781
759
|
}
|
|
782
760
|
}
|
|
783
761
|
|
|
762
|
+
impl From<JsContentConfig> for kreuzcrawl::ContentConfig {
|
|
763
|
+
fn from(val: JsContentConfig) -> Self {
|
|
764
|
+
Self {
|
|
765
|
+
output_format: val.output_format.unwrap_or_default(),
|
|
766
|
+
preprocessing_preset: val.preprocessing_preset.unwrap_or_default(),
|
|
767
|
+
remove_navigation: val.remove_navigation.unwrap_or_default(),
|
|
768
|
+
remove_forms: val.remove_forms.unwrap_or_default(),
|
|
769
|
+
strip_tags: val.strip_tags.unwrap_or_default(),
|
|
770
|
+
preserve_tags: val.preserve_tags.unwrap_or_default(),
|
|
771
|
+
exclude_selectors: val.exclude_selectors.unwrap_or_default(),
|
|
772
|
+
skip_images: val.skip_images.unwrap_or_default(),
|
|
773
|
+
max_depth: val.max_depth.map(|v| v as usize),
|
|
774
|
+
wrap: val.wrap.unwrap_or_default(),
|
|
775
|
+
wrap_width: val.wrap_width.map(|v| v as usize).unwrap_or_default(),
|
|
776
|
+
include_document_structure: val.include_document_structure.unwrap_or_default(),
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
impl From<kreuzcrawl::ContentConfig> for JsContentConfig {
|
|
782
|
+
fn from(val: kreuzcrawl::ContentConfig) -> Self {
|
|
783
|
+
Self {
|
|
784
|
+
output_format: Some(val.output_format),
|
|
785
|
+
preprocessing_preset: Some(val.preprocessing_preset),
|
|
786
|
+
remove_navigation: Some(val.remove_navigation),
|
|
787
|
+
remove_forms: Some(val.remove_forms),
|
|
788
|
+
strip_tags: Some(val.strip_tags),
|
|
789
|
+
preserve_tags: Some(val.preserve_tags),
|
|
790
|
+
exclude_selectors: Some(val.exclude_selectors),
|
|
791
|
+
skip_images: Some(val.skip_images),
|
|
792
|
+
max_depth: val.max_depth.map(|v| v as i64),
|
|
793
|
+
wrap: Some(val.wrap),
|
|
794
|
+
wrap_width: Some(val.wrap_width as i64),
|
|
795
|
+
include_document_structure: Some(val.include_document_structure),
|
|
796
|
+
}
|
|
797
|
+
}
|
|
798
|
+
}
|
|
799
|
+
|
|
784
800
|
#[allow(clippy::field_reassign_with_default)]
|
|
785
801
|
impl From<JsBrowserConfig> for kreuzcrawl::BrowserConfig {
|
|
786
802
|
fn from(val: JsBrowserConfig) -> Self {
|
|
@@ -828,14 +844,15 @@ impl From<JsCrawlConfig> for kreuzcrawl::CrawlConfig {
|
|
|
828
844
|
if let Some(__v) = val.request_timeout {
|
|
829
845
|
__result.request_timeout = std::time::Duration::from_millis(__v as u64);
|
|
830
846
|
}
|
|
847
|
+
__result.rate_limit_ms = val.rate_limit_ms.map(|v| v as u64);
|
|
831
848
|
__result.max_redirects = val.max_redirects.map(|v| v as usize).unwrap_or_default();
|
|
832
849
|
__result.retry_count = val.retry_count.map(|v| v as usize).unwrap_or_default();
|
|
833
850
|
__result.retry_codes = val.retry_codes.unwrap_or_default();
|
|
834
851
|
__result.cookies_enabled = val.cookies_enabled.unwrap_or_default();
|
|
835
852
|
__result.auth = val.auth.map(Into::into);
|
|
836
853
|
__result.max_body_size = val.max_body_size.map(|v| v as usize);
|
|
837
|
-
__result.main_content_only = val.main_content_only.unwrap_or_default();
|
|
838
854
|
__result.remove_tags = val.remove_tags.unwrap_or_default();
|
|
855
|
+
__result.content = val.content.map(Into::into).unwrap_or_default();
|
|
839
856
|
__result.map_limit = val.map_limit.map(|v| v as usize);
|
|
840
857
|
__result.map_search = val.map_search;
|
|
841
858
|
__result.download_assets = val.download_assets.unwrap_or_default();
|
|
@@ -872,14 +889,15 @@ impl From<kreuzcrawl::CrawlConfig> for JsCrawlConfig {
|
|
|
872
889
|
exclude_paths: Some(val.exclude_paths),
|
|
873
890
|
custom_headers: Some(val.custom_headers.into_iter().collect()),
|
|
874
891
|
request_timeout: Some(val.request_timeout.as_millis() as u64 as i64),
|
|
892
|
+
rate_limit_ms: val.rate_limit_ms.map(|v| v as i64),
|
|
875
893
|
max_redirects: Some(val.max_redirects as i64),
|
|
876
894
|
retry_count: Some(val.retry_count as i64),
|
|
877
895
|
retry_codes: Some(val.retry_codes),
|
|
878
896
|
cookies_enabled: Some(val.cookies_enabled),
|
|
879
897
|
auth: val.auth.map(Into::into),
|
|
880
898
|
max_body_size: val.max_body_size.map(|v| v as i64),
|
|
881
|
-
main_content_only: Some(val.main_content_only),
|
|
882
899
|
remove_tags: Some(val.remove_tags),
|
|
900
|
+
content: Some(val.content.into()),
|
|
883
901
|
map_limit: val.map_limit.map(|v| v as i64),
|
|
884
902
|
map_search: val.map_search,
|
|
885
903
|
download_assets: Some(val.download_assets),
|
|
@@ -920,41 +938,18 @@ impl From<kreuzcrawl::DownloadedDocument> for JsDownloadedDocument {
|
|
|
920
938
|
mime_type: Some(format!("{:?}", val.mime_type)),
|
|
921
939
|
content: Some(val.content.to_vec()),
|
|
922
940
|
size: Some(val.size as i64),
|
|
923
|
-
filename: val.filename.as_ref().map(|v| format!("{:?}"
|
|
941
|
+
filename: val.filename.as_ref().map(|v| format!("{v:?}")),
|
|
924
942
|
content_hash: Some(format!("{:?}", val.content_hash)),
|
|
925
943
|
headers: Some(
|
|
926
944
|
val.headers
|
|
927
945
|
.into_iter()
|
|
928
|
-
.map(|(k, v)| (
|
|
946
|
+
.map(|(k, v)| (k.to_string(), v.to_string()))
|
|
929
947
|
.collect(),
|
|
930
948
|
),
|
|
931
949
|
}
|
|
932
950
|
}
|
|
933
951
|
}
|
|
934
952
|
|
|
935
|
-
impl From<kreuzcrawl::InteractionResult> for JsInteractionResult {
|
|
936
|
-
fn from(val: kreuzcrawl::InteractionResult) -> Self {
|
|
937
|
-
Self {
|
|
938
|
-
action_results: Some(val.action_results.into_iter().map(Into::into).collect()),
|
|
939
|
-
final_html: Some(val.final_html),
|
|
940
|
-
final_url: Some(val.final_url),
|
|
941
|
-
screenshot: val.screenshot.map(|v| v.to_vec()),
|
|
942
|
-
}
|
|
943
|
-
}
|
|
944
|
-
}
|
|
945
|
-
|
|
946
|
-
impl From<kreuzcrawl::ActionResult> for JsActionResult {
|
|
947
|
-
fn from(val: kreuzcrawl::ActionResult) -> Self {
|
|
948
|
-
Self {
|
|
949
|
-
action_index: Some(val.action_index as i64),
|
|
950
|
-
action_type: Some(format!("{:?}", val.action_type)),
|
|
951
|
-
success: Some(val.success),
|
|
952
|
-
data: val.data.as_ref().map(ToString::to_string),
|
|
953
|
-
error: val.error,
|
|
954
|
-
}
|
|
955
|
-
}
|
|
956
|
-
}
|
|
957
|
-
|
|
958
953
|
impl From<JsScrapeResult> for kreuzcrawl::ScrapeResult {
|
|
959
954
|
fn from(val: JsScrapeResult) -> Self {
|
|
960
955
|
Self {
|
|
@@ -987,7 +982,6 @@ impl From<JsScrapeResult> for kreuzcrawl::ScrapeResult {
|
|
|
987
982
|
is_pdf: val.is_pdf.unwrap_or_default(),
|
|
988
983
|
was_skipped: val.was_skipped.unwrap_or_default(),
|
|
989
984
|
detected_charset: val.detected_charset,
|
|
990
|
-
main_content_only: val.main_content_only.unwrap_or_default(),
|
|
991
985
|
auth_header_sent: val.auth_header_sent.unwrap_or_default(),
|
|
992
986
|
response_meta: val.response_meta.map(Into::into),
|
|
993
987
|
assets: val
|
|
@@ -1025,7 +1019,6 @@ impl From<kreuzcrawl::ScrapeResult> for JsScrapeResult {
|
|
|
1025
1019
|
is_pdf: Some(val.is_pdf),
|
|
1026
1020
|
was_skipped: Some(val.was_skipped),
|
|
1027
1021
|
detected_charset: val.detected_charset,
|
|
1028
|
-
main_content_only: Some(val.main_content_only),
|
|
1029
1022
|
auth_header_sent: Some(val.auth_header_sent),
|
|
1030
1023
|
response_meta: val.response_meta.map(Into::into),
|
|
1031
1024
|
assets: Some(val.assets.into_iter().map(Into::into).collect()),
|
|
@@ -1213,20 +1206,6 @@ impl From<kreuzcrawl::MarkdownResult> for JsMarkdownResult {
|
|
|
1213
1206
|
}
|
|
1214
1207
|
}
|
|
1215
1208
|
|
|
1216
|
-
impl From<kreuzcrawl::CachedPage> for JsCachedPage {
|
|
1217
|
-
fn from(val: kreuzcrawl::CachedPage) -> Self {
|
|
1218
|
-
Self {
|
|
1219
|
-
url: Some(val.url),
|
|
1220
|
-
status_code: Some(val.status_code),
|
|
1221
|
-
content_type: Some(val.content_type),
|
|
1222
|
-
body: Some(val.body),
|
|
1223
|
-
etag: val.etag,
|
|
1224
|
-
last_modified: val.last_modified,
|
|
1225
|
-
cached_at: Some(val.cached_at as i64),
|
|
1226
|
-
}
|
|
1227
|
-
}
|
|
1228
|
-
}
|
|
1229
|
-
|
|
1230
1209
|
impl From<JsLinkInfo> for kreuzcrawl::LinkInfo {
|
|
1231
1210
|
fn from(val: JsLinkInfo) -> Self {
|
|
1232
1211
|
Self {
|
|
@@ -1845,16 +1824,6 @@ impl From<kreuzcrawl::AssetCategory> for JsAssetCategory {
|
|
|
1845
1824
|
}
|
|
1846
1825
|
}
|
|
1847
1826
|
|
|
1848
|
-
impl From<kreuzcrawl::CrawlEvent> for JsCrawlEvent {
|
|
1849
|
-
fn from(val: kreuzcrawl::CrawlEvent) -> Self {
|
|
1850
|
-
match val {
|
|
1851
|
-
kreuzcrawl::CrawlEvent::Page(..) => Self::Page,
|
|
1852
|
-
kreuzcrawl::CrawlEvent::Error { .. } => Self::Error,
|
|
1853
|
-
kreuzcrawl::CrawlEvent::Complete { .. } => Self::Complete,
|
|
1854
|
-
}
|
|
1855
|
-
}
|
|
1856
|
-
}
|
|
1857
|
-
|
|
1858
1827
|
// Error variant name constants
|
|
1859
1828
|
pub const CRAWL_ERROR_ERROR_NOT_FOUND: &str = "NotFound";
|
|
1860
1829
|
pub const CRAWL_ERROR_ERROR_UNAUTHORIZED: &str = "Unauthorized";
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|