@kreuzberg/kreuzcrawl 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +1 -1
- package/index.d.ts +16 -2
- package/index.js +52 -52
- package/npm/darwin-arm64/kreuzcrawl-node.darwin-arm64.node +0 -0
- package/npm/darwin-arm64/package.json +1 -1
- package/npm/kreuzberg-kreuzcrawl-darwin-arm64-0.2.0.tgz +0 -0
- package/npm/kreuzberg-kreuzcrawl-linux-arm64-gnu-0.2.0.tgz +0 -0
- package/npm/kreuzberg-kreuzcrawl-linux-x64-gnu-0.2.0.tgz +0 -0
- package/npm/kreuzberg-kreuzcrawl-win32-x64-msvc-0.2.0.tgz +0 -0
- package/npm/linux-arm64-gnu/kreuzcrawl-node.linux-arm64-gnu.node +0 -0
- package/npm/linux-arm64-gnu/package.json +1 -1
- package/npm/linux-x64-gnu/kreuzcrawl-node.linux-x64-gnu.node +0 -0
- package/npm/linux-x64-gnu/package.json +1 -1
- package/npm/win32-x64-msvc/kreuzcrawl-node.win32-x64-msvc.node +0 -0
- package/npm/win32-x64-msvc/package.json +1 -1
- package/package.json +5 -5
- package/src/lib.rs +69 -8
- package/npm/kreuzberg-kreuzcrawl-darwin-arm64-0.1.2.tgz +0 -0
- package/npm/kreuzberg-kreuzcrawl-linux-arm64-gnu-0.1.2.tgz +0 -0
- package/npm/kreuzberg-kreuzcrawl-linux-x64-gnu-0.1.2.tgz +0 -0
- package/npm/kreuzberg-kreuzcrawl-win32-x64-msvc-0.1.2.tgz +0 -0
package/Cargo.toml
CHANGED
package/index.d.ts
CHANGED
|
@@ -86,6 +86,21 @@ export interface JsCitationResult {
|
|
|
86
86
|
references?: Array<JsCitationReference>
|
|
87
87
|
}
|
|
88
88
|
|
|
89
|
+
export interface JsContentConfig {
|
|
90
|
+
outputFormat?: string
|
|
91
|
+
preprocessingPreset?: string
|
|
92
|
+
removeNavigation?: boolean
|
|
93
|
+
removeForms?: boolean
|
|
94
|
+
stripTags?: Array<string>
|
|
95
|
+
preserveTags?: Array<string>
|
|
96
|
+
excludeSelectors?: Array<string>
|
|
97
|
+
skipImages?: boolean
|
|
98
|
+
maxDepth?: number
|
|
99
|
+
wrap?: boolean
|
|
100
|
+
wrapWidth?: number
|
|
101
|
+
includeDocumentStructure?: boolean
|
|
102
|
+
}
|
|
103
|
+
|
|
89
104
|
export interface JsCookieInfo {
|
|
90
105
|
name?: string
|
|
91
106
|
value?: string
|
|
@@ -112,8 +127,8 @@ export interface JsCrawlConfig {
|
|
|
112
127
|
cookiesEnabled?: boolean
|
|
113
128
|
auth?: JsAuthConfig
|
|
114
129
|
maxBodySize?: number
|
|
115
|
-
mainContentOnly?: boolean
|
|
116
130
|
removeTags?: Array<string>
|
|
131
|
+
content?: JsContentConfig
|
|
117
132
|
mapLimit?: number
|
|
118
133
|
mapSearch?: string
|
|
119
134
|
downloadAssets?: boolean
|
|
@@ -349,7 +364,6 @@ export interface JsScrapeResult {
|
|
|
349
364
|
isPdf?: boolean
|
|
350
365
|
wasSkipped?: boolean
|
|
351
366
|
detectedCharset?: string
|
|
352
|
-
mainContentOnly?: boolean
|
|
353
367
|
authHeaderSent?: boolean
|
|
354
368
|
responseMeta?: JsResponseMeta
|
|
355
369
|
assets?: Array<JsDownloadedAsset>
|
package/index.js
CHANGED
|
@@ -77,8 +77,8 @@ function requireNative() {
|
|
|
77
77
|
try {
|
|
78
78
|
const binding = require('@kreuzberg/kreuzcrawl-android-arm64')
|
|
79
79
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-android-arm64/package.json').version
|
|
80
|
-
if (bindingPackageVersion !== '0.
|
|
81
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
80
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
81
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
82
82
|
}
|
|
83
83
|
return binding
|
|
84
84
|
} catch (e) {
|
|
@@ -93,8 +93,8 @@ function requireNative() {
|
|
|
93
93
|
try {
|
|
94
94
|
const binding = require('@kreuzberg/kreuzcrawl-android-arm-eabi')
|
|
95
95
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-android-arm-eabi/package.json').version
|
|
96
|
-
if (bindingPackageVersion !== '0.
|
|
97
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
96
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
97
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
98
98
|
}
|
|
99
99
|
return binding
|
|
100
100
|
} catch (e) {
|
|
@@ -114,8 +114,8 @@ function requireNative() {
|
|
|
114
114
|
try {
|
|
115
115
|
const binding = require('@kreuzberg/kreuzcrawl-win32-x64-gnu')
|
|
116
116
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-win32-x64-gnu/package.json').version
|
|
117
|
-
if (bindingPackageVersion !== '0.
|
|
118
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
117
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
118
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
119
119
|
}
|
|
120
120
|
return binding
|
|
121
121
|
} catch (e) {
|
|
@@ -130,8 +130,8 @@ function requireNative() {
|
|
|
130
130
|
try {
|
|
131
131
|
const binding = require('@kreuzberg/kreuzcrawl-win32-x64-msvc')
|
|
132
132
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-win32-x64-msvc/package.json').version
|
|
133
|
-
if (bindingPackageVersion !== '0.
|
|
134
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
133
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
134
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
135
135
|
}
|
|
136
136
|
return binding
|
|
137
137
|
} catch (e) {
|
|
@@ -147,8 +147,8 @@ function requireNative() {
|
|
|
147
147
|
try {
|
|
148
148
|
const binding = require('@kreuzberg/kreuzcrawl-win32-ia32-msvc')
|
|
149
149
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-win32-ia32-msvc/package.json').version
|
|
150
|
-
if (bindingPackageVersion !== '0.
|
|
151
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
150
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
151
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
152
152
|
}
|
|
153
153
|
return binding
|
|
154
154
|
} catch (e) {
|
|
@@ -163,8 +163,8 @@ function requireNative() {
|
|
|
163
163
|
try {
|
|
164
164
|
const binding = require('@kreuzberg/kreuzcrawl-win32-arm64-msvc')
|
|
165
165
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-win32-arm64-msvc/package.json').version
|
|
166
|
-
if (bindingPackageVersion !== '0.
|
|
167
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
166
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
167
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
168
168
|
}
|
|
169
169
|
return binding
|
|
170
170
|
} catch (e) {
|
|
@@ -182,8 +182,8 @@ function requireNative() {
|
|
|
182
182
|
try {
|
|
183
183
|
const binding = require('@kreuzberg/kreuzcrawl-darwin-universal')
|
|
184
184
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-darwin-universal/package.json').version
|
|
185
|
-
if (bindingPackageVersion !== '0.
|
|
186
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
185
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
186
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
187
187
|
}
|
|
188
188
|
return binding
|
|
189
189
|
} catch (e) {
|
|
@@ -198,8 +198,8 @@ function requireNative() {
|
|
|
198
198
|
try {
|
|
199
199
|
const binding = require('@kreuzberg/kreuzcrawl-darwin-x64')
|
|
200
200
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-darwin-x64/package.json').version
|
|
201
|
-
if (bindingPackageVersion !== '0.
|
|
202
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
201
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
202
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
203
203
|
}
|
|
204
204
|
return binding
|
|
205
205
|
} catch (e) {
|
|
@@ -214,8 +214,8 @@ function requireNative() {
|
|
|
214
214
|
try {
|
|
215
215
|
const binding = require('@kreuzberg/kreuzcrawl-darwin-arm64')
|
|
216
216
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-darwin-arm64/package.json').version
|
|
217
|
-
if (bindingPackageVersion !== '0.
|
|
218
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
217
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
218
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
219
219
|
}
|
|
220
220
|
return binding
|
|
221
221
|
} catch (e) {
|
|
@@ -234,8 +234,8 @@ function requireNative() {
|
|
|
234
234
|
try {
|
|
235
235
|
const binding = require('@kreuzberg/kreuzcrawl-freebsd-x64')
|
|
236
236
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-freebsd-x64/package.json').version
|
|
237
|
-
if (bindingPackageVersion !== '0.
|
|
238
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
237
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
238
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
239
239
|
}
|
|
240
240
|
return binding
|
|
241
241
|
} catch (e) {
|
|
@@ -250,8 +250,8 @@ function requireNative() {
|
|
|
250
250
|
try {
|
|
251
251
|
const binding = require('@kreuzberg/kreuzcrawl-freebsd-arm64')
|
|
252
252
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-freebsd-arm64/package.json').version
|
|
253
|
-
if (bindingPackageVersion !== '0.
|
|
254
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
253
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
254
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
255
255
|
}
|
|
256
256
|
return binding
|
|
257
257
|
} catch (e) {
|
|
@@ -271,8 +271,8 @@ function requireNative() {
|
|
|
271
271
|
try {
|
|
272
272
|
const binding = require('@kreuzberg/kreuzcrawl-linux-x64-musl')
|
|
273
273
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-x64-musl/package.json').version
|
|
274
|
-
if (bindingPackageVersion !== '0.
|
|
275
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
274
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
275
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
276
276
|
}
|
|
277
277
|
return binding
|
|
278
278
|
} catch (e) {
|
|
@@ -287,8 +287,8 @@ function requireNative() {
|
|
|
287
287
|
try {
|
|
288
288
|
const binding = require('@kreuzberg/kreuzcrawl-linux-x64-gnu')
|
|
289
289
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-x64-gnu/package.json').version
|
|
290
|
-
if (bindingPackageVersion !== '0.
|
|
291
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
290
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
291
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
292
292
|
}
|
|
293
293
|
return binding
|
|
294
294
|
} catch (e) {
|
|
@@ -305,8 +305,8 @@ function requireNative() {
|
|
|
305
305
|
try {
|
|
306
306
|
const binding = require('@kreuzberg/kreuzcrawl-linux-arm64-musl')
|
|
307
307
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-arm64-musl/package.json').version
|
|
308
|
-
if (bindingPackageVersion !== '0.
|
|
309
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
308
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
309
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
310
310
|
}
|
|
311
311
|
return binding
|
|
312
312
|
} catch (e) {
|
|
@@ -321,8 +321,8 @@ function requireNative() {
|
|
|
321
321
|
try {
|
|
322
322
|
const binding = require('@kreuzberg/kreuzcrawl-linux-arm64-gnu')
|
|
323
323
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-arm64-gnu/package.json').version
|
|
324
|
-
if (bindingPackageVersion !== '0.
|
|
325
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
324
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
325
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
326
326
|
}
|
|
327
327
|
return binding
|
|
328
328
|
} catch (e) {
|
|
@@ -339,8 +339,8 @@ function requireNative() {
|
|
|
339
339
|
try {
|
|
340
340
|
const binding = require('@kreuzberg/kreuzcrawl-linux-arm-musleabihf')
|
|
341
341
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-arm-musleabihf/package.json').version
|
|
342
|
-
if (bindingPackageVersion !== '0.
|
|
343
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
342
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
343
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
344
344
|
}
|
|
345
345
|
return binding
|
|
346
346
|
} catch (e) {
|
|
@@ -355,8 +355,8 @@ function requireNative() {
|
|
|
355
355
|
try {
|
|
356
356
|
const binding = require('@kreuzberg/kreuzcrawl-linux-arm-gnueabihf')
|
|
357
357
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-arm-gnueabihf/package.json').version
|
|
358
|
-
if (bindingPackageVersion !== '0.
|
|
359
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
358
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
359
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
360
360
|
}
|
|
361
361
|
return binding
|
|
362
362
|
} catch (e) {
|
|
@@ -373,8 +373,8 @@ function requireNative() {
|
|
|
373
373
|
try {
|
|
374
374
|
const binding = require('@kreuzberg/kreuzcrawl-linux-loong64-musl')
|
|
375
375
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-loong64-musl/package.json').version
|
|
376
|
-
if (bindingPackageVersion !== '0.
|
|
377
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
376
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
377
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
378
378
|
}
|
|
379
379
|
return binding
|
|
380
380
|
} catch (e) {
|
|
@@ -389,8 +389,8 @@ function requireNative() {
|
|
|
389
389
|
try {
|
|
390
390
|
const binding = require('@kreuzberg/kreuzcrawl-linux-loong64-gnu')
|
|
391
391
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-loong64-gnu/package.json').version
|
|
392
|
-
if (bindingPackageVersion !== '0.
|
|
393
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
392
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
393
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
394
394
|
}
|
|
395
395
|
return binding
|
|
396
396
|
} catch (e) {
|
|
@@ -407,8 +407,8 @@ function requireNative() {
|
|
|
407
407
|
try {
|
|
408
408
|
const binding = require('@kreuzberg/kreuzcrawl-linux-riscv64-musl')
|
|
409
409
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-riscv64-musl/package.json').version
|
|
410
|
-
if (bindingPackageVersion !== '0.
|
|
411
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
410
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
411
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
412
412
|
}
|
|
413
413
|
return binding
|
|
414
414
|
} catch (e) {
|
|
@@ -423,8 +423,8 @@ function requireNative() {
|
|
|
423
423
|
try {
|
|
424
424
|
const binding = require('@kreuzberg/kreuzcrawl-linux-riscv64-gnu')
|
|
425
425
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-riscv64-gnu/package.json').version
|
|
426
|
-
if (bindingPackageVersion !== '0.
|
|
427
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
426
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
427
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
428
428
|
}
|
|
429
429
|
return binding
|
|
430
430
|
} catch (e) {
|
|
@@ -440,8 +440,8 @@ function requireNative() {
|
|
|
440
440
|
try {
|
|
441
441
|
const binding = require('@kreuzberg/kreuzcrawl-linux-ppc64-gnu')
|
|
442
442
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-ppc64-gnu/package.json').version
|
|
443
|
-
if (bindingPackageVersion !== '0.
|
|
444
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
443
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
444
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
445
445
|
}
|
|
446
446
|
return binding
|
|
447
447
|
} catch (e) {
|
|
@@ -456,8 +456,8 @@ function requireNative() {
|
|
|
456
456
|
try {
|
|
457
457
|
const binding = require('@kreuzberg/kreuzcrawl-linux-s390x-gnu')
|
|
458
458
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-linux-s390x-gnu/package.json').version
|
|
459
|
-
if (bindingPackageVersion !== '0.
|
|
460
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
459
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
460
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
461
461
|
}
|
|
462
462
|
return binding
|
|
463
463
|
} catch (e) {
|
|
@@ -476,8 +476,8 @@ function requireNative() {
|
|
|
476
476
|
try {
|
|
477
477
|
const binding = require('@kreuzberg/kreuzcrawl-openharmony-arm64')
|
|
478
478
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-openharmony-arm64/package.json').version
|
|
479
|
-
if (bindingPackageVersion !== '0.
|
|
480
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
479
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
480
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
481
481
|
}
|
|
482
482
|
return binding
|
|
483
483
|
} catch (e) {
|
|
@@ -492,8 +492,8 @@ function requireNative() {
|
|
|
492
492
|
try {
|
|
493
493
|
const binding = require('@kreuzberg/kreuzcrawl-openharmony-x64')
|
|
494
494
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-openharmony-x64/package.json').version
|
|
495
|
-
if (bindingPackageVersion !== '0.
|
|
496
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
495
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
496
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
497
497
|
}
|
|
498
498
|
return binding
|
|
499
499
|
} catch (e) {
|
|
@@ -508,8 +508,8 @@ function requireNative() {
|
|
|
508
508
|
try {
|
|
509
509
|
const binding = require('@kreuzberg/kreuzcrawl-openharmony-arm')
|
|
510
510
|
const bindingPackageVersion = require('@kreuzberg/kreuzcrawl-openharmony-arm/package.json').version
|
|
511
|
-
if (bindingPackageVersion !== '0.
|
|
512
|
-
throw new Error(`Native binding package version mismatch, expected 0.
|
|
511
|
+
if (bindingPackageVersion !== '0.2.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') {
|
|
512
|
+
throw new Error(`Native binding package version mismatch, expected 0.2.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`)
|
|
513
513
|
}
|
|
514
514
|
return binding
|
|
515
515
|
} catch (e) {
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kreuzberg/kreuzcrawl",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"main": "index.js",
|
|
5
5
|
"types": "index.d.ts",
|
|
6
6
|
"repository": {
|
|
@@ -35,9 +35,9 @@
|
|
|
35
35
|
"@napi-rs/cli": "^3.6.2"
|
|
36
36
|
},
|
|
37
37
|
"optionalDependencies": {
|
|
38
|
-
"@kreuzberg/kreuzcrawl-darwin-arm64": "0.1.2",
|
|
39
|
-
"@kreuzberg/kreuzcrawl-linux-arm64-gnu": "0.1.2",
|
|
40
|
-
"@kreuzberg/kreuzcrawl-linux-x64-gnu": "0.1.2",
|
|
41
|
-
"@kreuzberg/kreuzcrawl-win32-x64-msvc": "0.1.2"
|
|
38
|
+
"@kreuzberg/kreuzcrawl-darwin-arm64": "0.2.0",
|
|
39
|
+
"@kreuzberg/kreuzcrawl-linux-arm64-gnu": "0.2.0",
|
|
40
|
+
"@kreuzberg/kreuzcrawl-linux-x64-gnu": "0.2.0",
|
|
41
|
+
"@kreuzberg/kreuzcrawl-win32-x64-msvc": "0.2.0"
|
|
42
42
|
}
|
|
43
43
|
}
|
package/src/lib.rs
CHANGED
|
@@ -48,6 +48,34 @@ pub struct JsProxyConfig {
|
|
|
48
48
|
pub password: Option<String>,
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
+
#[derive(Clone, Default, serde::Serialize, serde::Deserialize)]
|
|
52
|
+
#[napi(object)]
|
|
53
|
+
pub struct JsContentConfig {
|
|
54
|
+
#[napi(js_name = "outputFormat")]
|
|
55
|
+
pub output_format: Option<String>,
|
|
56
|
+
#[napi(js_name = "preprocessingPreset")]
|
|
57
|
+
pub preprocessing_preset: Option<String>,
|
|
58
|
+
#[napi(js_name = "removeNavigation")]
|
|
59
|
+
pub remove_navigation: Option<bool>,
|
|
60
|
+
#[napi(js_name = "removeForms")]
|
|
61
|
+
pub remove_forms: Option<bool>,
|
|
62
|
+
#[napi(js_name = "stripTags")]
|
|
63
|
+
pub strip_tags: Option<Vec<String>>,
|
|
64
|
+
#[napi(js_name = "preserveTags")]
|
|
65
|
+
pub preserve_tags: Option<Vec<String>>,
|
|
66
|
+
#[napi(js_name = "excludeSelectors")]
|
|
67
|
+
pub exclude_selectors: Option<Vec<String>>,
|
|
68
|
+
#[napi(js_name = "skipImages")]
|
|
69
|
+
pub skip_images: Option<bool>,
|
|
70
|
+
#[napi(js_name = "maxDepth")]
|
|
71
|
+
pub max_depth: Option<i64>,
|
|
72
|
+
pub wrap: Option<bool>,
|
|
73
|
+
#[napi(js_name = "wrapWidth")]
|
|
74
|
+
pub wrap_width: Option<i64>,
|
|
75
|
+
#[napi(js_name = "includeDocumentStructure")]
|
|
76
|
+
pub include_document_structure: Option<bool>,
|
|
77
|
+
}
|
|
78
|
+
|
|
51
79
|
#[derive(Clone, Default, serde::Serialize, serde::Deserialize)]
|
|
52
80
|
#[napi(object)]
|
|
53
81
|
pub struct JsBrowserConfig {
|
|
@@ -99,10 +127,9 @@ pub struct JsCrawlConfig {
|
|
|
99
127
|
pub auth: Option<JsAuthConfig>,
|
|
100
128
|
#[napi(js_name = "maxBodySize")]
|
|
101
129
|
pub max_body_size: Option<i64>,
|
|
102
|
-
#[napi(js_name = "mainContentOnly")]
|
|
103
|
-
pub main_content_only: Option<bool>,
|
|
104
130
|
#[napi(js_name = "removeTags")]
|
|
105
131
|
pub remove_tags: Option<Vec<String>>,
|
|
132
|
+
pub content: Option<JsContentConfig>,
|
|
106
133
|
#[napi(js_name = "mapLimit")]
|
|
107
134
|
pub map_limit: Option<i64>,
|
|
108
135
|
#[napi(js_name = "mapSearch")]
|
|
@@ -179,8 +206,6 @@ pub struct JsScrapeResult {
|
|
|
179
206
|
pub was_skipped: Option<bool>,
|
|
180
207
|
#[napi(js_name = "detectedCharset")]
|
|
181
208
|
pub detected_charset: Option<String>,
|
|
182
|
-
#[napi(js_name = "mainContentOnly")]
|
|
183
|
-
pub main_content_only: Option<bool>,
|
|
184
209
|
#[napi(js_name = "authHeaderSent")]
|
|
185
210
|
pub auth_header_sent: Option<bool>,
|
|
186
211
|
#[napi(js_name = "responseMeta")]
|
|
@@ -734,6 +759,44 @@ impl From<kreuzcrawl::ProxyConfig> for JsProxyConfig {
|
|
|
734
759
|
}
|
|
735
760
|
}
|
|
736
761
|
|
|
762
|
+
impl From<JsContentConfig> for kreuzcrawl::ContentConfig {
|
|
763
|
+
fn from(val: JsContentConfig) -> Self {
|
|
764
|
+
Self {
|
|
765
|
+
output_format: val.output_format.unwrap_or_default(),
|
|
766
|
+
preprocessing_preset: val.preprocessing_preset.unwrap_or_default(),
|
|
767
|
+
remove_navigation: val.remove_navigation.unwrap_or_default(),
|
|
768
|
+
remove_forms: val.remove_forms.unwrap_or_default(),
|
|
769
|
+
strip_tags: val.strip_tags.unwrap_or_default(),
|
|
770
|
+
preserve_tags: val.preserve_tags.unwrap_or_default(),
|
|
771
|
+
exclude_selectors: val.exclude_selectors.unwrap_or_default(),
|
|
772
|
+
skip_images: val.skip_images.unwrap_or_default(),
|
|
773
|
+
max_depth: val.max_depth.map(|v| v as usize),
|
|
774
|
+
wrap: val.wrap.unwrap_or_default(),
|
|
775
|
+
wrap_width: val.wrap_width.map(|v| v as usize).unwrap_or_default(),
|
|
776
|
+
include_document_structure: val.include_document_structure.unwrap_or_default(),
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
impl From<kreuzcrawl::ContentConfig> for JsContentConfig {
|
|
782
|
+
fn from(val: kreuzcrawl::ContentConfig) -> Self {
|
|
783
|
+
Self {
|
|
784
|
+
output_format: Some(val.output_format),
|
|
785
|
+
preprocessing_preset: Some(val.preprocessing_preset),
|
|
786
|
+
remove_navigation: Some(val.remove_navigation),
|
|
787
|
+
remove_forms: Some(val.remove_forms),
|
|
788
|
+
strip_tags: Some(val.strip_tags),
|
|
789
|
+
preserve_tags: Some(val.preserve_tags),
|
|
790
|
+
exclude_selectors: Some(val.exclude_selectors),
|
|
791
|
+
skip_images: Some(val.skip_images),
|
|
792
|
+
max_depth: val.max_depth.map(|v| v as i64),
|
|
793
|
+
wrap: Some(val.wrap),
|
|
794
|
+
wrap_width: Some(val.wrap_width as i64),
|
|
795
|
+
include_document_structure: Some(val.include_document_structure),
|
|
796
|
+
}
|
|
797
|
+
}
|
|
798
|
+
}
|
|
799
|
+
|
|
737
800
|
#[allow(clippy::field_reassign_with_default)]
|
|
738
801
|
impl From<JsBrowserConfig> for kreuzcrawl::BrowserConfig {
|
|
739
802
|
fn from(val: JsBrowserConfig) -> Self {
|
|
@@ -788,8 +851,8 @@ impl From<JsCrawlConfig> for kreuzcrawl::CrawlConfig {
|
|
|
788
851
|
__result.cookies_enabled = val.cookies_enabled.unwrap_or_default();
|
|
789
852
|
__result.auth = val.auth.map(Into::into);
|
|
790
853
|
__result.max_body_size = val.max_body_size.map(|v| v as usize);
|
|
791
|
-
__result.main_content_only = val.main_content_only.unwrap_or_default();
|
|
792
854
|
__result.remove_tags = val.remove_tags.unwrap_or_default();
|
|
855
|
+
__result.content = val.content.map(Into::into).unwrap_or_default();
|
|
793
856
|
__result.map_limit = val.map_limit.map(|v| v as usize);
|
|
794
857
|
__result.map_search = val.map_search;
|
|
795
858
|
__result.download_assets = val.download_assets.unwrap_or_default();
|
|
@@ -833,8 +896,8 @@ impl From<kreuzcrawl::CrawlConfig> for JsCrawlConfig {
|
|
|
833
896
|
cookies_enabled: Some(val.cookies_enabled),
|
|
834
897
|
auth: val.auth.map(Into::into),
|
|
835
898
|
max_body_size: val.max_body_size.map(|v| v as i64),
|
|
836
|
-
main_content_only: Some(val.main_content_only),
|
|
837
899
|
remove_tags: Some(val.remove_tags),
|
|
900
|
+
content: Some(val.content.into()),
|
|
838
901
|
map_limit: val.map_limit.map(|v| v as i64),
|
|
839
902
|
map_search: val.map_search,
|
|
840
903
|
download_assets: Some(val.download_assets),
|
|
@@ -919,7 +982,6 @@ impl From<JsScrapeResult> for kreuzcrawl::ScrapeResult {
|
|
|
919
982
|
is_pdf: val.is_pdf.unwrap_or_default(),
|
|
920
983
|
was_skipped: val.was_skipped.unwrap_or_default(),
|
|
921
984
|
detected_charset: val.detected_charset,
|
|
922
|
-
main_content_only: val.main_content_only.unwrap_or_default(),
|
|
923
985
|
auth_header_sent: val.auth_header_sent.unwrap_or_default(),
|
|
924
986
|
response_meta: val.response_meta.map(Into::into),
|
|
925
987
|
assets: val
|
|
@@ -957,7 +1019,6 @@ impl From<kreuzcrawl::ScrapeResult> for JsScrapeResult {
|
|
|
957
1019
|
is_pdf: Some(val.is_pdf),
|
|
958
1020
|
was_skipped: Some(val.was_skipped),
|
|
959
1021
|
detected_charset: val.detected_charset,
|
|
960
|
-
main_content_only: Some(val.main_content_only),
|
|
961
1022
|
auth_header_sent: Some(val.auth_header_sent),
|
|
962
1023
|
response_meta: val.response_meta.map(Into::into),
|
|
963
1024
|
assets: Some(val.assets.into_iter().map(Into::into).collect()),
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|