@blocklet/crawler 2.0.172 → 2.0.175

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,9 +9,9 @@ var _ufo = require("ufo");
9
9
  var _utils = require("./utils");
10
10
  var _cron = _interopRequireDefault(require("@abtnode/cron"));
11
11
  var _debounce = _interopRequireDefault(require("lodash/debounce"));
12
- function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
12
+ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
13
13
  function _getRequireWildcardCache(e) { if ("function" != typeof WeakMap) return null; var r = new WeakMap(), t = new WeakMap(); return (_getRequireWildcardCache = function (e) { return e ? t : r; })(e); }
14
- function _interopRequireWildcard(e, r) { if (!r && e && e.__esModule) return e; if (null === e || "object" != typeof e && "function" != typeof e) return { default: e }; var t = _getRequireWildcardCache(r); if (t && t.has(e)) return t.get(e); var n = { __proto__: null }, a = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var u in e) if ("default" !== u && Object.prototype.hasOwnProperty.call(e, u)) { var i = a ? Object.getOwnPropertyDescriptor(e, u) : null; i && (i.get || i.set) ? Object.defineProperty(n, u, i) : n[u] = e[u]; } return n.default = e, t && t.set(e, n), n; }
14
+ function _interopRequireWildcard(e, r) { if (!r && e && e.__esModule) return e; if (null === e || "object" != typeof e && "function" != typeof e) return { default: e }; var t = _getRequireWildcardCache(r); if (t && t.has(e)) return t.get(e); var n = { __proto__: null }, a = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var u in e) if ("default" !== u && {}.hasOwnProperty.call(e, u)) { var i = a ? Object.getOwnPropertyDescriptor(e, u) : null; i && (i.get || i.set) ? Object.defineProperty(n, u, i) : n[u] = e[u]; } return n.default = e, t && t.set(e, n), n; }
15
15
  const formatHtml = htmlString => {
16
16
  if (typeof htmlString !== "string") {
17
17
  return "";
@@ -41,7 +41,6 @@ const getPageContent = async ({
41
41
  idleTime: 1 * 1e3
42
42
  // 1s
43
43
  });
44
-
45
44
  if (formatPageContent) {
46
45
  pageContent = await formatPageContent({
47
46
  page,
@@ -86,7 +85,7 @@ const crawlUrl = async ({
86
85
  await _utils.useCache.set((0, _utils.getRelativePath)(url), {
87
86
  content: pageContent,
88
87
  lastmod: lastmodMap?.get(url),
89
- updatedAt: /* @__PURE__ */new Date().toISOString(),
88
+ updatedAt: ( /* @__PURE__ */new Date()).toISOString(),
90
89
  nextDate: cronCrawlBlockletJob.jobs[CRON_CRAWL_BLOCKLET_KEY].nextDate()
91
90
  });
92
91
  _utils.logger.info(`Crawler[${index}] ${url} success`);
@@ -178,7 +177,7 @@ const crawlBlocklet = async () => {
178
177
  skipBlockletLocTotal++;
179
178
  return false;
180
179
  }
181
- if (nextDate && new Date(nextDate).getTime() >= /* @__PURE__ */new Date().getTime()) {
180
+ if (nextDate && new Date(nextDate).getTime() >= ( /* @__PURE__ */new Date()).getTime()) {
182
181
  skipBlockletLocTotal++;
183
182
  return false;
184
183
  }
@@ -62,8 +62,8 @@ Object.keys(_config).forEach(function (key) {
62
62
  });
63
63
  });
64
64
  function _getRequireWildcardCache(e) { if ("function" != typeof WeakMap) return null; var r = new WeakMap(), t = new WeakMap(); return (_getRequireWildcardCache = function (e) { return e ? t : r; })(e); }
65
- function _interopRequireWildcard(e, r) { if (!r && e && e.__esModule) return e; if (null === e || "object" != typeof e && "function" != typeof e) return { default: e }; var t = _getRequireWildcardCache(r); if (t && t.has(e)) return t.get(e); var n = { __proto__: null }, a = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var u in e) if ("default" !== u && Object.prototype.hasOwnProperty.call(e, u)) { var i = a ? Object.getOwnPropertyDescriptor(e, u) : null; i && (i.get || i.set) ? Object.defineProperty(n, u, i) : n[u] = e[u]; } return n.default = e, t && t.set(e, n), n; }
66
- function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
65
+ function _interopRequireWildcard(e, r) { if (!r && e && e.__esModule) return e; if (null === e || "object" != typeof e && "function" != typeof e) return { default: e }; var t = _getRequireWildcardCache(r); if (t && t.has(e)) return t.get(e); var n = { __proto__: null }, a = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var u in e) if ("default" !== u && {}.hasOwnProperty.call(e, u)) { var i = a ? Object.getOwnPropertyDescriptor(e, u) : null; i && (i.get || i.set) ? Object.defineProperty(n, u, i) : n[u] = e[u]; } return n.default = e, t && t.set(e, n), n; }
66
+ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
67
67
  const {
68
68
  logger
69
69
  } = _config.default;
@@ -338,7 +338,6 @@ const cachePool = exports.cachePool = (0, _genericPool.createPool)({
338
338
  min: 1
339
339
  // evictionRunIntervalMillis: 0,
340
340
  });
341
-
342
341
  const withCache = async cb => {
343
342
  const client = await cachePool.acquire();
344
343
  if (client) {
@@ -165,8 +165,7 @@ export const crawlBlocklet = async () => {
165
165
  })
166
166
  )).filter(Boolean);
167
167
  tempLocList.forEach((loc) => {
168
- if (item.lastmod)
169
- lastmodMap.set(loc, item.lastmod);
168
+ if (item.lastmod) lastmodMap.set(loc, item.lastmod);
170
169
  });
171
170
  canUseBlockletLocList.push(...tempLocList);
172
171
  })
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blocklet/crawler",
3
- "version": "2.0.172",
3
+ "version": "2.0.175",
4
4
  "description": "blocklet crawler lib",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -40,15 +40,6 @@
40
40
  "lib",
41
41
  "*.d.ts"
42
42
  ],
43
- "scripts": {
44
- "coverage": "yarn test -- --coverage",
45
- "test": "vitest tests",
46
- "build": "unbuild",
47
- "build:watch": "npx nodemon --ext 'ts,tsx,json,js,jsx' --exec 'yarn run build' --ignore 'lib/*' ",
48
- "dev": "yarn run build:watch",
49
- "prepublish": "yarn run build",
50
- "prebuild:dep": "yarn run build"
51
- },
52
43
  "keywords": [
53
44
  "blocklet",
54
45
  "crawler"
@@ -59,37 +50,45 @@
59
50
  "@abtnode/cron": "1.16.30",
60
51
  "@blocklet/puppeteer": "^22.11.3",
61
52
  "@blocklet/sdk": "1.16.30",
62
- "axios": "^1.5.1",
53
+ "axios": "^1.7.7",
63
54
  "crypto": "^1.0.1",
64
- "fs-extra": "^11.1.1",
55
+ "fs-extra": "^11.2.0",
65
56
  "generic-pool": "^3.9.0",
66
57
  "lodash": "^4.17.21",
67
- "lru-cache": "^10.0.1",
58
+ "lru-cache": "^10.4.3",
68
59
  "p-queue": "6.6.2",
69
60
  "p-wait-for": "^5.0.2",
70
- "prettier": "^3.3.2",
71
- "redis": "^4.6.10",
61
+ "prettier": "^3.3.3",
62
+ "redis": "^4.7.0",
72
63
  "robots-parser": "^3.0.1",
73
- "sitemap": "^7.1.1",
74
- "ufo": "^1.3.1",
64
+ "sitemap": "^7.1.2",
65
+ "ufo": "^1.5.4",
75
66
  "url-join": "^4.0.1"
76
67
  },
77
68
  "devDependencies": {
78
69
  "@arcblock/eslint-config-ts": "^0.3.2",
79
- "@types/express": "^4.17.18",
80
- "@types/fs-extra": "^11.0.2",
81
- "@types/lodash": "^4.14.199",
82
- "@types/mime-types": "^2.1.2",
83
- "@types/node": "^20.8.3",
70
+ "@types/express": "^4.17.21",
71
+ "@types/fs-extra": "^11.0.4",
72
+ "@types/lodash": "^4.17.7",
73
+ "@types/mime-types": "^2.1.4",
74
+ "@types/node": "^20.16.5",
84
75
  "@types/puppeteer": "^7.0.4",
85
- "@types/react": "^18.2.25",
86
- "@types/url-join": "^4.0.1",
76
+ "@types/react": "^18.3.5",
77
+ "@types/url-join": "^4.0.3",
87
78
  "@vitest/coverage-c8": "^0.33.0",
88
79
  "jsdom": "^22.1.0",
89
- "typescript": "^5.2.2",
80
+ "typescript": "^5.6.2",
90
81
  "unbuild": "^1.2.1",
91
- "vitest": "^1.4.0",
82
+ "vitest": "^1.6.0",
92
83
  "vitest-fetch-mock": "^0.2.2"
93
84
  },
94
- "gitHead": "749dd785b5ab83dbb7b7575b2e7f20cc21084129"
95
- }
85
+ "scripts": {
86
+ "coverage": "pnpm test -- --coverage",
87
+ "test": "vitest tests",
88
+ "build": "unbuild",
89
+ "build:watch": "npx nodemon --ext 'ts,tsx,json,js,jsx' --exec 'pnpm run build' --ignore 'lib/*' ",
90
+ "dev": "pnpm run build:watch",
91
+ "prepublish": "pnpm run build",
92
+ "prebuild:dep": "pnpm run build"
93
+ }
94
+ }