@nitpicker/crawler 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import type { Database } from './database.js';
2
- import type { DatabaseEvent, PageFilter } from './types.js';
2
+ import type { Config, DatabaseEvent, PageFilter } from './types.js';
3
3
  import type { ParseURLOptions } from '@d-zero/shared/parse-url';
4
4
  import { TypedAwaitEventEmitter as EventEmitter } from '@d-zero/shared/typed-await-event-emitter';
5
5
  import Page from './page.js';
@@ -25,6 +25,11 @@ export declare class ArchiveAccessor extends EventEmitter<DatabaseEvent> {
25
25
  * When null, `setData` is not available.
26
26
  */
27
27
  constructor(tmpDir: string, db: Database, namespace?: string | null);
28
+ /**
29
+ * Retrieves the crawl configuration stored in the archive database.
30
+ * @returns The parsed {@link Config} object.
31
+ */
32
+ getConfig(): Promise<Config>;
28
33
  /**
29
34
  * Retrieves anchor (link) data for a specific page by its database ID.
30
35
  * @param pageId - The database ID of the page whose anchors to retrieve.
@@ -41,6 +41,13 @@ export class ArchiveAccessor extends EventEmitter {
41
41
  void this.emit('error', e);
42
42
  });
43
43
  }
44
+ /**
45
+ * Retrieves the crawl configuration stored in the archive database.
46
+ * @returns The parsed {@link Config} object.
47
+ */
48
+ async getConfig() {
49
+ return this.#db.getConfig();
50
+ }
44
51
  /**
45
52
  * Retrieves anchor (link) data for a specific page by its database ID.
46
53
  * @param pageId - The database ID of the page whose anchors to retrieve.
@@ -723,7 +723,6 @@ let Database = (() => {
723
723
  t.string('name');
724
724
  t.string('baseUrl');
725
725
  t.boolean('recursive');
726
- t.boolean('useSubprocess');
727
726
  t.integer('interval');
728
727
  t.boolean('image');
729
728
  t.boolean('fetchExternal');
@@ -736,6 +735,8 @@ let Database = (() => {
736
735
  t.integer('retry');
737
736
  t.boolean('fromList');
738
737
  t.boolean('disableQueries');
738
+ t.string('userAgent');
739
+ t.boolean('ignoreRobots');
739
740
  })
740
741
  .createTable('pages', (t) => {
741
742
  t.increments('id');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nitpicker/crawler",
3
- "version": "0.4.3",
3
+ "version": "0.4.4",
4
4
  "description": "Web crawler engine with headless browser rendering and archive storage",
5
5
  "author": "D-ZERO",
6
6
  "license": "Apache-2.0",
@@ -48,5 +48,5 @@
48
48
  "@types/tar": "7.0.87",
49
49
  "@types/unzipper": "0.10.11"
50
50
  },
51
- "gitHead": "0f4ca55751be2f83dd5b6622c3502503fc7dfb41"
51
+ "gitHead": "7c9bad351d1a3f4eb6ea7233f7143140d0989ef2"
52
52
  }