@wargas/crawler 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,15 +1,109 @@
1
- # crawler
1
+ # Crawler
2
2
 
3
- To install dependencies:
3
+ Classe utilitária para realizar requisições HTTP com:
4
+
5
+ * Persistência automática de cookies
6
+ * Parsing automático de HTML
7
+ * Manipulação do DOM usando `linkedom`
8
+ Cliente HTTP baseado em `got`
9
+
10
+ ---
11
+
12
+ # Instalação
4
13
 
5
14
  ```bash
6
- bun install
15
+ bun add @wargas/crawler
7
16
  ```
8
17
 
9
- To run:
18
+ ou usando npm:
10
19
 
11
20
  ```bash
12
- bun run index.ts
21
+ npm install @wargas/crawler
22
+ ```
23
+
24
+ ---
25
+
26
+ # Uso básico
27
+
28
+ ```ts
29
+ import { Crawler } from "@wargas/crawler";
30
+
31
+ const crawler = Crawler.factory();
32
+
33
+ await crawler.client.get("https://example.com");
34
+
35
+ console.log(crawler.html);
36
+
37
+ console.log(
38
+ crawler.document.querySelector("title")?.textContent
39
+ );
40
+ ```
41
+
42
+ ---
43
+
44
+ # Cookies persistentes
45
+
46
+ Os cookies são armazenados automaticamente no arquivo:
47
+
48
+ ```txt
49
+ cookies.json
13
50
  ```
14
51
 
15
- This project was created using `bun init` in bun v1.3.14. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime.
52
+ Isso permite manter a sessão entre execuções.
53
+
54
+ ---
55
+
56
+ # Limpar cookies
57
+
58
+ ```ts
59
+ await crawler.removeAllCookies();
60
+ ```
61
+
62
+ ---
63
+
64
+ # Acessando o DOM
65
+
66
+ Como o HTML é convertido automaticamente usando `linkedom`, é possível utilizar APIs similares às do navegador:
67
+
68
+ ```ts
69
+ const links = crawler.document.querySelectorAll("a");
70
+
71
+ for (const link of links) {
72
+ console.log(link.getAttribute("href"));
73
+ }
74
+ ```
75
+
76
+ ---
77
+
78
+ # Configurações atuais
79
+
80
+ A instância do `got` é criada com:
81
+
82
+ ```ts
83
+ followRedirect: false
84
+ ```
85
+
86
+ Comportamento atual do cliente:
87
+
88
+ * redirects não são seguidos automaticamente
89
+ * cookies são persistidos
90
+ * HTML é parseado automaticamente após cada resposta
91
+
92
+ ---
93
+
94
+ # Possíveis melhorias
95
+
96
+ * Suporte a proxy
97
+ * Retry automático
98
+ * Timeout configurável
99
+ * User-Agent customizado
100
+ * Suporte a certificados digitais
101
+ * Suporte a HTTP2
102
+ * Métodos helper (`get`, `post`, `login`, etc.)
103
+ * Cache de páginas
104
+
105
+ ---
106
+
107
+ # Licença
108
+
109
+ MIT
package/bun.lock CHANGED
@@ -12,9 +12,10 @@
12
12
  },
13
13
  "devDependencies": {
14
14
  "@types/bun": "latest",
15
+ "tsc": "^2.0.4",
15
16
  },
16
17
  "peerDependencies": {
17
- "typescript": "^5",
18
+ "typescript": "^6.0.3",
18
19
  },
19
20
  },
20
21
  },
@@ -103,9 +104,11 @@
103
104
 
104
105
  "tough-cookie-file-store": ["tough-cookie-file-store@3.3.0", "", { "dependencies": { "tough-cookie": "^6.0.0" } }, "sha512-FbO/cOi/jp4wweo8soVNG/ZjDsgpBZWqaxWwu7gRKvsjg/Qt44kStp87VLfJnin749DlTbZDYvV1wuSr5jly2g=="],
105
106
 
107
+ "tsc": ["tsc@2.0.4", "", { "bin": { "tsc": "bin/tsc" } }, "sha512-fzoSieZI5KKJVBYGvwbVZs/J5za84f2lSTLPYf6AGiIf43tZ3GNrI1QzTLcjtyDDP4aLxd46RTZq1nQxe7+k5Q=="],
108
+
106
109
  "type-fest": ["type-fest@5.6.0", "", { "dependencies": { "tagged-tag": "^1.0.0" } }, "sha512-8ZiHFm91orbSAe2PSAiSVBVko18pbhbiB3U9GglSzF/zCGkR+rxpHx6sEMCUm4kxY4LjDIUGgCfUMtwfZfjfUA=="],
107
110
 
108
- "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
111
+ "typescript": ["typescript@6.0.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw=="],
109
112
 
110
113
  "uhyphen": ["uhyphen@0.2.0", "", {}, "sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA=="],
111
114
 
package/dist/index.js CHANGED
@@ -23300,7 +23300,9 @@ class Crawler {
23300
23300
  return res;
23301
23301
  }
23302
23302
  ]
23303
- }
23303
+ },
23304
+ followRedirect: false,
23305
+ cookieJar: instance.cookieJar
23304
23306
  });
23305
23307
  return instance;
23306
23308
  }
package/index.d.ts ADDED
@@ -0,0 +1,15 @@
1
+ // index.d.ts
2
+
3
+ import type { Got } from "got";
4
+ import type { CookieJar } from "tough-cookie";
5
+
6
+ export declare class Crawler {
7
+ client: Got;
8
+ cookieJar: CookieJar;
9
+ html: string;
10
+ document: Document;
11
+
12
+ static factory(): Crawler;
13
+
14
+ removeAllCookies(): Promise<void>;
15
+ }
package/index.test.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { expect, test } from 'bun:test'
2
- import { Crawler } from "./index";
2
+ import { Crawler } from "./";
3
3
 
4
4
  test(`get page title`, async function() {
5
5
 
package/index.ts CHANGED
@@ -8,7 +8,7 @@ export class Crawler {
8
8
  client!: Got
9
9
  cookieJar!: CookieJar
10
10
  html = ``
11
- document = parseHTML(``).document
11
+ document: Document = parseHTML(``).document
12
12
 
13
13
  static factory() {
14
14
  const instance = new Crawler()
@@ -27,9 +27,9 @@ export class Crawler {
27
27
  return res
28
28
  }
29
29
  ]
30
- }
31
- // followRedirect:false,
32
- // cookieJar: instance.cookieJar,
30
+ },
31
+ followRedirect:false,
32
+ cookieJar: instance.cookieJar,
33
33
 
34
34
  })
35
35
 
package/package.json CHANGED
@@ -1,19 +1,24 @@
1
1
  {
2
2
  "name": "@wargas/crawler",
3
- "version": "0.0.3",
3
+ "version": "0.0.5",
4
4
  "module": "index.ts",
5
5
  "main": "dist/index.js",
6
6
  "type": "module",
7
7
  "private": false,
8
+ "repository": {
9
+ "type": "git",
10
+ "url": "https://github.com/wargas/crawler.git"
11
+ },
8
12
  "scripts": {
9
13
  "build": "bun build index.ts --target node --outdir dist",
10
14
  "publish": "npm publish --tag latest --access public"
11
15
  },
12
16
  "devDependencies": {
13
- "@types/bun": "latest"
17
+ "@types/bun": "latest",
18
+ "tsc": "^2.0.4"
14
19
  },
15
20
  "peerDependencies": {
16
- "typescript": "^5"
21
+ "typescript": "^6.0.3"
17
22
  },
18
23
  "dependencies": {
19
24
  "got": "^15.0.5",