torrent-agent 0.0.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -23
- package/dist/agent.d.ts +13 -1
- package/dist/agent.js +55 -14
- package/dist/index.d.ts +4 -1
- package/dist/index.js +4 -1
- package/dist/query.d.ts +9 -0
- package/dist/query.js +61 -29
- package/dist/scrapers/1337x.js +20 -14
- package/dist/scrapers/nyaa.d.ts +8 -0
- package/dist/scrapers/nyaa.js +79 -0
- package/dist/scrapers/scraper.d.ts +10 -3
- package/dist/scrapers/scraper.js +2 -2
- package/dist/scrapers/thepiratebay.d.ts +10 -0
- package/dist/scrapers/thepiratebay.js +89 -0
- package/dist/scrapers/torrentGalaxy.d.ts +7 -0
- package/dist/scrapers/torrentGalaxy.js +71 -0
- package/package.json +17 -11
package/README.md
CHANGED
@@ -3,7 +3,7 @@
 <h1 align="center">Torrent Agent</h1>
 </div>
 
-Torrent Agent is an npm library for searching torrents from torrent sites like 1337x,
+Torrent Agent is an npm library for searching torrents from torrent sites like 1337x, Torrent Galaxy, etc. It can run multiple queries concurrently and manage multiple scrapers that look for torrents from different providers at the same time.
 
 ## How to use it
 
@@ -20,34 +20,47 @@ import TorrentAgent from "torrent-agent";
 
 const agent = new TorrentAgent();
 
-…
-}
-…
+async function getTorrents() {
+  let query = await agent.add({
+    searchQuery: "Ubuntu",
+    options: {
+      limit: 20,
+      concurrency: 5,
+    },
+  });
+
+  // Listen for torrents
+  query.on("torrent", (torrent) => {
+    console.log(torrent);
+  });
+  // Listen for errors
+  query.on("error", (e) => {
+    console.log(e);
+  });
+  // Listen for query completion
+  query.on("done", () => {
+    console.log("done");
+  });
+}
 ```
 
 #### Default scrapers :
 
-…
+- Torrent Galaxy
+- Nyaa
+- 1337x
 
 > More scrapers will be available soon
 
+#### Default Chromium scrapers :
+
+- The Pirate Bay
+> **Note:** To use these scrapers, set `allowChromiumScrapers` in the agent config. You must have Chromium installed on your OS. Keep in mind that Chromium uses more memory, which means Chromium scrapers will be heavier than the normal ones and take more time.
+
+```js
+const agent = new TorrentAgent({ allowChromiumScrapers: true });
+```
+
 #### Custom scrapers
 
 You can create your own custom scrapers that scrape from any site.
@@ -69,7 +82,7 @@ class CustomScraper extends Scraper {
 // then create the agent
 const agent = new TorrentAgent();
 // use your custom scraper in your query
-let query = agent.add({
+let query = await agent.add({
   searchQuery: "Ubuntu",
   options: {
     limit: 20,
package/dist/agent.d.ts
CHANGED
@@ -2,30 +2,42 @@ import { EventEmitter } from "events";
 import Query, { QueryOpts } from "./query.js";
 import PQueue from "p-queue";
 import { Scraper } from "./scrapers/scraper.js";
+import { Browser } from "playwright";
 interface AgentOpts {
     /**
      * Max queries that run at the same time.
      */
     QueriesConcurrency: number;
+    allowChromiumScrapers: boolean;
+    /**
+     * Request timeout for fetching web page (ms).
+     */
+    fetchTimeOut: number;
 }
 interface AgentEvents {
     query: [query: Query];
     query_done: [];
     destroyed: [];
+    browser: [browser: Browser];
+    browser_error: [err: any];
 }
 interface AddQueryOpts {
     searchQuery: string;
     scrapers?: Scraper[];
     options?: QueryOpts;
+    useChromium?: boolean;
 }
 export declare class AgentError extends Error {
     constructor(msg: string);
 }
+export declare const defaultQueryConfigs: QueryOpts;
 export default class TorrentAgent extends EventEmitter<AgentEvents> {
     protected queue: PQueue | null;
     protected isDestroyed: boolean;
+    browser?: Browser;
+    opts: Partial<AgentOpts>;
     constructor(opts?: Partial<AgentOpts>);
-    add(opts: AddQueryOpts): Query
+    add(opts: AddQueryOpts): Promise<Query>;
     /**
      * Clear the queue.
      */
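
A sketch of an agent configured with the new `AgentOpts` fields; all option values here are illustrative, the field names and semantics come from the declarations above:

```js
const agent = new TorrentAgent({
  QueriesConcurrency: 2,       // max queries that run at the same time
  allowChromiumScrapers: true, // launch Chromium so Chromium scrapers can run
  fetchTimeOut: 15 * 1000,     // request timeout for fetching web pages (ms)
});
```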
package/dist/agent.js
CHANGED
@@ -10,11 +10,19 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 import { EventEmitter } from "events";
 import Query from "./query.js";
 import PQueue from "p-queue";
+import { chromium } from "playwright-extra";
+import stealth from "puppeteer-extra-plugin-stealth";
+chromium.use(stealth());
 export class AgentError extends Error {
     constructor(msg) {
         super(msg);
     }
 }
+export const defaultQueryConfigs = {
+    limit: 10,
+    concurrency: 5,
+    fetchTimeOut: 30 * 1000,
+};
 export default class TorrentAgent extends EventEmitter {
     constructor(opts = {}) {
         super();
@@ -22,22 +30,55 @@ export default class TorrentAgent extends EventEmitter {
             concurrency: opts.QueriesConcurrency || 5,
         });
         this.isDestroyed = false;
+        this.opts = opts;
+        if (opts.allowChromiumScrapers) {
+            (() => __awaiter(this, void 0, void 0, function* () {
+                try {
+                    this.browser = yield chromium.launch();
+                    this.emit("browser", this.browser);
+                }
+                catch (err) {
+                    this.emit("browser_error", err);
+                }
+            }))();
+        }
     }
     add(opts) {
-…
+        return __awaiter(this, void 0, void 0, function* () {
+            var _a;
+            if (this.isDestroyed) {
+                throw new AgentError("agent is destroyed cannot add new query");
+            }
+            if (!this.queue) {
+                throw new AgentError("queue is destroyed cannot add a new query");
+            }
+            let options = Object.assign(Object.assign(Object.assign({}, defaultQueryConfigs), opts.options), { useChromiumScrapers: ((_a = opts.options) === null || _a === void 0 ? void 0 : _a.useChromiumScrapers) === false
+                    ? false
+                    : this.opts.allowChromiumScrapers });
+            if (this.opts.allowChromiumScrapers) {
+                if (!this.browser) {
+                    yield new Promise((res, rej) => {
+                        this.once("browser", (browser) => {
+                            //@ts-ignore
+                            options.browser = browser;
+                            res(0);
+                        });
+                        this.once("browser_error", rej);
+                    });
+                }
+                else {
+                    options.browser = this.browser;
+                }
+            }
+            const query = new Query(opts.searchQuery, opts.scrapers, options);
+            this.queue.add(() => __awaiter(this, void 0, void 0, function* () {
+                yield query.run();
+                yield query.destroy();
+                this.emit("query_done");
+            }));
+            this.emit("query", query);
+            return query;
+        });
     }
     /**
      * Clear the queue.
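
The constructor launches Chromium in a fire-and-forget async IIFE, so a failed launch is emitted as `browser_error` rather than thrown, and an `add()` call that is still waiting for the browser rejects with that same error. A short sketch of observing both outcomes:

```js
const agent = new TorrentAgent({ allowChromiumScrapers: true });

// Launch failures (e.g. Chromium not installed) surface only through
// this event or through a rejected add(), never as a thrown constructor error.
agent.on("browser", () => console.log("Chromium ready"));
agent.on("browser_error", (err) => console.error("Chromium launch failed:", err));
```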
package/dist/index.d.ts
CHANGED
@@ -2,5 +2,8 @@ import TorrentAgent from "./agent.js";
 import Query from "./query.js";
 import { Scraper } from "./scrapers/scraper.js";
 import { Scraper1337x } from "./scrapers/1337x.js";
-…
+import { Nyaa } from "./scrapers/nyaa.js";
+import { TorrentGalaxy } from "./scrapers/torrentGalaxy.js";
+import { ThePirateBay } from "./scrapers/thepiratebay.js";
+export { Scraper, Query, Scraper1337x, Nyaa, TorrentGalaxy, ThePirateBay };
 export default TorrentAgent;
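
A sketch of what the new named exports enable: restricting a query to specific providers by passing scraper instances (when `scrapers` is given, `DefaultScrapers` is not used; see the `Query` constructor in `query.js` below):

```js
import TorrentAgent, { Nyaa, TorrentGalaxy } from "torrent-agent";

const agent = new TorrentAgent();

const query = await agent.add({
  searchQuery: "Ubuntu",
  scrapers: [new Nyaa(), new TorrentGalaxy()],
});
```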
package/dist/index.js
CHANGED
@@ -2,5 +2,8 @@ import TorrentAgent from "./agent.js";
 import Query from "./query.js";
 import { Scraper } from "./scrapers/scraper.js";
 import { Scraper1337x } from "./scrapers/1337x.js";
-…
+import { Nyaa } from "./scrapers/nyaa.js";
+import { TorrentGalaxy } from "./scrapers/torrentGalaxy.js";
+import { ThePirateBay } from "./scrapers/thepiratebay.js";
+export { Scraper, Query, Scraper1337x, Nyaa, TorrentGalaxy, ThePirateBay };
 export default TorrentAgent;
package/dist/query.d.ts
CHANGED
@@ -1,10 +1,12 @@
 import { EventEmitter } from "events";
 import { Scraper, Torrent } from "./scrapers/scraper.js";
 import PQueue from "p-queue";
+import { Browser } from "playwright";
 export declare class QueryError extends Error {
     constructor(msg: string);
 }
 export declare const DefaultScrapers: Scraper[];
+export declare const ChromiumScrapers: Scraper[];
 interface QueryEvents {
     error: [error: QueryError];
     torrent: [torrent: Torrent];
@@ -15,6 +17,12 @@ export interface QueryOpts {
     concurrency: number;
     /** Max torrents per scraper. */
     limit: number;
+    browser?: Browser;
+    useChromiumScrapers?: boolean;
+    /**
+     * Request timeout for fetching web page (ms).
+     */
+    fetchTimeOut: number;
 }
 export declare const QueueDestroyedErr: QueryError;
 export declare const QueryDestroyed: QueryError;
@@ -23,6 +31,7 @@ export default class Query extends EventEmitter<QueryEvents> {
     protected searchQuery: string;
     protected queue: PQueue | null;
     protected isDestroyed: boolean;
+    protected opts: QueryOpts;
     limit?: number;
     constructor(searchQuery: string, scrapers?: Scraper[], opts?: Partial<QueryOpts>);
     protected getTorrents(scraper: Scraper): Promise<void>;
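
Per-query options can override the agent-level defaults; a sketch with illustrative values, using the fields declared in `QueryOpts` above:

```js
const query = await agent.add({
  searchQuery: "Ubuntu",
  options: {
    limit: 10,                  // max torrents per scraper
    concurrency: 3,
    fetchTimeOut: 10 * 1000,    // fail a scraper's search-page load after 10 s
    useChromiumScrapers: false, // opt this query out even if the agent allows them
  },
});
```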
package/dist/query.js
CHANGED
@@ -9,21 +9,45 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 };
 import { EventEmitter } from "events";
 import PQueue from "p-queue";
+import { TorrentGalaxy } from "./scrapers/torrentGalaxy.js";
+import { ThePirateBay } from "./scrapers/thepiratebay.js";
+import { Nyaa } from "./scrapers/nyaa.js";
 import { Scraper1337x } from "./scrapers/1337x.js";
+import { defaultQueryConfigs } from "./agent.js";
 export class QueryError extends Error {
     constructor(msg) {
         super();
         this.message = msg;
     }
 }
-export const DefaultScrapers = [
+export const DefaultScrapers = [
+    new TorrentGalaxy(),
+    new Nyaa(),
+    new Scraper1337x(),
+];
+export const ChromiumScrapers = [new ThePirateBay()];
 export const QueueDestroyedErr = new QueryError("The queue is destroyed cannot run the query.\nThis error may be caused because the query is already destroyed.");
 export const QueryDestroyed = new QueryError("The query is destroyed cannot run the query.");
 export default class Query extends EventEmitter {
     constructor(searchQuery, scrapers, opts = {}) {
         super();
         this.searchQuery = searchQuery;
-        this.
+        this.opts = Object.assign(Object.assign({}, defaultQueryConfigs), opts);
+        if (scrapers) {
+            scrapers.forEach((s) => {
+                s.browser = opts.browser;
+            });
+            this.scrapers = scrapers;
+        }
+        else {
+            this.scrapers = DefaultScrapers;
+            if (opts.useChromiumScrapers && opts.browser) {
+                ChromiumScrapers.forEach((s) => {
+                    s.browser = opts.browser;
+                });
+                this.scrapers = [...this.scrapers, ...ChromiumScrapers];
+            }
+        }
         this.isDestroyed = false;
         this.limit = opts.limit;
         this.queue = new PQueue({
@@ -42,34 +66,42 @@ export default class Query extends EventEmitter {
             this.emit("error", QueueDestroyedErr);
             throw QueueDestroyedErr;
         }
-…
-        this.emit("torrent", torrent);
+        try {
+            yield this.queue.add(() => __awaiter(this, void 0, void 0, function* () {
+                let t;
+                const timeout = new Promise((_, reject) => {
+                    t = setTimeout(() => {
+                        reject(new Error(`FirstTouch Timeout (${scraper.opts.name}) : can't load search page (${this.opts.fetchTimeOut}ms)`));
+                    }, this.opts.fetchTimeOut);
+                });
+                let links = yield Promise.race([
+                    scraper.firstTouch(this.searchQuery, this.limit),
+                    timeout,
+                ]);
+                clearTimeout(t);
+                if (!links)
+                    return;
+                for (let i = 0; i < links.length; i++) {
+                    if (!this.queue) {
+                        this.emit("error", QueueDestroyedErr);
+                        throw QueueDestroyedErr;
                     }
-…
+                    this.queue.add(() => __awaiter(this, void 0, void 0, function* () {
+                        try {
+                            let torrent = yield scraper.scrapeTorrent(links[i]);
+                            this.emit("torrent", torrent);
+                        }
+                        catch (err) {
+                            this.emit("error", new QueryError(`error while scraping torrent page${(err === null || err === void 0 ? void 0 : err.message) ? ` : ${err.message}` : ""}`));
+                            return;
+                        }
+                    }));
+                }
+            }));
+        }
+        catch (err) {
+            this.emit("error", new QueryError(`error while scraping search page${(err === null || err === void 0 ? void 0 : err.message) ? ` : ${err.message}` : ""}`));
+        }
         });
     }
     run() {
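
`getTorrents` races each scraper's `firstTouch` against a `setTimeout`-backed rejection. The same pattern in isolation (names illustrative), with a `finally` so the timer is also cleared when the work itself rejects; the package version clears it only after a successful race:

```js
// Standalone sketch of the race-with-timeout used above.
async function withTimeout(work, ms) {
  let t;
  const timeout = new Promise((_, reject) => {
    t = setTimeout(() => reject(new Error(`timed out after ${ms}ms`)), ms);
  });
  try {
    return await Promise.race([work, timeout]);
  } finally {
    clearTimeout(t); // clear the timer on success and on failure
  }
}

// e.g. withTimeout(scraper.firstTouch("Ubuntu", 20), 30 * 1000)
```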
package/dist/scrapers/1337x.js
CHANGED
@@ -11,8 +11,9 @@ import axios from "axios";
 import { Scraper } from "./scraper.js";
 import { load } from "cheerio";
 export class Scraper1337x extends Scraper {
-    constructor(opts = {}) {
+    constructor(opts = { name: "1337x Scraper" }) {
         super(opts);
+        this.browser = opts.browser;
     }
     firstTouch(query, limit) {
         return __awaiter(this, void 0, void 0, function* () {
@@ -20,11 +21,12 @@ export class Scraper1337x extends Scraper {
                 throw new Error("search query is required to scrape");
             }
             let results = [];
-            let
+            let p = 1;
             while (results.length != (limit || 20)) {
-                const
-…
+                const url = new URL(Scraper1337x.firstTouchUrl);
+                url.searchParams.set("q", query);
+                url.searchParams.set("page", p.toString());
+                const { data } = yield axios.get(url.href);
                 const $ = load(data);
                 let torrentCount = $(".table-list tbody tr").length;
                 if (torrentCount === 0) {
@@ -34,14 +36,17 @@ export class Scraper1337x extends Scraper {
                     if (results.length >= (limit || 20))
                         return;
                     const name = $(el).find("td.name a").eq(1).text().trim();
-                    const
+                    const urlPath = $(el).find("td.name a").eq(1).attr("href");
+                    if (!urlPath)
+                        return;
+                    const url = new URL(urlPath, Scraper1337x.firstTouchUrl);
                     const seeders = $(el).find("td.seeds").text().trim();
                     const size = $(el).find("td.size").text().trim();
-                    const uploader = $(el).find("td
+                    const uploader = $(el).find("td").eq(5).text().trim();
                     const leechers = $(el).find("td.leeches").text().trim();
                     results.push({
                         name,
-                        url,
+                        url: url.href,
                         seeders: +seeders,
                         leechers: +leechers,
                         provider: "1337x",
@@ -49,7 +54,7 @@ export class Scraper1337x extends Scraper {
                         uploader,
                     });
                 });
-…
+                p++;
             }
             return results;
         });
@@ -61,11 +66,12 @@ export class Scraper1337x extends Scraper {
             }
             const { data } = yield axios.get(link.url);
             const $ = load(data);
-            const
-            const
-…
+            const text = $(".box-info").text().trim().replace(/\s+/g, " ");
+            const infoHashMatch = text.match(/info\s*hash\s*:\s*([a-fA-F0-9]+)/i);
+            if (!infoHashMatch || (infoHashMatch === null || infoHashMatch === void 0 ? void 0 : infoHashMatch.length) == 0)
+                throw Error("cant get info hash");
+            return Object.assign({ infoHash: infoHashMatch[1] }, link);
         });
     }
 }
-Scraper1337x.firstTouchUrl = "https://1337x.
+Scraper1337x.firstTouchUrl = "https://1337x.pro/search";
package/dist/scrapers/nyaa.d.ts
ADDED
@@ -0,0 +1,8 @@
+import { Scraper, ScraperOpts, Torrent, TorrentLink } from "./scraper.js";
+export declare class Nyaa extends Scraper {
+    static homeUrl: string;
+    static firstTouchUrl: string;
+    constructor(opts?: ScraperOpts);
+    firstTouch(query: string, limit?: number): Promise<TorrentLink[]>;
+    scrapeTorrent(link: TorrentLink): Promise<Torrent>;
+}
package/dist/scrapers/nyaa.js
ADDED
@@ -0,0 +1,79 @@
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+import axios from "axios";
+import { Scraper } from "./scraper.js";
+import { load } from "cheerio";
+function extractInfoHash(magnetUri) {
+    if (typeof magnetUri !== "string")
+        return null;
+    const match = magnetUri.match(/xt=urn:btih:([a-zA-Z0-9]+)/);
+    return match ? match[1].toLowerCase() : null;
+}
+export class Nyaa extends Scraper {
+    constructor(opts = { name: "Nyaa Scraper" }) {
+        super(opts);
+    }
+    firstTouch(query, limit) {
+        return __awaiter(this, void 0, void 0, function* () {
+            if (!query) {
+                throw new Error("search query is required to scrape");
+            }
+            let results = [];
+            let page = 1;
+            while (results.length != (limit || 20)) {
+                const { data } = yield axios.get(Nyaa.firstTouchUrl
+                    .replace(":query", query || "")
+                    .replace(":page", page.toString()));
+                const $ = load(data);
+                let torrentCount = $(".container .table-responsive table tbody tr").length;
+                if (torrentCount === 0) {
+                    break;
+                }
+                $(".container .table-responsive table tbody tr").each((i, el) => {
+                    if (results.length >= (limit || 20))
+                        return;
+                    const tds = $(el).find("td");
+                    const name = tds.eq(1).find("a").text().trim();
+                    const url = new URL(tds.eq(1).find("a").attr("href") || "", Nyaa.homeUrl);
+                    const magnetURI = tds.eq(2).find("a").eq(1).attr("href");
+                    const size = tds.eq(3).text().trim();
+                    const seeders = tds.eq(5).text().trim();
+                    const leechers = tds.eq(6).text().trim();
+                    results.push({
+                        name,
+                        url: url.href,
+                        seeders: +seeders,
+                        leechers: +leechers,
+                        provider: "nyaa",
+                        size,
+                        magnetURI,
+                        infoHash: extractInfoHash(magnetURI || "") || "",
+                    });
+                });
+                page++;
+            }
+            return results;
+        });
+    }
+    scrapeTorrent(link) {
+        return __awaiter(this, void 0, void 0, function* () {
+            if (!link.url) {
+                throw new Error("url is required in the torrent link");
+            }
+            if (!link.infoHash)
+                throw new Error("not valid link (info hash doesn't exist)");
+            if (!link.magnetURI)
+                throw new Error("not valid link (magnet URI doesn't exist)");
+            return Object.assign({ infoHash: link.infoHash, magnetURI: link.magnetURI }, link);
+        });
+    }
+}
+Nyaa.homeUrl = "https://nyaa.si";
+Nyaa.firstTouchUrl = "https://nyaa.si/?q=:query&p=:page";
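
`extractInfoHash` (duplicated in `thepiratebay.js` below) pulls the btih value out of a magnet URI and lowercases it; the hash in this sketch is illustrative:

```js
extractInfoHash(
  "magnet:?xt=urn:btih:5A8062C076FA85E8056451C0D9AB2B0B24409CD5&dn=ubuntu"
); // => "5a8062c076fa85e8056451c0d9ab2b0b24409cd5"

extractInfoHash("not a magnet link"); // => null
```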
package/dist/scrapers/scraper.d.ts
CHANGED
@@ -1,4 +1,8 @@
+import { Browser } from "playwright";
 export interface ScraperOpts {
+    browser?: Browser;
+    query?: string;
+    name?: string;
 }
 export interface Torrent {
     name: string;
@@ -10,7 +14,7 @@ export interface Torrent {
     magnetURI?: string;
     torrentDownload?: string;
     size: string;
-    uploader
+    uploader?: string;
 }
 export interface TorrentLink {
     name: string;
@@ -19,10 +23,13 @@ export interface TorrentLink {
     provider: string;
     url: string;
     size: string;
-    uploader
+    uploader?: string;
+    infoHash?: string;
+    magnetURI?: string;
 }
 export declare abstract class Scraper {
-…
+    opts: ScraperOpts;
+    browser?: Browser;
     constructor(opts?: ScraperOpts);
     abstract firstTouch(query: string, limit?: number): Promise<TorrentLink[]>;
     abstract scrapeTorrent(link: TorrentLink): Promise<Torrent>;
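
This reshaped base class is what the README's "Custom scrapers" section builds on: implement `firstTouch` (search page to `TorrentLink[]`) and `scrapeTorrent` (link to full `Torrent`). A skeleton with placeholder data instead of real scraping:

```js
import { Scraper } from "torrent-agent";

class CustomScraper extends Scraper {
  constructor() {
    super({ name: "Custom Scraper" });
  }

  // Collect lightweight links from a provider's search results page.
  async firstTouch(query, limit) {
    return [
      {
        name: query,
        url: "https://example.com/torrent/1", // placeholder URL
        seeders: 0,
        leechers: 0,
        provider: "example",
        size: "1 GB",
      },
    ];
  }

  // Enrich a link into a full Torrent (infoHash, magnetURI, ...).
  async scrapeTorrent(link) {
    return { ...link, infoHash: "<info hash scraped from link.url>" };
  }
}
```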
package/dist/scrapers/scraper.js
CHANGED
@@ -8,13 +8,13 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
     });
 };
 export class Scraper {
-    constructor(opts = {}) {
+    constructor(opts = { name: "Not Named" }) {
         this.opts = opts;
     }
 }
 export class TestScraper extends Scraper {
     constructor(opts) {
-        super({});
+        super({ name: "Test Scraper" });
         this.linkCount = opts.linksCount || 0;
         this.runTime = opts.runTime || 0;
         this.name = opts.name || "Test Scraper";
package/dist/scrapers/thepiratebay.d.ts
ADDED
@@ -0,0 +1,10 @@
+import { Scraper, ScraperOpts, Torrent, TorrentLink } from "./scraper.js";
+import { Browser } from "playwright";
+export declare class ThePirateBay extends Scraper {
+    static homeUrl: string;
+    static firstTouchUrl: string;
+    browser?: Browser;
+    constructor(opts?: ScraperOpts);
+    firstTouch(query: string, limit?: number): Promise<TorrentLink[]>;
+    scrapeTorrent(link: TorrentLink): Promise<Torrent>;
+}
package/dist/scrapers/thepiratebay.js
ADDED
@@ -0,0 +1,89 @@
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+import { Scraper } from "./scraper.js";
+import { load } from "cheerio";
+function extractInfoHash(magnetUri) {
+    if (typeof magnetUri !== "string")
+        return null;
+    const match = magnetUri.match(/xt=urn:btih:([a-zA-Z0-9]+)/);
+    return match ? match[1].toLowerCase() : null;
+}
+export class ThePirateBay extends Scraper {
+    constructor(opts = { name: "ThePirateBay Scraper" }) {
+        super(opts);
+        if (opts.browser) {
+            this.browser = opts.browser;
+        }
+    }
+    firstTouch(query, limit) {
+        return __awaiter(this, void 0, void 0, function* () {
+            if (!query) {
+                throw new Error("search query is required to scrape");
+            }
+            if (!this.browser)
+                throw new Error("browser is not created");
+            let results = [];
+            let p = 1;
+            while (results.length != (limit || 20)) {
+                const page = yield this.browser.newPage();
+                yield page.goto(ThePirateBay.firstTouchUrl
+                    .replace(":query", query || "")
+                    .replace(":page", p.toString()), {
+                    waitUntil: "networkidle",
+                });
+                const data = yield page.content();
+                const $ = load(data);
+                let torrents = $("li.list-entry");
+                if (torrents.length - 1 <= 0) {
+                    break;
+                }
+                torrents.each((i, el) => {
+                    if (i == 0)
+                        return;
+                    if (results.length >= (limit || 20))
+                        return;
+                    const span = $(el).find("span");
+                    const name = span.eq(1).find("a").text().trim();
+                    const url = new URL(span.eq(1).find("a").attr("href") || "", ThePirateBay.homeUrl);
+                    const magnetURI = span.eq(3).find("a").eq(0).attr("href");
+                    const size = span.eq(4).text().trim();
+                    const seeders = span.eq(5).text().trim();
+                    const leechers = span.eq(6).text().trim();
+                    results.push({
+                        name,
+                        url: url.href,
+                        seeders: +seeders,
+                        leechers: +leechers,
+                        provider: "thepiratebay",
+                        size,
+                        magnetURI,
+                        infoHash: extractInfoHash(magnetURI || "") || "",
+                    });
+                });
+                p++;
+            }
+            return results;
+        });
+    }
+    scrapeTorrent(link) {
+        return __awaiter(this, void 0, void 0, function* () {
+            if (!link.url) {
+                throw new Error("url is required in the torrent link");
+            }
+            if (!link.infoHash)
+                throw new Error("not valid link (info hash doesn't exist)");
+            if (!link.magnetURI)
+                throw new Error("not valid link (magnet URI doesn't exist)");
+            return Object.assign({ infoHash: link.infoHash, magnetURI: link.magnetURI }, link);
+        });
+    }
+}
+ThePirateBay.homeUrl = "https://thepiratebay.org";
+ThePirateBay.firstTouchUrl = "https://thepiratebay.org/search.php?q=:query";
package/dist/scrapers/torrentGalaxy.d.ts
ADDED
@@ -0,0 +1,7 @@
+import { Scraper, ScraperOpts, Torrent, TorrentLink } from "./scraper.js";
+export declare class TorrentGalaxy extends Scraper {
+    static firstTouchUrl: string;
+    constructor(opts?: ScraperOpts);
+    firstTouch(query: string, limit?: number): Promise<TorrentLink[]>;
+    scrapeTorrent(link: TorrentLink): Promise<Torrent>;
+}
package/dist/scrapers/torrentGalaxy.js
ADDED
@@ -0,0 +1,71 @@
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
+import axios from "axios";
+import { Scraper } from "./scraper.js";
+import { load } from "cheerio";
+export class TorrentGalaxy extends Scraper {
+    constructor(opts = { name: "TorrentGalaxy" }) {
+        super(opts);
+    }
+    firstTouch(query, limit) {
+        return __awaiter(this, void 0, void 0, function* () {
+            if (!query) {
+                throw new Error("search query is required to scrape");
+            }
+            let results = [];
+            let page = 1;
+            while (results.length != (limit || 20)) {
+                const { data } = yield axios.get(TorrentGalaxy.firstTouchUrl
+                    .replace(":query", query || "")
+                    .replace(":page", page.toString()));
+                const $ = load(data);
+                let torrentCount = $(".table-list-wrap tbody tr").length;
+                if (torrentCount === 0) {
+                    break;
+                }
+                $(".table-list-wrap tbody tr").each((i, el) => {
+                    if (results.length >= (limit || 20))
+                        return;
+                    const name = $(el).find("td .tt-name a").eq(0).text().trim();
+                    const url = "https://torrentgalaxy.hair" +
+                        $(el).find("td .tt-name a").eq(0).attr("href");
+                    const tds = $(el).find("td");
+                    const size = $(tds[2]).text().trim();
+                    const seeders = $(tds[3]).text().trim();
+                    const leechers = $(tds[4]).text().trim();
+                    results.push({
+                        name,
+                        url,
+                        seeders: +seeders,
+                        leechers: +leechers,
+                        provider: "TGx",
+                        size,
+                    });
+                });
+                page++;
+            }
+            return results;
+        });
+    }
+    scrapeTorrent(link) {
+        return __awaiter(this, void 0, void 0, function* () {
+            if (!link.url) {
+                throw new Error("url is required in the torrent link");
+            }
+            const { data } = yield axios.get(link.url);
+            const $ = load(data);
+            const magnetURI = $("a[href^='magnet:?']").attr("href");
+            const infoHash = $(".infohash-box span").text().trim();
+            const torrentDownload = $("a[href$='.torrent']").attr("href");
+            return Object.assign({ magnetURI, infoHash, torrentDownload }, link);
+        });
+    }
+}
+TorrentGalaxy.firstTouchUrl = "https://torrentgalaxy.hair/lmsearch?q=:query&category=lmsearch&page=:page";
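
Unlike Nyaa and ThePirateBay, which read the magnet link straight off the search results page, TorrentGalaxy fills `magnetURI`, `infoHash`, and `torrentDownload` only in `scrapeTorrent` (one extra request per result). Either way the `torrent` event delivers a `Torrent` whose optional fields should be guarded; a sketch:

```js
// Fields per the Torrent interface in scraper.d.ts; magnetURI and
// torrentDownload are optional, so check before using them.
query.on("torrent", (t) => {
  console.log(`[${t.provider}] ${t.name} (${t.size}) S:${t.seeders} L:${t.leechers}`);
  if (t.magnetURI) console.log(t.magnetURI);
});
```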
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "torrent-agent",
-  "version": "0.0
+  "version": "0.3.0",
   "description": "NPM library for searching torrents.",
   "type": "module",
   "main": "dist/index.js",
@@ -13,21 +13,27 @@
     "test:watch": "jest --watch",
     "build": "tsc"
   },
-  "keywords": [
+  "keywords": [
+    "torrent",
+    "torrent search"
+  ],
   "author": "khlala",
   "license": "Apache-2.0",
   "devDependencies": {
-    "@types/jest": "^
-    "@types/node": "^
-    "jest": "^
-    "nock": "^14.0.
-    "ts-jest": "^29.
-    "typescript": "^5.
+    "@types/jest": "^30.0.0",
+    "@types/node": "^25.0.9",
+    "jest": "^30.2.0",
+    "nock": "^14.0.10",
+    "ts-jest": "^29.4.6",
+    "typescript": "^5.9.3"
   },
   "dependencies": {
-    "axios": "^1.
-    "cheerio": "^1.
-    "p-queue": "^
+    "axios": "^1.13.2",
+    "cheerio": "^1.1.2",
+    "p-queue": "^9.1.0",
+    "playwright": "^1.57.0",
+    "playwright-extra": "^4.3.6",
+    "puppeteer-extra-plugin-stealth": "^2.11.2"
   },
   "repository": {
     "type": "git",