recker 1.0.2-0 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287):
  1. package/LICENSE +0 -2
  2. package/README.md +121 -72
  3. package/dist/cache/memory-storage.d.ts.map +1 -1
  4. package/dist/cache/memory-storage.js +7 -1
  5. package/dist/constants/http-status.d.ts +74 -0
  6. package/dist/constants/http-status.d.ts.map +1 -0
  7. package/dist/constants/http-status.js +156 -0
  8. package/dist/constants.d.ts.map +1 -1
  9. package/dist/constants.js +6 -6
  10. package/dist/cookies/memory-cookie-jar.d.ts +31 -0
  11. package/dist/cookies/memory-cookie-jar.d.ts.map +1 -0
  12. package/dist/cookies/memory-cookie-jar.js +210 -0
  13. package/dist/core/client.d.ts +9 -0
  14. package/dist/core/client.d.ts.map +1 -1
  15. package/dist/core/client.js +252 -53
  16. package/dist/core/errors.d.ts +18 -2
  17. package/dist/core/errors.d.ts.map +1 -1
  18. package/dist/core/errors.js +66 -5
  19. package/dist/core/index.d.ts +6 -0
  20. package/dist/core/index.d.ts.map +1 -0
  21. package/dist/core/index.js +5 -0
  22. package/dist/core/request-promise.d.ts.map +1 -1
  23. package/dist/core/request-promise.js +8 -2
  24. package/dist/core/request.d.ts +7 -1
  25. package/dist/core/request.d.ts.map +1 -1
  26. package/dist/core/request.js +32 -0
  27. package/dist/core/response.d.ts +2 -0
  28. package/dist/core/response.d.ts.map +1 -1
  29. package/dist/core/response.js +44 -19
  30. package/dist/events/request-events.d.ts +48 -0
  31. package/dist/events/request-events.d.ts.map +1 -0
  32. package/dist/events/request-events.js +85 -0
  33. package/dist/index.d.ts +28 -2
  34. package/dist/index.d.ts.map +1 -1
  35. package/dist/index.js +28 -2
  36. package/dist/mcp/client.d.ts.map +1 -1
  37. package/dist/mcp/client.js +16 -5
  38. package/dist/mcp/contract.d.ts +77 -0
  39. package/dist/mcp/contract.d.ts.map +1 -0
  40. package/dist/mcp/contract.js +278 -0
  41. package/dist/mcp/types.d.ts +1 -0
  42. package/dist/mcp/types.d.ts.map +1 -1
  43. package/dist/plugins/auth.d.ts +45 -0
  44. package/dist/plugins/auth.d.ts.map +1 -0
  45. package/dist/plugins/auth.js +268 -0
  46. package/dist/plugins/cache.d.ts +7 -1
  47. package/dist/plugins/cache.d.ts.map +1 -1
  48. package/dist/plugins/cache.js +470 -49
  49. package/dist/plugins/circuit-breaker.js +1 -1
  50. package/dist/plugins/compression.d.ts.map +1 -1
  51. package/dist/plugins/compression.js +3 -3
  52. package/dist/plugins/dedup.d.ts.map +1 -1
  53. package/dist/plugins/dedup.js +2 -1
  54. package/dist/plugins/graphql.d.ts +4 -3
  55. package/dist/plugins/graphql.d.ts.map +1 -1
  56. package/dist/plugins/graphql.js +24 -5
  57. package/dist/plugins/grpc-web.d.ts +80 -0
  58. package/dist/plugins/grpc-web.d.ts.map +1 -0
  59. package/dist/plugins/grpc-web.js +261 -0
  60. package/dist/plugins/har-player.d.ts.map +1 -1
  61. package/dist/plugins/har-player.js +11 -2
  62. package/dist/plugins/hls.d.ts +33 -0
  63. package/dist/plugins/hls.d.ts.map +1 -0
  64. package/dist/plugins/hls.js +225 -0
  65. package/dist/plugins/http2-push.d.ts +64 -0
  66. package/dist/plugins/http2-push.d.ts.map +1 -0
  67. package/dist/plugins/http2-push.js +274 -0
  68. package/dist/plugins/http3.d.ts +76 -0
  69. package/dist/plugins/http3.d.ts.map +1 -0
  70. package/dist/plugins/http3.js +231 -0
  71. package/dist/plugins/interface-rotator.d.ts +10 -0
  72. package/dist/plugins/interface-rotator.d.ts.map +1 -0
  73. package/dist/plugins/interface-rotator.js +57 -0
  74. package/dist/plugins/jsonrpc.d.ts +76 -0
  75. package/dist/plugins/jsonrpc.d.ts.map +1 -0
  76. package/dist/plugins/jsonrpc.js +143 -0
  77. package/dist/plugins/logger.d.ts +8 -5
  78. package/dist/plugins/logger.d.ts.map +1 -1
  79. package/dist/plugins/logger.js +66 -30
  80. package/dist/plugins/odata.d.ts +182 -0
  81. package/dist/plugins/odata.d.ts.map +1 -0
  82. package/dist/plugins/odata.js +561 -0
  83. package/dist/plugins/retry.d.ts +1 -0
  84. package/dist/plugins/retry.d.ts.map +1 -1
  85. package/dist/plugins/retry.js +26 -2
  86. package/dist/plugins/scrape.d.ts +22 -0
  87. package/dist/plugins/scrape.d.ts.map +1 -0
  88. package/dist/plugins/scrape.js +87 -0
  89. package/dist/plugins/soap.d.ts +73 -0
  90. package/dist/plugins/soap.d.ts.map +1 -0
  91. package/dist/plugins/soap.js +347 -0
  92. package/dist/plugins/user-agent.d.ts +8 -0
  93. package/dist/plugins/user-agent.d.ts.map +1 -0
  94. package/dist/plugins/user-agent.js +46 -0
  95. package/dist/plugins/xml.d.ts +10 -0
  96. package/dist/plugins/xml.d.ts.map +1 -0
  97. package/dist/plugins/xml.js +194 -0
  98. package/dist/presets/anthropic.d.ts +7 -0
  99. package/dist/presets/anthropic.d.ts.map +1 -0
  100. package/dist/presets/anthropic.js +17 -0
  101. package/dist/presets/azure-openai.d.ts +9 -0
  102. package/dist/presets/azure-openai.d.ts.map +1 -0
  103. package/dist/presets/azure-openai.js +25 -0
  104. package/dist/presets/cloudflare.d.ts +13 -0
  105. package/dist/presets/cloudflare.d.ts.map +1 -0
  106. package/dist/presets/cloudflare.js +39 -0
  107. package/dist/presets/cohere.d.ts +6 -0
  108. package/dist/presets/cohere.d.ts.map +1 -0
  109. package/dist/presets/cohere.js +16 -0
  110. package/dist/presets/deepseek.d.ts +6 -0
  111. package/dist/presets/deepseek.d.ts.map +1 -0
  112. package/dist/presets/deepseek.js +16 -0
  113. package/dist/presets/digitalocean.d.ts +6 -0
  114. package/dist/presets/digitalocean.d.ts.map +1 -0
  115. package/dist/presets/digitalocean.js +16 -0
  116. package/dist/presets/discord.d.ts +7 -0
  117. package/dist/presets/discord.d.ts.map +1 -0
  118. package/dist/presets/discord.js +17 -0
  119. package/dist/presets/fireworks.d.ts +6 -0
  120. package/dist/presets/fireworks.d.ts.map +1 -0
  121. package/dist/presets/fireworks.js +16 -0
  122. package/dist/presets/gemini.d.ts +6 -0
  123. package/dist/presets/gemini.d.ts.map +1 -0
  124. package/dist/presets/gemini.js +16 -0
  125. package/dist/presets/github.d.ts +7 -0
  126. package/dist/presets/github.d.ts.map +1 -0
  127. package/dist/presets/github.js +17 -0
  128. package/dist/presets/gitlab.d.ts +7 -0
  129. package/dist/presets/gitlab.d.ts.map +1 -0
  130. package/dist/presets/gitlab.js +16 -0
  131. package/dist/presets/groq.d.ts +6 -0
  132. package/dist/presets/groq.d.ts.map +1 -0
  133. package/dist/presets/groq.js +16 -0
  134. package/dist/presets/huggingface.d.ts +6 -0
  135. package/dist/presets/huggingface.d.ts.map +1 -0
  136. package/dist/presets/huggingface.js +16 -0
  137. package/dist/presets/index.d.ts +28 -0
  138. package/dist/presets/index.d.ts.map +1 -0
  139. package/dist/presets/index.js +27 -0
  140. package/dist/presets/linear.d.ts +6 -0
  141. package/dist/presets/linear.d.ts.map +1 -0
  142. package/dist/presets/linear.js +16 -0
  143. package/dist/presets/mistral.d.ts +6 -0
  144. package/dist/presets/mistral.d.ts.map +1 -0
  145. package/dist/presets/mistral.js +16 -0
  146. package/dist/presets/notion.d.ts +7 -0
  147. package/dist/presets/notion.d.ts.map +1 -0
  148. package/dist/presets/notion.js +17 -0
  149. package/dist/presets/openai.d.ts +8 -0
  150. package/dist/presets/openai.d.ts.map +1 -0
  151. package/dist/presets/openai.js +23 -0
  152. package/dist/presets/perplexity.d.ts +6 -0
  153. package/dist/presets/perplexity.d.ts.map +1 -0
  154. package/dist/presets/perplexity.js +16 -0
  155. package/dist/presets/registry.d.ts +20 -0
  156. package/dist/presets/registry.d.ts.map +1 -0
  157. package/dist/presets/registry.js +311 -0
  158. package/dist/presets/replicate.d.ts +6 -0
  159. package/dist/presets/replicate.d.ts.map +1 -0
  160. package/dist/presets/replicate.js +16 -0
  161. package/dist/presets/slack.d.ts +6 -0
  162. package/dist/presets/slack.d.ts.map +1 -0
  163. package/dist/presets/slack.js +16 -0
  164. package/dist/presets/stripe.d.ts +8 -0
  165. package/dist/presets/stripe.d.ts.map +1 -0
  166. package/dist/presets/stripe.js +23 -0
  167. package/dist/presets/supabase.d.ts +7 -0
  168. package/dist/presets/supabase.d.ts.map +1 -0
  169. package/dist/presets/supabase.js +18 -0
  170. package/dist/presets/together.d.ts +6 -0
  171. package/dist/presets/together.d.ts.map +1 -0
  172. package/dist/presets/together.js +16 -0
  173. package/dist/presets/twilio.d.ts +7 -0
  174. package/dist/presets/twilio.d.ts.map +1 -0
  175. package/dist/presets/twilio.js +17 -0
  176. package/dist/presets/vercel.d.ts +7 -0
  177. package/dist/presets/vercel.d.ts.map +1 -0
  178. package/dist/presets/vercel.js +23 -0
  179. package/dist/presets/xai.d.ts +7 -0
  180. package/dist/presets/xai.d.ts.map +1 -0
  181. package/dist/presets/xai.js +17 -0
  182. package/dist/protocols/ftp.d.ts +63 -0
  183. package/dist/protocols/ftp.d.ts.map +1 -0
  184. package/dist/protocols/ftp.js +388 -0
  185. package/dist/protocols/index.d.ts +4 -0
  186. package/dist/protocols/index.d.ts.map +1 -0
  187. package/dist/protocols/index.js +3 -0
  188. package/dist/protocols/sftp.d.ts +65 -0
  189. package/dist/protocols/sftp.d.ts.map +1 -0
  190. package/dist/protocols/sftp.js +346 -0
  191. package/dist/protocols/telnet.d.ts +50 -0
  192. package/dist/protocols/telnet.d.ts.map +1 -0
  193. package/dist/protocols/telnet.js +139 -0
  194. package/dist/runner/request-runner.d.ts.map +1 -1
  195. package/dist/runner/request-runner.js +1 -0
  196. package/dist/scrape/document.d.ts +44 -0
  197. package/dist/scrape/document.d.ts.map +1 -0
  198. package/dist/scrape/document.js +198 -0
  199. package/dist/scrape/element.d.ts +50 -0
  200. package/dist/scrape/element.d.ts.map +1 -0
  201. package/dist/scrape/element.js +176 -0
  202. package/dist/scrape/extractors.d.ts +17 -0
  203. package/dist/scrape/extractors.d.ts.map +1 -0
  204. package/dist/scrape/extractors.js +356 -0
  205. package/dist/scrape/index.d.ts +5 -0
  206. package/dist/scrape/index.d.ts.map +1 -0
  207. package/dist/scrape/index.js +3 -0
  208. package/dist/scrape/types.d.ts +108 -0
  209. package/dist/scrape/types.d.ts.map +1 -0
  210. package/dist/scrape/types.js +1 -0
  211. package/dist/testing/index.d.ts +3 -0
  212. package/dist/testing/index.d.ts.map +1 -0
  213. package/dist/testing/index.js +1 -0
  214. package/dist/testing/mock.d.ts +58 -0
  215. package/dist/testing/mock.d.ts.map +1 -0
  216. package/dist/testing/mock.js +252 -0
  217. package/dist/transport/fetch.d.ts.map +1 -1
  218. package/dist/transport/fetch.js +12 -4
  219. package/dist/transport/undici.d.ts +17 -1
  220. package/dist/transport/undici.d.ts.map +1 -1
  221. package/dist/transport/undici.js +708 -47
  222. package/dist/types/index.d.ts +111 -10
  223. package/dist/types/index.d.ts.map +1 -1
  224. package/dist/types/index.js +1 -1
  225. package/dist/types/logger.d.ts +17 -0
  226. package/dist/types/logger.d.ts.map +1 -0
  227. package/dist/types/logger.js +66 -0
  228. package/dist/utils/agent-manager.d.ts.map +1 -1
  229. package/dist/utils/agent-manager.js +20 -4
  230. package/dist/utils/body.d.ts.map +1 -1
  231. package/dist/utils/body.js +14 -2
  232. package/dist/utils/charset.d.ts +16 -0
  233. package/dist/utils/charset.d.ts.map +1 -0
  234. package/dist/utils/charset.js +169 -0
  235. package/dist/utils/client-pool.d.ts +21 -0
  236. package/dist/utils/client-pool.d.ts.map +1 -0
  237. package/dist/utils/client-pool.js +49 -0
  238. package/dist/utils/concurrency.d.ts.map +1 -1
  239. package/dist/utils/concurrency.js +8 -4
  240. package/dist/utils/dns-toolkit.d.ts +13 -0
  241. package/dist/utils/dns-toolkit.d.ts.map +1 -0
  242. package/dist/utils/dns-toolkit.js +48 -0
  243. package/dist/utils/doh.d.ts.map +1 -1
  244. package/dist/utils/doh.js +16 -3
  245. package/dist/utils/download.d.ts +15 -0
  246. package/dist/utils/download.d.ts.map +1 -0
  247. package/dist/utils/download.js +44 -0
  248. package/dist/utils/env-proxy.d.ts +13 -0
  249. package/dist/utils/env-proxy.d.ts.map +1 -0
  250. package/dist/utils/env-proxy.js +105 -0
  251. package/dist/utils/header-parser.d.ts +15 -1
  252. package/dist/utils/header-parser.d.ts.map +1 -1
  253. package/dist/utils/header-parser.js +161 -1
  254. package/dist/utils/link-header.d.ts +70 -0
  255. package/dist/utils/link-header.d.ts.map +1 -0
  256. package/dist/utils/link-header.js +190 -0
  257. package/dist/utils/progress.d.ts +7 -2
  258. package/dist/utils/progress.d.ts.map +1 -1
  259. package/dist/utils/progress.js +48 -15
  260. package/dist/utils/rdap.d.ts +17 -0
  261. package/dist/utils/rdap.d.ts.map +1 -0
  262. package/dist/utils/rdap.js +32 -0
  263. package/dist/utils/request-pool.d.ts.map +1 -1
  264. package/dist/utils/request-pool.js +4 -3
  265. package/dist/utils/sse.d.ts.map +1 -1
  266. package/dist/utils/sse.js +8 -2
  267. package/dist/utils/status-codes.d.ts +84 -0
  268. package/dist/utils/status-codes.d.ts.map +1 -0
  269. package/dist/utils/status-codes.js +204 -0
  270. package/dist/utils/streaming.d.ts.map +1 -1
  271. package/dist/utils/streaming.js +1 -0
  272. package/dist/utils/tls-inspector.d.ts +21 -0
  273. package/dist/utils/tls-inspector.d.ts.map +1 -0
  274. package/dist/utils/tls-inspector.js +39 -0
  275. package/dist/utils/try-fn.d.ts.map +1 -1
  276. package/dist/utils/try-fn.js +11 -5
  277. package/dist/utils/upload.d.ts +1 -0
  278. package/dist/utils/upload.d.ts.map +1 -1
  279. package/dist/utils/upload.js +20 -3
  280. package/dist/utils/user-agent.d.ts +9 -9
  281. package/dist/utils/user-agent.js +9 -9
  282. package/dist/utils/whois.d.ts.map +1 -1
  283. package/dist/utils/whois.js +11 -2
  284. package/dist/websocket/client.d.ts +29 -1
  285. package/dist/websocket/client.d.ts.map +1 -1
  286. package/dist/websocket/client.js +145 -13
  287. package/package.json +45 -8
@@ -0,0 +1,198 @@
1
+ import { load } from 'cheerio';
2
+ import { ScrapeElement } from './element.js';
3
+ import { extractLinks, extractImages, extractMeta, extractOpenGraph, extractTwitterCard, extractJsonLd, extractForms, extractTables, extractScripts, extractStyles, } from './extractors.js';
4
+ export class ScrapeDocument {
5
+ $;
6
+ options;
7
+ constructor(html, options) {
8
+ this.$ = load(html);
9
+ this.options = options || {};
10
+ }
11
+ select(selector) {
12
+ return new ScrapeElement(this.$(selector), this.$);
13
+ }
14
+ selectFirst(selector) {
15
+ return new ScrapeElement(this.$(selector).first(), this.$);
16
+ }
17
+ selectAll(selector) {
18
+ const elements = [];
19
+ this.$(selector).each((_, element) => {
20
+ elements.push(new ScrapeElement(this.$(element), this.$));
21
+ });
22
+ return elements;
23
+ }
24
+ query(selector) {
25
+ return this.select(selector);
26
+ }
27
+ queryAll(selector) {
28
+ return this.selectAll(selector);
29
+ }
30
+ text(selector) {
31
+ return this.$(selector).first().text().trim();
32
+ }
33
+ texts(selector) {
34
+ const texts = [];
35
+ this.$(selector).each((_, element) => {
36
+ const text = this.$(element).text().trim();
37
+ if (text) {
38
+ texts.push(text);
39
+ }
40
+ });
41
+ return texts;
42
+ }
43
+ attr(selector, attribute) {
44
+ return this.$(selector).first().attr(attribute);
45
+ }
46
+ attrs(selector, attribute) {
47
+ const attrs = [];
48
+ this.$(selector).each((_, element) => {
49
+ const value = this.$(element).attr(attribute);
50
+ if (value !== undefined) {
51
+ attrs.push(value);
52
+ }
53
+ });
54
+ return attrs;
55
+ }
56
+ innerHtml(selector) {
57
+ return this.$(selector).first().html();
58
+ }
59
+ outerHtml(selector) {
60
+ const el = this.$(selector).first();
61
+ return this.$.html(el) || '';
62
+ }
63
+ links(options) {
64
+ return extractLinks(this.$, {
65
+ ...options,
66
+ baseUrl: this.options.baseUrl,
67
+ });
68
+ }
69
+ images(options) {
70
+ return extractImages(this.$, {
71
+ ...options,
72
+ baseUrl: this.options.baseUrl,
73
+ });
74
+ }
75
+ meta() {
76
+ return extractMeta(this.$);
77
+ }
78
+ openGraph() {
79
+ return extractOpenGraph(this.$);
80
+ }
81
+ twitterCard() {
82
+ return extractTwitterCard(this.$);
83
+ }
84
+ jsonLd() {
85
+ return extractJsonLd(this.$);
86
+ }
87
+ forms(selector) {
88
+ return extractForms(this.$, selector);
89
+ }
90
+ tables(selector) {
91
+ return extractTables(this.$, selector);
92
+ }
93
+ scripts() {
94
+ return extractScripts(this.$);
95
+ }
96
+ styles() {
97
+ return extractStyles(this.$);
98
+ }
99
+ extract(schema) {
100
+ const result = {};
101
+ for (const [key, fieldConfig] of Object.entries(schema)) {
102
+ result[key] = this.extractField(fieldConfig);
103
+ }
104
+ return result;
105
+ }
106
+ extractField(field) {
107
+ if (typeof field === 'string') {
108
+ return this.text(field) || undefined;
109
+ }
110
+ const { selector, attribute, multiple, transform } = field;
111
+ if (multiple) {
112
+ const values = [];
113
+ this.$(selector).each((_, element) => {
114
+ const $el = this.$(element);
115
+ let value;
116
+ if (attribute) {
117
+ value = $el.attr(attribute) || '';
118
+ }
119
+ else {
120
+ value = $el.text().trim();
121
+ }
122
+ if (value) {
123
+ values.push(transform ? transform(value) : value);
124
+ }
125
+ });
126
+ return values;
127
+ }
128
+ else {
129
+ const $el = this.$(selector).first();
130
+ let value;
131
+ if (attribute) {
132
+ value = $el.attr(attribute) || '';
133
+ }
134
+ else {
135
+ value = $el.text().trim();
136
+ }
137
+ if (!value)
138
+ return undefined;
139
+ return transform ? transform(value) : value;
140
+ }
141
+ }
142
+ title() {
143
+ const title = this.$('title').first().text().trim();
144
+ return title || undefined;
145
+ }
146
+ body() {
147
+ return new ScrapeElement(this.$('body').first(), this.$);
148
+ }
149
+ head() {
150
+ return new ScrapeElement(this.$('head').first(), this.$);
151
+ }
152
+ html() {
153
+ return this.$.html() || '';
154
+ }
155
+ root() {
156
+ return new ScrapeElement(this.$.root(), this.$);
157
+ }
158
+ exists(selector) {
159
+ return this.$(selector).length > 0;
160
+ }
161
+ count(selector) {
162
+ return this.$(selector).length;
163
+ }
164
+ findByText(text, selector) {
165
+ const baseSelector = selector || '*';
166
+ const elements = [];
167
+ this.$(baseSelector).each((_, element) => {
168
+ const $el = this.$(element);
169
+ if ($el.text().includes(text)) {
170
+ elements.push(new ScrapeElement($el, this.$));
171
+ }
172
+ });
173
+ return elements;
174
+ }
175
+ findByExactText(text, selector) {
176
+ const baseSelector = selector || '*';
177
+ const elements = [];
178
+ this.$(baseSelector).each((_, element) => {
179
+ const $el = this.$(element);
180
+ if ($el.text().trim() === text) {
181
+ elements.push(new ScrapeElement($el, this.$));
182
+ }
183
+ });
184
+ return elements;
185
+ }
186
+ findByData(name, value) {
187
+ const selector = value !== undefined
188
+ ? `[data-${name}="${value}"]`
189
+ : `[data-${name}]`;
190
+ return this.selectAll(selector);
191
+ }
192
+ get raw() {
193
+ return this.$;
194
+ }
195
+ get baseUrl() {
196
+ return this.options.baseUrl;
197
+ }
198
+ }
@@ -0,0 +1,50 @@
1
+ import type { Cheerio, CheerioAPI } from 'cheerio';
2
+ import type { Element } from 'domhandler';
3
+ export declare class ScrapeElement {
4
+ private $el;
5
+ private $;
6
+ constructor($el: Cheerio<Element>, $: CheerioAPI);
7
+ find(selector: string): ScrapeElement;
8
+ parent(selector?: string): ScrapeElement;
9
+ children(selector?: string): ScrapeElement;
10
+ siblings(selector?: string): ScrapeElement;
11
+ next(selector?: string): ScrapeElement;
12
+ prev(selector?: string): ScrapeElement;
13
+ nextAll(selector?: string): ScrapeElement;
14
+ prevAll(selector?: string): ScrapeElement;
15
+ closest(selector: string): ScrapeElement;
16
+ first(): ScrapeElement;
17
+ last(): ScrapeElement;
18
+ eq(index: number): ScrapeElement;
19
+ filter(selector: string): ScrapeElement;
20
+ not(selector: string): ScrapeElement;
21
+ has(selector: string): ScrapeElement;
22
+ add(selector: string): ScrapeElement;
23
+ parents(selector?: string): ScrapeElement;
24
+ contents(): ScrapeElement;
25
+ text(): string;
26
+ html(): string | null;
27
+ outerHtml(): string;
28
+ attr(name: string): string | undefined;
29
+ attrs(): Record<string, string>;
30
+ data(name?: string): unknown;
31
+ val(): string | string[] | undefined;
32
+ prop(name: string): unknown;
33
+ exists(): boolean;
34
+ get length(): number;
35
+ is(selector: string): boolean;
36
+ hasClass(className: string): boolean;
37
+ index(selector?: string): number;
38
+ each(callback: (el: ScrapeElement, index: number) => void): this;
39
+ map<T>(callback: (el: ScrapeElement, index: number) => T): T[];
40
+ toArray(): ScrapeElement[];
41
+ reduce<T>(callback: (acc: T, el: ScrapeElement, index: number) => T, initialValue: T): T;
42
+ some(callback: (el: ScrapeElement, index: number) => boolean): boolean;
43
+ every(callback: (el: ScrapeElement, index: number) => boolean): boolean;
44
+ tagName(): string | undefined;
45
+ clone(): ScrapeElement;
46
+ toString(): string;
47
+ get raw(): Cheerio<Element>;
48
+ get(index?: number): Element | undefined;
49
+ }
50
+ //# sourceMappingURL=element.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"element.d.ts","sourceRoot":"","sources":["../../src/scrape/element.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAE1C,qBAAa,aAAa;IACxB,OAAO,CAAC,GAAG,CAAmB;IAC9B,OAAO,CAAC,CAAC,CAAa;gBAEV,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,UAAU;IAUhD,IAAI,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa;IAOrC,MAAM,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa;IAQxC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa;IAQ1C,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa;IAQ1C,IAAI,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa;IAQtC,IAAI,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa;IAQtC,OAAO,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa;IAQzC,OAAO,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa;IAQzC,OAAO,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa;IAOxC,KAAK,IAAI,aAAa;IAOtB,IAAI,IAAI,aAAa;IAOrB,EAAE,CAAC,KAAK,EAAE,MAAM,GAAG,aAAa;IAOhC,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa;IAOvC,GAAG,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa;IAOpC,GAAG,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa;IAOpC,GAAG,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa;IAOpC,OAAO,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa;IAQzC,QAAQ,IAAI,aAAa;IASzB,IAAI,IAAI,MAAM;IAOd,IAAI,IAAI,MAAM,GAAG,IAAI;IAOrB,SAAS,IAAI,MAAM;IAOnB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS;IAOtC,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;IAY/B,IAAI,CAAC,IAAI,CAAC,EAAE,MAAM,GAAG,OAAO;IAU5B,GAAG,IAAI,MAAM,GAAG,MAAM,EAAE,GAAG,SAAS;IAOpC,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;IAS3B,MAAM,IAAI,OAAO;IAOjB,IAAI,MAAM,IAAI,MAAM,CAEnB;IAKD,EAAE,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO;IAO7B,QAAQ,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO;IAOpC,KAAK,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM;IAShC,IAAI,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,GAAG,IAAI;IAUhE,GAAG,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,KAAK,CAAC,GAAG,CAAC,EAAE;IAW9D,OAAO,IAAI,aAAa,EAAE;IAS1B,MAAM,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,KAAK,CAAC,EAAE,YAAY,EAAE,CAAC,GAAG,CAAC;IAexF,IAAI,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,KAAK,OAAO,GAAG,OAAO;IActE,KAAK,CAAC,QAAQ
,EAAE,CAAC,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,KAAK,OAAO,GAAG,OAAO;IAgBvE,OAAO,IAAI,MAAM,GAAG,SAAS;IAQ7B,KAAK,IAAI,aAAa;IAOtB,QAAQ,IAAI,MAAM;IASlB,IAAI,GAAG,IAAI,OAAO,CAAC,OAAO,CAAC,CAE1B;IAKD,GAAG,CAAC,KAAK,GAAE,MAAU,GAAG,OAAO,GAAG,SAAS;CAG5C"}
@@ -0,0 +1,176 @@
1
+ export class ScrapeElement {
2
+ $el;
3
+ $;
4
+ constructor($el, $) {
5
+ this.$el = $el;
6
+ this.$ = $;
7
+ }
8
+ find(selector) {
9
+ return new ScrapeElement(this.$el.find(selector), this.$);
10
+ }
11
+ parent(selector) {
12
+ const parent = selector ? this.$el.parent(selector) : this.$el.parent();
13
+ return new ScrapeElement(parent, this.$);
14
+ }
15
+ children(selector) {
16
+ const children = selector ? this.$el.children(selector) : this.$el.children();
17
+ return new ScrapeElement(children, this.$);
18
+ }
19
+ siblings(selector) {
20
+ const siblings = selector ? this.$el.siblings(selector) : this.$el.siblings();
21
+ return new ScrapeElement(siblings, this.$);
22
+ }
23
+ next(selector) {
24
+ const next = selector ? this.$el.next(selector) : this.$el.next();
25
+ return new ScrapeElement(next, this.$);
26
+ }
27
+ prev(selector) {
28
+ const prev = selector ? this.$el.prev(selector) : this.$el.prev();
29
+ return new ScrapeElement(prev, this.$);
30
+ }
31
+ nextAll(selector) {
32
+ const nextAll = selector ? this.$el.nextAll(selector) : this.$el.nextAll();
33
+ return new ScrapeElement(nextAll, this.$);
34
+ }
35
+ prevAll(selector) {
36
+ const prevAll = selector ? this.$el.prevAll(selector) : this.$el.prevAll();
37
+ return new ScrapeElement(prevAll, this.$);
38
+ }
39
+ closest(selector) {
40
+ return new ScrapeElement(this.$el.closest(selector), this.$);
41
+ }
42
+ first() {
43
+ return new ScrapeElement(this.$el.first(), this.$);
44
+ }
45
+ last() {
46
+ return new ScrapeElement(this.$el.last(), this.$);
47
+ }
48
+ eq(index) {
49
+ return new ScrapeElement(this.$el.eq(index), this.$);
50
+ }
51
+ filter(selector) {
52
+ return new ScrapeElement(this.$el.filter(selector), this.$);
53
+ }
54
+ not(selector) {
55
+ return new ScrapeElement(this.$el.not(selector), this.$);
56
+ }
57
+ has(selector) {
58
+ return new ScrapeElement(this.$el.has(selector), this.$);
59
+ }
60
+ add(selector) {
61
+ return new ScrapeElement(this.$el.add(selector), this.$);
62
+ }
63
+ parents(selector) {
64
+ const parents = selector ? this.$el.parents(selector) : this.$el.parents();
65
+ return new ScrapeElement(parents, this.$);
66
+ }
67
+ contents() {
68
+ return new ScrapeElement(this.$el.contents(), this.$);
69
+ }
70
+ text() {
71
+ return this.$el.text().trim();
72
+ }
73
+ html() {
74
+ return this.$el.html();
75
+ }
76
+ outerHtml() {
77
+ return this.$.html(this.$el) || '';
78
+ }
79
+ attr(name) {
80
+ return this.$el.attr(name);
81
+ }
82
+ attrs() {
83
+ const attributes = {};
84
+ const el = this.$el.get(0);
85
+ if (el && 'attribs' in el) {
86
+ Object.assign(attributes, el.attribs);
87
+ }
88
+ return attributes;
89
+ }
90
+ data(name) {
91
+ if (name) {
92
+ return this.$el.data(name);
93
+ }
94
+ return this.$el.data();
95
+ }
96
+ val() {
97
+ return this.$el.val();
98
+ }
99
+ prop(name) {
100
+ return this.$el.prop(name);
101
+ }
102
+ exists() {
103
+ return this.$el.length > 0;
104
+ }
105
+ get length() {
106
+ return this.$el.length;
107
+ }
108
+ is(selector) {
109
+ return this.$el.is(selector);
110
+ }
111
+ hasClass(className) {
112
+ return this.$el.hasClass(className);
113
+ }
114
+ index(selector) {
115
+ return selector ? this.$el.index(selector) : this.$el.index();
116
+ }
117
+ each(callback) {
118
+ this.$el.each((index, element) => {
119
+ callback(new ScrapeElement(this.$(element), this.$), index);
120
+ });
121
+ return this;
122
+ }
123
+ map(callback) {
124
+ const results = [];
125
+ this.$el.each((index, element) => {
126
+ results.push(callback(new ScrapeElement(this.$(element), this.$), index));
127
+ });
128
+ return results;
129
+ }
130
+ toArray() {
131
+ return this.$el.toArray().map((element) => new ScrapeElement(this.$(element), this.$));
132
+ }
133
+ reduce(callback, initialValue) {
134
+ let accumulator = initialValue;
135
+ this.$el.each((index, element) => {
136
+ accumulator = callback(accumulator, new ScrapeElement(this.$(element), this.$), index);
137
+ });
138
+ return accumulator;
139
+ }
140
+ some(callback) {
141
+ let found = false;
142
+ this.$el.each((index, element) => {
143
+ if (callback(new ScrapeElement(this.$(element), this.$), index)) {
144
+ found = true;
145
+ return false;
146
+ }
147
+ });
148
+ return found;
149
+ }
150
+ every(callback) {
151
+ let allMatch = true;
152
+ this.$el.each((index, element) => {
153
+ if (!callback(new ScrapeElement(this.$(element), this.$), index)) {
154
+ allMatch = false;
155
+ return false;
156
+ }
157
+ });
158
+ return allMatch;
159
+ }
160
+ tagName() {
161
+ const el = this.$el.get(0);
162
+ return el ? el.tagName?.toLowerCase() : undefined;
163
+ }
164
+ clone() {
165
+ return new ScrapeElement(this.$el.clone(), this.$);
166
+ }
167
+ toString() {
168
+ return this.outerHtml();
169
+ }
170
+ get raw() {
171
+ return this.$el;
172
+ }
173
+ get(index = 0) {
174
+ return this.$el.get(index);
175
+ }
176
+ }
@@ -0,0 +1,17 @@
1
+ import type { CheerioAPI } from 'cheerio';
2
+ import type { ExtractedLink, ExtractedImage, ExtractedMeta, OpenGraphData, TwitterCardData, JsonLdData, ExtractedForm, ExtractedTable, ExtractedScript, ExtractedStyle, LinkExtractionOptions, ImageExtractionOptions } from './types.js';
3
+ export declare function extractLinks($: CheerioAPI, options?: LinkExtractionOptions & {
4
+ baseUrl?: string;
5
+ }): ExtractedLink[];
6
+ export declare function extractImages($: CheerioAPI, options?: ImageExtractionOptions & {
7
+ baseUrl?: string;
8
+ }): ExtractedImage[];
9
+ export declare function extractMeta($: CheerioAPI): ExtractedMeta;
10
+ export declare function extractOpenGraph($: CheerioAPI): OpenGraphData;
11
+ export declare function extractTwitterCard($: CheerioAPI): TwitterCardData;
12
+ export declare function extractJsonLd($: CheerioAPI): JsonLdData[];
13
+ export declare function extractForms($: CheerioAPI, selector?: string): ExtractedForm[];
14
+ export declare function extractTables($: CheerioAPI, selector?: string): ExtractedTable[];
15
+ export declare function extractScripts($: CheerioAPI): ExtractedScript[];
16
+ export declare function extractStyles($: CheerioAPI): ExtractedStyle[];
17
+ //# sourceMappingURL=extractors.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extractors.d.ts","sourceRoot":"","sources":["../../src/scrape/extractors.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAC1C,OAAO,KAAK,EACV,aAAa,EACb,cAAc,EACd,aAAa,EACb,aAAa,EACb,eAAe,EACf,UAAU,EACV,aAAa,EAEb,cAAc,EACd,eAAe,EACf,cAAc,EACd,qBAAqB,EACrB,sBAAsB,EACvB,MAAM,YAAY,CAAC;AAoDpB,wBAAgB,YAAY,CAC1B,CAAC,EAAE,UAAU,EACb,OAAO,CAAC,EAAE,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GACrD,aAAa,EAAE,CAwBjB;AAKD,wBAAgB,aAAa,CAC3B,CAAC,EAAE,UAAU,EACb,OAAO,CAAC,EAAE,sBAAsB,GAAG;IAAE,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GACtD,cAAc,EAAE,CA4BlB;AAKD,wBAAgB,WAAW,CAAC,CAAC,EAAE,UAAU,GAAG,aAAa,CA0DxD;AAKD,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,UAAU,GAAG,aAAa,CAiD7D;AAKD,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,UAAU,GAAG,eAAe,CAqCjE;AAKD,wBAAgB,aAAa,CAAC,CAAC,EAAE,UAAU,GAAG,UAAU,EAAE,CAwBzD;AAKD,wBAAgB,YAAY,CAAC,CAAC,EAAE,UAAU,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,aAAa,EAAE,CA+C9E;AAKD,wBAAgB,aAAa,CAAC,CAAC,EAAE,UAAU,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,cAAc,EAAE,CA2DhF;AAKD,wBAAgB,cAAc,CAAC,CAAC,EAAE,UAAU,GAAG,eAAe,EAAE,CAqB/D;AAKD,wBAAgB,aAAa,CAAC,CAAC,EAAE,UAAU,GAAG,cAAc,EAAE,CAyB7D"}