@nanocollective/get-md 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +25 -3
  2. package/dist/cli.js +2 -2
  3. package/dist/cli.js.map +1 -1
  4. package/dist/extractors/metadata-extractor.js +1 -1
  5. package/dist/extractors/metadata-extractor.js.map +1 -1
  6. package/dist/extractors/metadata-extractor.spec.d.ts +2 -0
  7. package/dist/extractors/metadata-extractor.spec.d.ts.map +1 -0
  8. package/dist/extractors/metadata-extractor.spec.js +486 -0
  9. package/dist/extractors/metadata-extractor.spec.js.map +1 -0
  10. package/dist/index.d.ts +1 -1
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +2 -2
  13. package/dist/index.js.map +1 -1
  14. package/dist/index.spec.d.ts +2 -0
  15. package/dist/index.spec.d.ts.map +1 -0
  16. package/dist/index.spec.js +518 -0
  17. package/dist/index.spec.js.map +1 -0
  18. package/dist/optimizers/html-cleaner.js +1 -1
  19. package/dist/optimizers/html-cleaner.js.map +1 -1
  20. package/dist/optimizers/html-cleaner.spec.d.ts +2 -0
  21. package/dist/optimizers/html-cleaner.spec.d.ts.map +1 -0
  22. package/dist/optimizers/html-cleaner.spec.js +351 -0
  23. package/dist/optimizers/html-cleaner.spec.js.map +1 -0
  24. package/dist/optimizers/llm-formatter.js +2 -2
  25. package/dist/optimizers/llm-formatter.js.map +1 -1
  26. package/dist/optimizers/llm-formatter.spec.d.ts +2 -0
  27. package/dist/optimizers/llm-formatter.spec.d.ts.map +1 -0
  28. package/dist/optimizers/llm-formatter.spec.js +276 -0
  29. package/dist/optimizers/llm-formatter.spec.js.map +1 -0
  30. package/dist/optimizers/structure-enhancer.js +3 -3
  31. package/dist/optimizers/structure-enhancer.js.map +1 -1
  32. package/dist/optimizers/structure-enhancer.spec.d.ts +2 -0
  33. package/dist/optimizers/structure-enhancer.spec.d.ts.map +1 -0
  34. package/dist/optimizers/structure-enhancer.spec.js +331 -0
  35. package/dist/optimizers/structure-enhancer.spec.js.map +1 -0
  36. package/dist/parsers/markdown-parser.d.ts +1 -1
  37. package/dist/parsers/markdown-parser.d.ts.map +1 -1
  38. package/dist/parsers/markdown-parser.js +58 -37
  39. package/dist/parsers/markdown-parser.js.map +1 -1
  40. package/dist/parsers/markdown-parser.spec.js +106 -98
  41. package/dist/parsers/markdown-parser.spec.js.map +1 -1
  42. package/dist/utils/url-fetcher.d.ts.map +1 -1
  43. package/dist/utils/url-fetcher.js +1 -1
  44. package/dist/utils/url-fetcher.js.map +1 -1
  45. package/dist/utils/url-fetcher.spec.d.ts +2 -0
  46. package/dist/utils/url-fetcher.spec.d.ts.map +1 -0
  47. package/dist/utils/url-fetcher.spec.js +206 -0
  48. package/dist/utils/url-fetcher.spec.js.map +1 -0
  49. package/dist/utils/validators.js +1 -1
  50. package/dist/utils/validators.js.map +1 -1
  51. package/dist/utils/validators.spec.d.ts +2 -0
  52. package/dist/utils/validators.spec.d.ts.map +1 -0
  53. package/dist/utils/validators.spec.js +290 -0
  54. package/dist/utils/validators.spec.js.map +1 -0
  55. package/package.json +8 -14
@@ -0,0 +1,206 @@
1
+ // src/utils/url-fetcher.spec.ts
2
+ import test from "ava";
3
+ import { fetchUrl, isValidUrl } from "./url-fetcher.js";
4
+ // Add cleanup hook to force exit after tests complete
5
+ test.after.always("cleanup", () => {
6
+ // Give a brief moment for cleanup, then force exit
7
+ setTimeout(() => {
8
+ process.exit(0);
9
+ }, 100);
10
+ });
11
+ // Mock fetch for testing
12
+ const createMockFetch = (response) => {
13
+ return async () => ({
14
+ ok: true,
15
+ status: 200,
16
+ statusText: "OK",
17
+ text: async () => "<html><body>Test</body></html>",
18
+ ...response,
19
+ });
20
+ };
21
+ test("fetchUrl: successfully fetches HTML from a URL", async (t) => {
22
+ const mockHtml = "<html><body>Test content</body></html>";
23
+ global.fetch = createMockFetch({
24
+ text: async () => mockHtml,
25
+ });
26
+ const result = await fetchUrl("https://example.com");
27
+ t.is(result, mockHtml);
28
+ });
29
+ test("fetchUrl: uses custom user agent", async (t) => {
30
+ const customUserAgent = "CustomBot/1.0";
31
+ let capturedHeaders;
32
+ global.fetch = (async (_url, init) => {
33
+ capturedHeaders = init?.headers;
34
+ return {
35
+ ok: true,
36
+ status: 200,
37
+ statusText: "OK",
38
+ text: async () => "<html><body>Test</body></html>",
39
+ };
40
+ });
41
+ await fetchUrl("https://example.com", { userAgent: customUserAgent });
42
+ t.truthy(capturedHeaders);
43
+ if (capturedHeaders &&
44
+ typeof capturedHeaders === "object" &&
45
+ !Array.isArray(capturedHeaders)) {
46
+ t.is(capturedHeaders["User-Agent"], customUserAgent);
47
+ }
48
+ });
49
+ test("fetchUrl: handles followRedirects option set to false", async (t) => {
50
+ let capturedRedirect;
51
+ global.fetch = (async (_url, init) => {
52
+ capturedRedirect = init?.redirect;
53
+ return {
54
+ ok: true,
55
+ status: 200,
56
+ statusText: "OK",
57
+ text: async () => "<html><body>Test</body></html>",
58
+ };
59
+ });
60
+ await fetchUrl("https://example.com", { followRedirects: false });
61
+ t.is(capturedRedirect, "manual");
62
+ });
63
+ test("fetchUrl: handles followRedirects option set to true", async (t) => {
64
+ let capturedRedirect;
65
+ global.fetch = (async (_url, init) => {
66
+ capturedRedirect = init?.redirect;
67
+ return {
68
+ ok: true,
69
+ status: 200,
70
+ statusText: "OK",
71
+ text: async () => "<html><body>Test</body></html>",
72
+ };
73
+ });
74
+ await fetchUrl("https://example.com", { followRedirects: true });
75
+ t.is(capturedRedirect, "follow");
76
+ });
77
+ test("fetchUrl: merges custom headers", async (t) => {
78
+ const customHeaders = { Authorization: "Bearer token123" };
79
+ let capturedHeaders;
80
+ global.fetch = (async (_url, init) => {
81
+ capturedHeaders = init?.headers;
82
+ return {
83
+ ok: true,
84
+ status: 200,
85
+ statusText: "OK",
86
+ text: async () => "<html><body>Test</body></html>",
87
+ };
88
+ });
89
+ await fetchUrl("https://example.com", { headers: customHeaders });
90
+ t.truthy(capturedHeaders);
91
+ if (capturedHeaders &&
92
+ typeof capturedHeaders === "object" &&
93
+ !Array.isArray(capturedHeaders)) {
94
+ const headers = capturedHeaders;
95
+ t.is(headers.Authorization, "Bearer token123");
96
+ t.truthy(headers["User-Agent"]);
97
+ }
98
+ });
99
+ test("fetchUrl: throws error on HTTP 404 status", async (t) => {
100
+ global.fetch = createMockFetch({
101
+ ok: false,
102
+ status: 404,
103
+ statusText: "Not Found",
104
+ });
105
+ const error = await t.throwsAsync(fetchUrl("https://example.com"));
106
+ t.regex(error?.message ?? "", /HTTP 404: Not Found/);
107
+ });
108
+ test("fetchUrl: throws error on HTTP 500 status", async (t) => {
109
+ global.fetch = createMockFetch({
110
+ ok: false,
111
+ status: 500,
112
+ statusText: "Internal Server Error",
113
+ });
114
+ const error = await t.throwsAsync(fetchUrl("https://example.com"));
115
+ t.regex(error?.message ?? "", /HTTP 500: Internal Server Error/);
116
+ });
117
+ test("fetchUrl: handles network errors", async (t) => {
118
+ global.fetch = (async () => {
119
+ throw new Error("Network connection failed");
120
+ });
121
+ const error = await t.throwsAsync(fetchUrl("https://example.com"));
122
+ t.regex(error?.message ?? "", /Failed to fetch URL: Network connection failed/);
123
+ });
124
+ test("fetchUrl: handles timeout errors", async (t) => {
125
+ global.fetch = (async () => {
126
+ const error = new Error("Aborted");
127
+ error.name = "AbortError";
128
+ throw error;
129
+ });
130
+ const error = await t.throwsAsync(fetchUrl("https://example.com", { timeout: 50 }));
131
+ t.regex(error?.message ?? "", /Request timeout after 50ms/);
132
+ });
133
+ test("fetchUrl: handles non-Error exceptions", async (t) => {
134
+ global.fetch = (async () => {
135
+ // eslint-disable-next-line no-throw-literal
136
+ throw "String error";
137
+ });
138
+ // For non-Error exceptions, the promise will still reject
139
+ // We just need to verify it throws something
140
+ try {
141
+ await fetchUrl("https://example.com");
142
+ t.fail("Should have thrown an error");
143
+ }
144
+ catch (error) {
145
+ t.is(error, "String error");
146
+ }
147
+ });
148
+ test("isValidUrl: returns true for valid HTTP URLs", (t) => {
149
+ t.true(isValidUrl("http://example.com"));
150
+ t.true(isValidUrl("http://example.com/path"));
151
+ t.true(isValidUrl("http://example.com:8080"));
152
+ t.true(isValidUrl("http://subdomain.example.com"));
153
+ });
154
+ test("isValidUrl: returns true for valid HTTPS URLs", (t) => {
155
+ t.true(isValidUrl("https://example.com"));
156
+ t.true(isValidUrl("https://example.com/path/to/page"));
157
+ t.true(isValidUrl("https://subdomain.example.com"));
158
+ t.true(isValidUrl("https://example.com:443"));
159
+ });
160
+ test("isValidUrl: returns false for invalid protocols", (t) => {
161
+ t.false(isValidUrl("ftp://example.com"));
162
+ t.false(isValidUrl("file:///path/to/file"));
163
+ t.false(isValidUrl("javascript:alert(1)"));
164
+ t.false(isValidUrl("data:text/html,<h1>Test</h1>"));
165
+ });
166
+ test("isValidUrl: returns false for malformed URLs", (t) => {
167
+ t.false(isValidUrl("not-a-url"));
168
+ t.false(isValidUrl(""));
169
+ t.false(isValidUrl("htp://missing-t"));
170
+ t.false(isValidUrl("://no-protocol"));
171
+ });
172
+ test("isValidUrl: returns false for relative URLs", (t) => {
173
+ t.false(isValidUrl("/relative/path"));
174
+ t.false(isValidUrl("../relative/path"));
175
+ t.false(isValidUrl("./relative/path"));
176
+ t.false(isValidUrl("relative/path"));
177
+ });
178
+ test("isValidUrl: handles URLs with query parameters", (t) => {
179
+ t.true(isValidUrl("https://example.com?param=value"));
180
+ t.true(isValidUrl("https://example.com?param1=value1&param2=value2"));
181
+ t.true(isValidUrl("http://example.com/page?search=test&sort=asc"));
182
+ });
183
+ test("isValidUrl: handles URLs with fragments", (t) => {
184
+ t.true(isValidUrl("https://example.com#section"));
185
+ t.true(isValidUrl("https://example.com/page#anchor"));
186
+ t.true(isValidUrl("http://example.com/docs#intro"));
187
+ });
188
+ test("isValidUrl: handles URLs with authentication", (t) => {
189
+ t.true(isValidUrl("https://user:pass@example.com"));
190
+ t.true(isValidUrl("http://admin:secret@example.com:8080"));
191
+ });
192
+ test("isValidUrl: handles complex valid URLs", (t) => {
193
+ t.true(isValidUrl("https://user:pass@subdomain.example.com:8080/path/to/resource?query=value#fragment"));
194
+ t.true(isValidUrl("http://localhost:3000/api/users?filter=active"));
195
+ });
196
+ test("isValidUrl: rejects localhost without protocol", (t) => {
197
+ t.false(isValidUrl("localhost:3000"));
198
+ t.false(isValidUrl("127.0.0.1:8080"));
199
+ });
200
+ test("isValidUrl: handles IP addresses with protocol", (t) => {
201
+ t.true(isValidUrl("http://127.0.0.1"));
202
+ t.true(isValidUrl("https://192.168.1.1:8080"));
203
+ t.true(isValidUrl("http://[::1]"));
204
+ t.true(isValidUrl("http://[2001:db8::1]"));
205
+ });
206
+ //# sourceMappingURL=url-fetcher.spec.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"url-fetcher.spec.js","sourceRoot":"","sources":["../../src/utils/url-fetcher.spec.ts"],"names":[],"mappings":"AAAA,gCAAgC;AAEhC,OAAO,IAAI,MAAM,KAAK,CAAC;AACvB,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAExD,sDAAsD;AACtD,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,EAAE,GAAG,EAAE;IAChC,mDAAmD;IACnD,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,EAAE,GAAG,CAAC,CAAC;AACV,CAAC,CAAC,CAAC;AAEH,yBAAyB;AACzB,MAAM,eAAe,GAAG,CAAC,QAA2B,EAAE,EAAE;IACtD,OAAO,KAAK,IAAI,EAAE,CAChB,CAAC;QACC,EAAE,EAAE,IAAI;QACR,MAAM,EAAE,GAAG;QACX,UAAU,EAAE,IAAI;QAChB,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC,gCAAgC;QAClD,GAAG,QAAQ;KACZ,CAAa,CAAC;AACnB,CAAC,CAAC;AAEF,IAAI,CAAC,gDAAgD,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE;IACjE,MAAM,QAAQ,GAAG,wCAAwC,CAAC;IAC1D,MAAM,CAAC,KAAK,GAAG,eAAe,CAAC;QAC7B,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC,QAAQ;KAC3B,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,qBAAqB,CAAC,CAAC;IACrD,CAAC,CAAC,EAAE,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;AACzB,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,kCAAkC,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE;IACnD,MAAM,eAAe,GAAG,eAAe,CAAC;IACxC,IAAI,eAAwC,CAAC;IAE7C,MAAM,CAAC,KAAK,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QACnC,eAAe,GAAG,IAAI,EAAE,OAAsB,CAAC;QAC/C,OAAO;YACL,EAAE,EAAE,IAAI;YACR,MAAM,EAAE,GAAG;YACX,UAAU,EAAE,IAAI;YAChB,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC,gCAAgC;SACvC,CAAC;IAChB,CAAC,CAAiB,CAAC;IAEnB,MAAM,QAAQ,CAAC,qBAAqB,EAAE,EAAE,SAAS,EAAE,eAAe,EAAE,CAAC,CAAC;IAEtE,CAAC,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAC1B,IACE,eAAe;QACf,OAAO,eAAe,KAAK,QAAQ;QACnC,CAAC,KAAK,CAAC,OAAO,CAAC,eAAe,CAAC,EAC/B,CAAC;QACD,CAAC,CAAC,EAAE,CACD,eAA0C,CAAC,YAAY,CAAC,EACzD,eAAe,CAChB,CAAC;IACJ,CAAC;AACH,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,uDAAuD,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE;IACxE,IAAI,gBAA6C,CAAC;IAElD,MAAM,CAAC,KAAK,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QACnC,gBAAgB,GAAG,IAAI,EAAE,QAAQ,CAAC;QAClC,OAAO;YACL,EAAE,EAAE,IAAI;YACR,MAAM,EAAE,GAAG;YACX,UAAU,EAAE,IAAI;YAChB,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC,gCAAgC;SACvC,CAAC;IAChB,CAAC,CAAiB,CAAC;IAEnB,MAAM,QAAQ,CAAC,qBAAqB,EAAE,EAAE,eAAe,EAAE,
KAAK,EAAE,CAAC,CAAC;IAElE,CAAC,CAAC,EAAE,CAAC,gBAAgB,EAAE,QAAQ,CAAC,CAAC;AACnC,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,sDAAsD,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE;IACvE,IAAI,gBAA6C,CAAC;IAElD,MAAM,CAAC,KAAK,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QACnC,gBAAgB,GAAG,IAAI,EAAE,QAAQ,CAAC;QAClC,OAAO;YACL,EAAE,EAAE,IAAI;YACR,MAAM,EAAE,GAAG;YACX,UAAU,EAAE,IAAI;YAChB,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC,gCAAgC;SACvC,CAAC;IAChB,CAAC,CAAiB,CAAC;IAEnB,MAAM,QAAQ,CAAC,qBAAqB,EAAE,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC,CAAC;IAEjE,CAAC,CAAC,EAAE,CAAC,gBAAgB,EAAE,QAAQ,CAAC,CAAC;AACnC,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,iCAAiC,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE;IAClD,MAAM,aAAa,GAAG,EAAE,aAAa,EAAE,iBAAiB,EAAE,CAAC;IAC3D,IAAI,eAAwC,CAAC;IAE7C,MAAM,CAAC,KAAK,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QACnC,eAAe,GAAG,IAAI,EAAE,OAAsB,CAAC;QAC/C,OAAO;YACL,EAAE,EAAE,IAAI;YACR,MAAM,EAAE,GAAG;YACX,UAAU,EAAE,IAAI;YAChB,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC,gCAAgC;SACvC,CAAC;IAChB,CAAC,CAAiB,CAAC;IAEnB,MAAM,QAAQ,CAAC,qBAAqB,EAAE,EAAE,OAAO,EAAE,aAAa,EAAE,CAAC,CAAC;IAElE,CAAC,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAC1B,IACE,eAAe;QACf,OAAO,eAAe,KAAK,QAAQ;QACnC,CAAC,KAAK,CAAC,OAAO,CAAC,eAAe,CAAC,EAC/B,CAAC;QACD,MAAM,OAAO,GAAG,eAAyC,CAAC;QAC1D,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,aAAa,EAAE,iBAAiB,CAAC,CAAC;QAC/C,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC;IAClC,CAAC;AACH,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,2CAA2C,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE;IAC5D,MAAM,CAAC,KAAK,GAAG,eAAe,CAAC;QAC7B,EAAE,EAAE,KAAK;QACT,MAAM,EAAE,GAAG;QACX,UAAU,EAAE,WAAW;KACxB,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,qBAAqB,CAAC,CAAC,CAAC;IACnE,CAAC,CAAC,KAAK,CAAC,KAAK,EAAE,OAAO,IAAI,EAAE,EAAE,qBAAqB,CAAC,CAAC;AACvD,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,2CAA2C,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE;IAC5D,MAAM,CAAC,KAAK,GAAG,eAAe,CAAC;QAC7B,EAAE,EAAE,KAAK;QACT,MAAM,EAAE,GAAG;QACX,UAAU,EAAE,uBAAuB;KACpC,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,qBAAqB,CAAC,CAAC,CAAC;IACnE,CAAC,CAAC,KAAK,CAAC,KAAK,EAAE,OAAO,IAAI,EAAE,EAAE,iCAAiC,CAAC,CAAC;AACnE,CAAC,CAAC,CAAC;AAEH
,IAAI,CAAC,kCAAkC,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE;IACnD,MAAM,CAAC,KAAK,GAAG,CAAC,KAAK,IAAI,EAAE;QACzB,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAC/C,CAAC,CAAiB,CAAC;IAEnB,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,qBAAqB,CAAC,CAAC,CAAC;IACnE,CAAC,CAAC,KAAK,CACL,KAAK,EAAE,OAAO,IAAI,EAAE,EACpB,gDAAgD,CACjD,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,kCAAkC,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE;IACnD,MAAM,CAAC,KAAK,GAAG,CAAC,KAAK,IAAI,EAAE;QACzB,MAAM,KAAK,GAAG,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC;QACnC,KAAK,CAAC,IAAI,GAAG,YAAY,CAAC;QAC1B,MAAM,KAAK,CAAC;IACd,CAAC,CAAiB,CAAC;IAEnB,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,WAAW,CAC/B,QAAQ,CAAC,qBAAqB,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,CACjD,CAAC;IACF,CAAC,CAAC,KAAK,CAAC,KAAK,EAAE,OAAO,IAAI,EAAE,EAAE,4BAA4B,CAAC,CAAC;AAC9D,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,wCAAwC,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE;IACzD,MAAM,CAAC,KAAK,GAAG,CAAC,KAAK,IAAI,EAAE;QACzB,4CAA4C;QAC5C,MAAM,cAAc,CAAC;IACvB,CAAC,CAAiB,CAAC;IAEnB,0DAA0D;IAC1D,6CAA6C;IAC7C,IAAI,CAAC;QACH,MAAM,QAAQ,CAAC,qBAAqB,CAAC,CAAC;QACtC,CAAC,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IACxC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,CAAC,CAAC,EAAE,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;IAC9B,CAAC;AACH,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,8CAA8C,EAAE,CAAC,CAAC,EAAE,EAAE;IACzD,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,oBAAoB,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,yBAAyB,CAAC,CAAC,CAAC;IAC9C,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,yBAAyB,CAAC,CAAC,CAAC;IAC9C,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,8BAA8B,CAAC,CAAC,CAAC;AACrD,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,+CAA+C,EAAE,CAAC,CAAC,EAAE,EAAE;IAC1D,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,qBAAqB,CAAC,CAAC,CAAC;IAC1C,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,kCAAkC,CAAC,CAAC,CAAC;IACvD,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,+BAA+B,CAAC,CAAC,CAAC;IACpD,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,yBAAyB,CAAC,CAAC,CAAC;AAChD,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,iDAAiD,EAAE,CAAC,CAAC,EAAE,EAAE;IAC5D,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,mBAAmB,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,sBAAsB,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,qBAAqB,CAAC,CAAC,CAAC;IAC3C,CAAC,CAAC,KAAK,CAA
C,UAAU,CAAC,8BAA8B,CAAC,CAAC,CAAC;AACtD,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,8CAA8C,EAAE,CAAC,CAAC,EAAE,EAAE;IACzD,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACjC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC;IACxB,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,iBAAiB,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC;AACxC,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,6CAA6C,EAAE,CAAC,CAAC,EAAE,EAAE;IACxD,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC;IACtC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,kBAAkB,CAAC,CAAC,CAAC;IACxC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,iBAAiB,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,CAAC,CAAC;AACvC,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,gDAAgD,EAAE,CAAC,CAAC,EAAE,EAAE;IAC3D,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,iCAAiC,CAAC,CAAC,CAAC;IACtD,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,iDAAiD,CAAC,CAAC,CAAC;IACtE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,8CAA8C,CAAC,CAAC,CAAC;AACrE,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,yCAAyC,EAAE,CAAC,CAAC,EAAE,EAAE;IACpD,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,6BAA6B,CAAC,CAAC,CAAC;IAClD,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,iCAAiC,CAAC,CAAC,CAAC;IACtD,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,+BAA+B,CAAC,CAAC,CAAC;AACtD,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,8CAA8C,EAAE,CAAC,CAAC,EAAE,EAAE;IACzD,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,+BAA+B,CAAC,CAAC,CAAC;IACpD,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,sCAAsC,CAAC,CAAC,CAAC;AAC7D,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,wCAAwC,EAAE,CAAC,CAAC,EAAE,EAAE;IACnD,CAAC,CAAC,IAAI,CACJ,UAAU,CACR,oFAAoF,CACrF,CACF,CAAC;IACF,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,+CAA+C,CAAC,CAAC,CAAC;AACtE,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,gDAAgD,EAAE,CAAC,CAAC,EAAE,EAAE;IAC3D,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC;IACtC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC,CAAC;AACxC,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,gDAAgD,EAAE,CAAC,CAAC,EAAE,EAAE;IAC3D,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,kBAAkB,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,0BAA0B,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC,CAAC;IACnC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,sBAAsB,CAAC,CAAC,CAAC;AAC7C,CAAC,CAAC,CAAC"}
@@ -1,5 +1,5 @@
1
1
  // src/utils/validators.ts
2
- import * as cheerio from "cheerio";
2
+ import * as cheerio from "cheerio/slim";
3
3
  /**
4
4
  * Validate if HTML contains extractable content
5
5
  */
@@ -1 +1 @@
1
- {"version":3,"file":"validators.js","sourceRoot":"","sources":["../../src/utils/validators.ts"],"names":[],"mappings":"AAAA,0BAA0B;AAE1B,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE7B,2CAA2C;QAC3C,CAAC,CAAC,oCAAoC,CAAC,CAAC,MAAM,EAAE,CAAC;QAEjD,mBAAmB;QACnB,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAErC,qEAAqE;QACrE,OAAO,IAAI,CAAC,MAAM,IAAI,GAAG,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"validators.js","sourceRoot":"","sources":["../../src/utils/validators.ts"],"names":[],"mappings":"AAAA,0BAA0B;AAE1B,OAAO,KAAK,OAAO,MAAM,cAAc,CAAC;AAExC;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,IAAY;IACrC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE7B,2CAA2C;QAC3C,CAAC,CAAC,oCAAoC,CAAC,CAAC,MAAM,EAAE,CAAC;QAEjD,mBAAmB;QACnB,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAErC,qEAAqE;QACrE,OAAO,IAAI,CAAC,MAAM,IAAI,GAAG,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=validators.spec.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validators.spec.d.ts","sourceRoot":"","sources":["../../src/utils/validators.spec.ts"],"names":[],"mappings":""}
@@ -0,0 +1,290 @@
1
+ // src/utils/validators.spec.ts
2
+ import test from "ava";
3
+ import { hasContent } from "./validators.js";
4
+ test("hasContent: returns true for HTML with sufficient text content", (t) => {
5
+ const html = `
6
+ <html>
7
+ <body>
8
+ <article>
9
+ <h1>Article Title</h1>
10
+ <p>This is a paragraph with enough text content to be considered valid content.
11
+ It contains more than 100 characters which is the threshold for content validation.</p>
12
+ </article>
13
+ </body>
14
+ </html>
15
+ `;
16
+ t.true(hasContent(html));
17
+ });
18
+ test("hasContent: returns false for HTML with insufficient text content", (t) => {
19
+ const html = `
20
+ <html>
21
+ <body>
22
+ <p>Short text</p>
23
+ </body>
24
+ </html>
25
+ `;
26
+ t.false(hasContent(html));
27
+ });
28
+ test("hasContent: returns false for empty HTML", (t) => {
29
+ t.false(hasContent(""));
30
+ });
31
+ test("hasContent: returns false for non-string input", (t) => {
32
+ t.false(hasContent(null));
33
+ t.false(hasContent(undefined));
34
+ t.false(hasContent(123));
35
+ t.false(hasContent({}));
36
+ t.false(hasContent([]));
37
+ });
38
+ test("hasContent: ignores script tags when counting content", (t) => {
39
+ const html = `
40
+ <html>
41
+ <body>
42
+ <script>
43
+ console.log("This script content should be ignored and not counted toward the 100 character minimum for content validation");
44
+ </script>
45
+ <p>Short</p>
46
+ </body>
47
+ </html>
48
+ `;
49
+ t.false(hasContent(html));
50
+ });
51
+ test("hasContent: ignores style tags when counting content", (t) => {
52
+ const html = `
53
+ <html>
54
+ <body>
55
+ <style>
56
+ body { font-family: Arial; }
57
+ .container { max-width: 1200px; }
58
+ This style content is long enough but should be ignored completely.
59
+ </style>
60
+ <p>Short</p>
61
+ </body>
62
+ </html>
63
+ `;
64
+ t.false(hasContent(html));
65
+ });
66
+ test("hasContent: ignores nav elements when counting content", (t) => {
67
+ const html = `
68
+ <html>
69
+ <body>
70
+ <nav>
71
+ <a href="/">Home</a>
72
+ <a href="/about">About</a>
73
+ <a href="/contact">Contact</a>
74
+ This navigation content should be ignored when checking for meaningful content in the document.
75
+ </nav>
76
+ <p>Short content</p>
77
+ </body>
78
+ </html>
79
+ `;
80
+ t.false(hasContent(html));
81
+ });
82
+ test("hasContent: ignores header elements when counting content", (t) => {
83
+ const html = `
84
+ <html>
85
+ <body>
86
+ <header>
87
+ <h1>Site Header</h1>
88
+ <p>This header content should be ignored when validating if the page has meaningful extractable content.</p>
89
+ </header>
90
+ <p>Brief text</p>
91
+ </body>
92
+ </html>
93
+ `;
94
+ t.false(hasContent(html));
95
+ });
96
+ test("hasContent: ignores footer elements when counting content", (t) => {
97
+ const html = `
98
+ <html>
99
+ <body>
100
+ <p>Short main content</p>
101
+ <footer>
102
+ <p>Copyright 2024. All rights reserved. This footer content should be ignored when checking content.</p>
103
+ </footer>
104
+ </body>
105
+ </html>
106
+ `;
107
+ t.false(hasContent(html));
108
+ });
109
+ test("hasContent: counts content from body after removing noise elements", (t) => {
110
+ const html = `
111
+ <html>
112
+ <body>
113
+ <header>Site Header with some text</header>
114
+ <nav>Navigation links here</nav>
115
+ <script>console.log("ignored");</script>
116
+ <style>.ignored { color: red; }</style>
117
+ <main>
118
+ <h1>Main Content</h1>
119
+ <p>This is the actual content of the page that should be counted. It has enough characters to pass the validation threshold of 100 characters.</p>
120
+ </main>
121
+ <footer>Footer text here</footer>
122
+ </body>
123
+ </html>
124
+ `;
125
+ t.true(hasContent(html));
126
+ });
127
+ test("hasContent: handles HTML with only whitespace", (t) => {
128
+ const html = `
129
+ <html>
130
+ <body>
131
+
132
+
133
+
134
+ </body>
135
+ </html>
136
+ `;
137
+ t.false(hasContent(html));
138
+ });
139
+ test("hasContent: handles malformed HTML gracefully", (t) => {
140
+ const html = "<div><p>Unclosed tags and malformed structure";
141
+ t.false(hasContent(html));
142
+ });
143
+ test("hasContent: returns false for HTML with only removed elements", (t) => {
144
+ const html = `
145
+ <html>
146
+ <body>
147
+ <script>alert("Only script content here with enough characters to exceed 100 if it were counted");</script>
148
+ <style>body { color: blue; padding: 20px; margin: 0; font-size: 16px; line-height: 1.5; }</style>
149
+ </body>
150
+ </html>
151
+ `;
152
+ t.false(hasContent(html));
153
+ });
154
+ test("hasContent: handles HTML with exactly 100 characters", (t) => {
155
+ // Create exactly 100 characters of content
156
+ const content = "a".repeat(100);
157
+ const html = `<html><body><p>${content}</p></body></html>`;
158
+ t.true(hasContent(html));
159
+ });
160
+ test("hasContent: handles HTML with 99 characters (just below threshold)", (t) => {
161
+ const content = "a".repeat(99);
162
+ const html = `<html><body><p>${content}</p></body></html>`;
163
+ t.false(hasContent(html));
164
+ });
165
+ test("hasContent: handles HTML with 101 characters (just above threshold)", (t) => {
166
+ const content = "a".repeat(101);
167
+ const html = `<html><body><p>${content}</p></body></html>`;
168
+ t.true(hasContent(html));
169
+ });
170
+ test("hasContent: handles deeply nested HTML structure", (t) => {
171
+ const html = `
172
+ <html>
173
+ <body>
174
+ <div>
175
+ <div>
176
+ <div>
177
+ <article>
178
+ <section>
179
+ <p>This is deeply nested content with enough text to be considered valid.
180
+ The validation should work regardless of nesting depth in the HTML structure.</p>
181
+ </section>
182
+ </article>
183
+ </div>
184
+ </div>
185
+ </div>
186
+ </body>
187
+ </html>
188
+ `;
189
+ t.true(hasContent(html));
190
+ });
191
+ test("hasContent: handles HTML parsing errors", (t) => {
192
+ const invalidHtml = "<<<>>>{{{}}}";
193
+ t.false(hasContent(invalidHtml));
194
+ });
195
+ test("hasContent: trims whitespace before checking length", (t) => {
196
+ const html = `
197
+ <html>
198
+ <body>
199
+ <p>
200
+
201
+ Content with lots of whitespace padding that should be trimmed before length check happens here and continues.
202
+
203
+ </p>
204
+ </body>
205
+ </html>
206
+ `;
207
+ t.true(hasContent(html));
208
+ });
209
+ test("hasContent: handles multiple paragraphs", (t) => {
210
+ const html = `
211
+ <html>
212
+ <body>
213
+ <p>First paragraph with some text.</p>
214
+ <p>Second paragraph with more text.</p>
215
+ <p>Third paragraph to ensure we exceed the 100 character threshold for content validation.</p>
216
+ </body>
217
+ </html>
218
+ `;
219
+ t.true(hasContent(html));
220
+ });
221
+ test("hasContent: handles list elements", (t) => {
222
+ const html = `
223
+ <html>
224
+ <body>
225
+ <ul>
226
+ <li>First item with some content</li>
227
+ <li>Second item with more content</li>
228
+ <li>Third item to ensure sufficient length for validation purposes and exceed threshold</li>
229
+ </ul>
230
+ </body>
231
+ </html>
232
+ `;
233
+ t.true(hasContent(html));
234
+ });
235
+ test("hasContent: handles HTML entities", (t) => {
236
+ const html = `
237
+ <html>
238
+ <body>
239
+ <p>&lt;This text contains HTML entities&gt; and should be counted properly.
240
+ It has enough content to pass the validation threshold of 100 characters total.</p>
241
+ </body>
242
+ </html>
243
+ `;
244
+ t.true(hasContent(html));
245
+ });
246
+ test("hasContent: handles mixed content types", (t) => {
247
+ const html = `
248
+ <html>
249
+ <body>
250
+ <h1>Heading</h1>
251
+ <p>Paragraph text</p>
252
+ <blockquote>Quote text that adds to the total character count</blockquote>
253
+ <div>Division with additional content to ensure we pass validation</div>
254
+ </body>
255
+ </html>
256
+ `;
257
+ t.true(hasContent(html));
258
+ });
259
+ test("hasContent: handles empty body tag", (t) => {
260
+ const html = `<html><body></body></html>`;
261
+ t.false(hasContent(html));
262
+ });
263
+ test("hasContent: handles body with only whitespace and newlines", (t) => {
264
+ const html = `
265
+ <html>
266
+ <body>
267
+
268
+
269
+
270
+ </body>
271
+ </html>
272
+ `;
273
+ t.false(hasContent(html));
274
+ });
275
+ test("hasContent: ignores all noise elements combined", (t) => {
276
+ const html = `
277
+ <html>
278
+ <body>
279
+ <header>Header content with text</header>
280
+ <nav>Navigation content</nav>
281
+ <script>console.log("script");</script>
282
+ <style>body { color: red; }</style>
283
+ <footer>Footer content</footer>
284
+ <p>Only this short text should be counted</p>
285
+ </body>
286
+ </html>
287
+ `;
288
+ t.false(hasContent(html));
289
+ });
290
+ //# sourceMappingURL=validators.spec.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validators.spec.js","sourceRoot":"","sources":["../../src/utils/validators.spec.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAE/B,OAAO,IAAI,MAAM,KAAK,CAAC;AACvB,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAE7C,IAAI,CAAC,gEAAgE,EAAE,CAAC,CAAC,EAAE,EAAE;IAC3E,MAAM,IAAI,GAAG;;;;;;;;;;GAUZ,CAAC;IAEF,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,mEAAmE,EAAE,CAAC,CAAC,EAAE,EAAE;IAC9E,MAAM,IAAI,GAAG;;;;;;GAMZ,CAAC;IAEF,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,0CAA0C,EAAE,CAAC,CAAC,EAAE,EAAE;IACrD,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC;AAC1B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,gDAAgD,EAAE,CAAC,CAAC,EAAE,EAAE;IAC3D,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAW,CAAC,CAAC,CAAC;IACjC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,SAAgB,CAAC,CAAC,CAAC;IACtC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,GAAU,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,EAAS,CAAC,CAAC,CAAC;IAC/B,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,EAAS,CAAC,CAAC,CAAC;AACjC,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,uDAAuD,EAAE,CAAC,CAAC,EAAE,EAAE;IAClE,MAAM,IAAI,GAAG;;;;;;;;;GASZ,CAAC;IAEF,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,sDAAsD,EAAE,CAAC,CAAC,EAAE,EAAE;IACjE,MAAM,IAAI,GAAG;;;;;;;;;;;GAWZ,CAAC;IAEF,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,wDAAwD,EAAE,CAAC,CAAC,EAAE,EAAE;IACnE,MAAM,IAAI,GAAG;;;;;;;;;;;;GAYZ,CAAC;IAEF,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,2DAA2D,EAAE,CAAC,CAAC,EAAE,EAAE;IACtE,MAAM,IAAI,GAAG;;;;;;;;;;GAUZ,CAAC;IAEF,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,2DAA2D,EAAE,CAAC,CAAC,EAAE,EAAE;IACtE,MAAM,IAAI,GAAG;;;;;;;;;GASZ,CAAC;IAEF,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,oEAAoE,EAAE,CAAC,CAAC,EAAE,EAAE;IAC/E,MAAM,IAAI,GAAG;;;;;;;;;;;;;;GAcZ,CAAC;IAEF,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3B,CAAC,CAAC,CAAC;AAEH
,IAAI,CAAC,+CAA+C,EAAE,CAAC,CAAC,EAAE,EAAE;IAC1D,MAAM,IAAI,GAAG;;;;;;;;GAQZ,CAAC;IAEF,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,+CAA+C,EAAE,CAAC,CAAC,EAAE,EAAE;IAC1D,MAAM,IAAI,GAAG,+CAA+C,CAAC;IAE7D,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,+DAA+D,EAAE,CAAC,CAAC,EAAE,EAAE;IAC1E,MAAM,IAAI,GAAG;;;;;;;GAOZ,CAAC;IAEF,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,sDAAsD,EAAE,CAAC,CAAC,EAAE,EAAE;IACjE,2CAA2C;IAC3C,MAAM,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IAChC,MAAM,IAAI,GAAG,kBAAkB,OAAO,oBAAoB,CAAC;IAE3D,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,oEAAoE,EAAE,CAAC,CAAC,EAAE,EAAE;IAC/E,MAAM,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAC/B,MAAM,IAAI,GAAG,kBAAkB,OAAO,oBAAoB,CAAC;IAE3D,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,qEAAqE,EAAE,CAAC,CAAC,EAAE,EAAE;IAChF,MAAM,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IAChC,MAAM,IAAI,GAAG,kBAAkB,OAAO,oBAAoB,CAAC;IAE3D,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,kDAAkD,EAAE,CAAC,CAAC,EAAE,EAAE;IAC7D,MAAM,IAAI,GAAG;;;;;;;;;;;;;;;;;GAiBZ,CAAC;IAEF,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,yCAAyC,EAAE,CAAC,CAAC,EAAE,EAAE;IACpD,MAAM,WAAW,GAAG,cAAc,CAAC;IAEnC,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC;AACnC,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,qDAAqD,EAAE,CAAC,CAAC,EAAE,EAAE;IAChE,MAAM,IAAI,GAAG;;;;;;;;;;GAUZ,CAAC;IAEF,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,yCAAyC,EAAE,CAAC,CAAC,EAAE,EAAE;IACpD,MAAM,IAAI,GAAG;;;;;;;;GAQZ,CAAC;IAEF,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,mCAAmC,EAAE,CAAC,CAAC,EAAE,EAAE;IAC9C,MAAM,IAAI,GAAG;;;;;;;;;;GAUZ,CAAC;IAEF,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,mCAAmC,EAA
E,CAAC,CAAC,EAAE,EAAE;IAC9C,MAAM,IAAI,GAAG;;;;;;;GAOZ,CAAC;IAEF,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,yCAAyC,EAAE,CAAC,CAAC,EAAE,EAAE;IACpD,MAAM,IAAI,GAAG;;;;;;;;;GASZ,CAAC;IAEF,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,oCAAoC,EAAE,CAAC,CAAC,EAAE,EAAE;IAC/C,MAAM,IAAI,GAAG,4BAA4B,CAAC;IAE1C,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,4DAA4D,EAAE,CAAC,CAAC,EAAE,EAAE;IACvE,MAAM,IAAI,GAAG;;;;;;;;GAQZ,CAAC;IAEF,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEH,IAAI,CAAC,iDAAiD,EAAE,CAAC,CAAC,EAAE,EAAE;IAC5D,MAAM,IAAI,GAAG;;;;;;;;;;;GAWZ,CAAC;IAEF,CAAC,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nanocollective/get-md",
3
- "version": "1.0.1",
3
+ "version": "1.0.3",
4
4
  "description": "Fast HTML to Markdown converter optimized for LLM consumption",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -29,12 +29,12 @@
29
29
  "test:all": "./scripts/test.sh",
30
30
  "test:ava": "ava",
31
31
  "test:ava:coverage": "c8 ava",
32
- "test:format": "prettier --check .",
32
+ "test:format": "biome check .",
33
33
  "test:types": "tsc --noEmit",
34
- "test:lint": "eslint .",
35
- "test:lint:fix": "eslint . --fix",
34
+ "test:lint": "biome lint .",
35
+ "test:lint:fix": "biome check --write .",
36
36
  "test:knip": "knip",
37
- "format": "prettier --write .",
37
+ "format": "biome format --write .",
38
38
  "prepublishOnly": "pnpm run build && pnpm run test:all"
39
39
  },
40
40
  "keywords": [
@@ -55,7 +55,7 @@
55
55
  "ajv": "^8.17.1",
56
56
  "cheerio": "^1.1.2",
57
57
  "commander": "^14.0.2",
58
- "jsdom": "^24.1.3",
58
+ "happy-dom-without-node": "^14.12.3",
59
59
  "turndown": "^7.2.2",
60
60
  "turndown-plugin-gfm": "^1.0.2"
61
61
  },
@@ -66,21 +66,15 @@
66
66
  "license": "MIT",
67
67
  "devDependencies": {
68
68
  "@ava/typescript": "^6.0.0",
69
- "@eslint/js": "^9.38.0",
69
+ "@biomejs/biome": "^2.3.9",
70
70
  "@types/jsdom": "^27.0.0",
71
71
  "@types/node": "^24.9.1",
72
72
  "@types/turndown": "^5.0.6",
73
- "@typescript-eslint/eslint-plugin": "^8.46.2",
74
- "@typescript-eslint/parser": "^8.46.2",
75
73
  "ava": "^6.4.1",
76
74
  "c8": "^10.1.3",
77
- "eslint": "^9.38.0",
78
- "globals": "^16.4.0",
79
75
  "knip": "^5.66.3",
80
- "prettier": "^3.6.2",
81
76
  "tsx": "^4.20.6",
82
- "typescript": "^5.9.3",
83
- "typescript-eslint": "^8.46.2"
77
+ "typescript": "^5.9.3"
84
78
  },
85
79
  "ava": {
86
80
  "extensions": {