@j0hanz/superfetch 1.0.5 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +200 -36
- package/dist/config/index.d.ts +10 -5
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +41 -17
- package/dist/config/index.js.map +1 -1
- package/dist/config/types.d.ts +98 -57
- package/dist/config/types.d.ts.map +1 -1
- package/dist/errors/app-error.d.ts +4 -28
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +10 -51
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +31 -46
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +2 -2
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +12 -14
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +31 -14
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/parsers/base-html-element-parser.d.ts +43 -0
- package/dist/parsers/base-html-element-parser.d.ts.map +1 -0
- package/dist/parsers/base-html-element-parser.js +59 -0
- package/dist/parsers/base-html-element-parser.js.map +1 -0
- package/dist/parsers/heading-element-parser.d.ts +14 -0
- package/dist/parsers/heading-element-parser.d.ts.map +1 -0
- package/dist/parsers/heading-element-parser.js +26 -0
- package/dist/parsers/heading-element-parser.js.map +1 -0
- package/dist/parsers/image-element-parser.d.ts +16 -0
- package/dist/parsers/image-element-parser.d.ts.map +1 -0
- package/dist/parsers/image-element-parser.js +33 -0
- package/dist/parsers/image-element-parser.js.map +1 -0
- package/dist/parsers/link-element-parser.d.ts +15 -0
- package/dist/parsers/link-element-parser.d.ts.map +1 -0
- package/dist/parsers/link-element-parser.js +28 -0
- package/dist/parsers/link-element-parser.js.map +1 -0
- package/dist/parsers/open-graph-parser.d.ts +17 -0
- package/dist/parsers/open-graph-parser.d.ts.map +1 -0
- package/dist/parsers/open-graph-parser.js +41 -0
- package/dist/parsers/open-graph-parser.js.map +1 -0
- package/dist/parsers/schema-org-parser.d.ts +17 -0
- package/dist/parsers/schema-org-parser.d.ts.map +1 -0
- package/dist/parsers/schema-org-parser.js +32 -0
- package/dist/parsers/schema-org-parser.js.map +1 -0
- package/dist/parsers/standard-meta-parser.d.ts +18 -0
- package/dist/parsers/standard-meta-parser.d.ts.map +1 -0
- package/dist/parsers/standard-meta-parser.js +32 -0
- package/dist/parsers/standard-meta-parser.js.map +1 -0
- package/dist/parsers/twitter-card-parser.d.ts +17 -0
- package/dist/parsers/twitter-card-parser.d.ts.map +1 -0
- package/dist/parsers/twitter-card-parser.js +41 -0
- package/dist/parsers/twitter-card-parser.js.map +1 -0
- package/dist/resources/cached-content.d.ts +0 -2
- package/dist/resources/cached-content.d.ts.map +1 -1
- package/dist/resources/cached-content.js +3 -34
- package/dist/resources/cached-content.js.map +1 -1
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +8 -8
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +12 -11
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +0 -28
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +10 -166
- package/dist/services/cache.js.map +1 -1
- package/dist/services/card-extractor.d.ts +0 -4
- package/dist/services/card-extractor.d.ts.map +1 -1
- package/dist/services/card-extractor.js +6 -1
- package/dist/services/card-extractor.js.map +1 -1
- package/dist/services/extractor.d.ts +1 -11
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +86 -84
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +2 -13
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +79 -79
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts +5 -4
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +27 -42
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +35 -27
- package/dist/services/parser.js.map +1 -1
- package/dist/services/session-manager.d.ts +18 -0
- package/dist/services/session-manager.d.ts.map +1 -0
- package/dist/services/session-manager.js +73 -0
- package/dist/services/session-manager.js.map +1 -0
- package/dist/strategies/exponential-backoff-strategy.d.ts +13 -0
- package/dist/strategies/exponential-backoff-strategy.d.ts.map +1 -0
- package/dist/strategies/exponential-backoff-strategy.js +32 -0
- package/dist/strategies/exponential-backoff-strategy.js.map +1 -0
- package/dist/tools/handlers/fetch-links.tool.d.ts +2 -9
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +3 -0
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +5 -2
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +23 -33
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +2 -9
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +15 -20
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +2 -9
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.js +141 -108
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +0 -4
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/utils/common.d.ts +6 -7
- package/dist/tools/utils/common.d.ts.map +1 -1
- package/dist/tools/utils/common.js +8 -8
- package/dist/tools/utils/common.js.map +1 -1
- package/dist/tools/utils/fetch-pipeline.d.ts +8 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
- package/dist/tools/utils/fetch-pipeline.js +60 -63
- package/dist/tools/utils/fetch-pipeline.js.map +1 -1
- package/dist/transformers/jsonl.transformer.d.ts +1 -1
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +15 -10
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +58 -62
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/utils/concurrency.d.ts +2 -5
- package/dist/utils/concurrency.d.ts.map +1 -1
- package/dist/utils/concurrency.js +19 -19
- package/dist/utils/concurrency.js.map +1 -1
- package/dist/utils/content-cleaner.d.ts +0 -25
- package/dist/utils/content-cleaner.d.ts.map +1 -1
- package/dist/utils/content-cleaner.js +14 -171
- package/dist/utils/content-cleaner.js.map +1 -1
- package/dist/utils/html-truncator.d.ts +2 -0
- package/dist/utils/html-truncator.d.ts.map +1 -0
- package/dist/utils/html-truncator.js +14 -0
- package/dist/utils/html-truncator.js.map +1 -0
- package/dist/utils/language-detector.d.ts +0 -3
- package/dist/utils/language-detector.d.ts.map +1 -1
- package/dist/utils/language-detector.js +0 -11
- package/dist/utils/language-detector.js.map +1 -1
- package/dist/utils/sanitizer.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +7 -5
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +16 -41
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +1 -0
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +42 -23
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +9 -8
|
@@ -1,26 +1,24 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { FetchError } from '../errors/app-error.js';
|
|
2
2
|
import { logError } from '../services/logger.js';
|
|
3
|
-
export function errorHandler(err,
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
const
|
|
7
|
-
const
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
res.set('Retry-After', String(err.retryAfter));
|
|
3
|
+
export function errorHandler(err, req, res) {
|
|
4
|
+
const isFetchError = err instanceof FetchError;
|
|
5
|
+
const statusCode = isFetchError ? err.statusCode : 500;
|
|
6
|
+
const code = isFetchError ? err.code : 'INTERNAL_ERROR';
|
|
7
|
+
const message = isFetchError ? err.message : 'Internal Server Error';
|
|
8
|
+
logError(`HTTP ${statusCode}: ${err.message} - ${req.method} ${req.path}`, err);
|
|
9
|
+
if (isFetchError && err.code === 'RATE_LIMITED' && err.details.retryAfter) {
|
|
10
|
+
const retryAfter = err.details.retryAfter;
|
|
11
|
+
res.set('Retry-After', String(retryAfter));
|
|
13
12
|
}
|
|
14
13
|
const response = {
|
|
15
14
|
error: {
|
|
16
15
|
message,
|
|
17
16
|
code,
|
|
18
17
|
statusCode,
|
|
18
|
+
...(isFetchError &&
|
|
19
|
+
Object.keys(err.details).length > 0 && { details: err.details }),
|
|
19
20
|
},
|
|
20
21
|
};
|
|
21
|
-
if (err instanceof ValidationError && err.details) {
|
|
22
|
-
response.error.details = err.details;
|
|
23
|
-
}
|
|
24
22
|
if (process.env.NODE_ENV === 'development') {
|
|
25
23
|
response.error.stack = err.stack;
|
|
26
24
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"error-handler.js","sourceRoot":"","sources":["../../src/middleware/error-handler.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"error-handler.js","sourceRoot":"","sources":["../../src/middleware/error-handler.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEpD,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAEjD,MAAM,UAAU,YAAY,CAAC,GAAU,EAAE,GAAY,EAAE,GAAa;IAClE,MAAM,YAAY,GAAG,GAAG,YAAY,UAAU,CAAC;IAC/C,MAAM,UAAU,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC;IACvD,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,gBAAgB,CAAC;IACxD,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,uBAAuB,CAAC;IAErE,QAAQ,CACN,QAAQ,UAAU,KAAK,GAAG,CAAC,OAAO,MAAM,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE,EAChE,GAAG,CACJ,CAAC;IAEF,IAAI,YAAY,IAAI,GAAG,CAAC,IAAI,KAAK,cAAc,IAAI,GAAG,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC;QAC1E,MAAM,UAAU,GAAG,GAAG,CAAC,OAAO,CAAC,UAAoB,CAAC;QACpD,GAAG,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;IAC7C,CAAC;IAED,MAAM,QAAQ,GAAkB;QAC9B,KAAK,EAAE;YACL,OAAO;YACP,IAAI;YACJ,UAAU;YACV,GAAG,CAAC,YAAY;gBACd,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,EAAE,CAAC;SACnE;KACF,CAAC;IAEF,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,aAAa,EAAE,CAAC;QAC3C,QAAQ,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;IACnC,CAAC;IAED,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AACxC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rate-limiter.d.ts","sourceRoot":"","sources":["../../src/middleware/rate-limiter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAE/D,OAAO,KAAK,EAAkB,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"rate-limiter.d.ts","sourceRoot":"","sources":["../../src/middleware/rate-limiter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAE/D,OAAO,KAAK,EAAkB,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAoB7E,cAAM,WAAW;IACf,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAqC;IAC3D,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,eAAe,CAA+C;gBAE1D,OAAO,GAAE,OAAO,CAAC,kBAAkB,CAAM;IAmBrD,OAAO,IAAI,IAAI;IAQf,UAAU,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,IAAI,EAAE,YAAY,KAAK,IAAI;IA8CvE,OAAO,CAAC,MAAM;IAwBd,OAAO,CAAC,OAAO;CAUhB;AAED,eAAO,MAAM,WAAW,aAGtB,CAAC"}
|
|
@@ -7,6 +7,10 @@ const MIN_MAX_REQUESTS = 1;
|
|
|
7
7
|
const MAX_MAX_REQUESTS = 10000;
|
|
8
8
|
const MIN_WINDOW_MS = 1000;
|
|
9
9
|
const MAX_WINDOW_MS = 3600000;
|
|
10
|
+
const TRUSTED_PROXIES = new Set((process.env.TRUSTED_PROXIES ?? '')
|
|
11
|
+
.split(',')
|
|
12
|
+
.map((ip) => ip.trim())
|
|
13
|
+
.filter(Boolean));
|
|
10
14
|
class RateLimiter {
|
|
11
15
|
store = new Map();
|
|
12
16
|
maxRequests;
|
|
@@ -36,14 +40,25 @@ class RateLimiter {
|
|
|
36
40
|
let entry = this.store.get(key);
|
|
37
41
|
if (!entry || now > entry.resetTime) {
|
|
38
42
|
entry = {
|
|
39
|
-
count:
|
|
43
|
+
count: 1,
|
|
40
44
|
resetTime: now + this.windowMs,
|
|
41
45
|
lastAccessed: now,
|
|
42
46
|
};
|
|
43
47
|
this.store.set(key, entry);
|
|
44
48
|
}
|
|
45
|
-
|
|
46
|
-
|
|
49
|
+
else {
|
|
50
|
+
if (entry.count >= this.maxRequests) {
|
|
51
|
+
const retryAfter = Math.ceil((entry.resetTime - now) / 1000);
|
|
52
|
+
res.set('Retry-After', String(retryAfter));
|
|
53
|
+
res.status(429).json({
|
|
54
|
+
error: 'Too many requests',
|
|
55
|
+
retryAfter,
|
|
56
|
+
});
|
|
57
|
+
return;
|
|
58
|
+
}
|
|
59
|
+
entry.count++;
|
|
60
|
+
entry.lastAccessed = now;
|
|
61
|
+
}
|
|
47
62
|
if (entry.count > this.maxRequests) {
|
|
48
63
|
const retryAfter = Math.ceil((entry.resetTime - now) / 1000);
|
|
49
64
|
res.set('Retry-After', String(retryAfter));
|
|
@@ -61,16 +76,20 @@ class RateLimiter {
|
|
|
61
76
|
}
|
|
62
77
|
getKey(req) {
|
|
63
78
|
const fallback = req.ip ?? req.socket.remoteAddress ?? 'unknown';
|
|
64
|
-
const
|
|
65
|
-
const
|
|
79
|
+
const sourceIp = req.socket.remoteAddress ?? '';
|
|
80
|
+
const isTrustedProxy = TRUSTED_PROXIES.size === 0 || TRUSTED_PROXIES.has(sourceIp);
|
|
66
81
|
let ip = fallback;
|
|
67
|
-
if (
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
82
|
+
if (isTrustedProxy) {
|
|
83
|
+
const realIp = req.headers['x-real-ip'];
|
|
84
|
+
const forwardedFor = req.headers['x-forwarded-for'];
|
|
85
|
+
if (typeof realIp === 'string' && realIp) {
|
|
86
|
+
ip = realIp;
|
|
87
|
+
}
|
|
88
|
+
else if (typeof forwardedFor === 'string') {
|
|
89
|
+
const firstIp = forwardedFor.split(',')[0]?.trim();
|
|
90
|
+
if (firstIp)
|
|
91
|
+
ip = firstIp;
|
|
92
|
+
}
|
|
74
93
|
}
|
|
75
94
|
const sanitized = ip.replace(/[^a-fA-F0-9.:]/g, '').substring(0, 45);
|
|
76
95
|
return sanitized.length > 0 ? sanitized : 'unknown';
|
|
@@ -85,8 +104,6 @@ class RateLimiter {
|
|
|
85
104
|
}
|
|
86
105
|
}
|
|
87
106
|
}
|
|
88
|
-
// Create default rate limiter instance
|
|
89
|
-
// Override via RateLimiter constructor if different values needed
|
|
90
107
|
export const rateLimiter = new RateLimiter({
|
|
91
108
|
maxRequests: 100,
|
|
92
109
|
windowMs: 60000,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rate-limiter.js","sourceRoot":"","sources":["../../src/middleware/rate-limiter.ts"],"names":[],"mappings":"AAIA,MAAM,eAAe,GAAuB;IAC1C,WAAW,EAAE,GAAG;IAChB,QAAQ,EAAE,KAAK;IACf,iBAAiB,EAAE,KAAK;CACzB,CAAC;AAEF,MAAM,gBAAgB,GAAG,CAAC,CAAC;AAC3B,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAC/B,MAAM,aAAa,GAAG,IAAI,CAAC;AAC3B,MAAM,aAAa,GAAG,OAAO,CAAC;AAE9B,MAAM,WAAW;IACE,KAAK,GAAG,IAAI,GAAG,EAA0B,CAAC;IAC1C,WAAW,CAAS;IACpB,QAAQ,CAAS;IAC1B,eAAe,GAA0C,IAAI,CAAC;IAEtE,YAAY,UAAuC,EAAE;QACnD,MAAM,IAAI,GAAG,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;QAEhD,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,CACzB,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,gBAAgB,CAAC,EAC5C,gBAAgB,CACjB,CAAC;QACF,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,CACtB,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,CAAC,EACtC,aAAa,CACd,CAAC;QACF,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,iBAAiB,EAAE,aAAa,CAAC,CAAC;QAExE,IAAI,CAAC,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;YACtC,IAAI,CAAC,OAAO,EAAE,CAAC;QACjB,CAAC,EAAE,eAAe,CAAC,CAAC;QACpB,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;IAED,OAAO;QACL,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YACpC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC9B,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IACrB,CAAC;IAED,UAAU;QACR,OAAO,CAAC,GAAY,EAAE,GAAa,EAAE,IAAkB,EAAQ,EAAE;YAC/D,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEvB,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAEhC,IAAI,CAAC,KAAK,IAAI,GAAG,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;gBACpC,KAAK,GAAG;oBACN,KAAK,EAAE,CAAC;oBACR,SAAS,EAAE,GAAG,GAAG,IAAI,CAAC,QAAQ;oBAC9B,YAAY,EAAE,GAAG;iBAClB,CAAC;gBACF,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YAC7B,CAAC;
|
|
1
|
+
{"version":3,"file":"rate-limiter.js","sourceRoot":"","sources":["../../src/middleware/rate-limiter.ts"],"names":[],"mappings":"AAIA,MAAM,eAAe,GAAuB;IAC1C,WAAW,EAAE,GAAG;IAChB,QAAQ,EAAE,KAAK;IACf,iBAAiB,EAAE,KAAK;CACzB,CAAC;AAEF,MAAM,gBAAgB,GAAG,CAAC,CAAC;AAC3B,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAC/B,MAAM,aAAa,GAAG,IAAI,CAAC;AAC3B,MAAM,aAAa,GAAG,OAAO,CAAC;AAE9B,MAAM,eAAe,GAAG,IAAI,GAAG,CAC7B,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,EAAE,CAAC;KAChC,KAAK,CAAC,GAAG,CAAC;KACV,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,EAAE,CAAC;KACtB,MAAM,CAAC,OAAO,CAAC,CACnB,CAAC;AAEF,MAAM,WAAW;IACE,KAAK,GAAG,IAAI,GAAG,EAA0B,CAAC;IAC1C,WAAW,CAAS;IACpB,QAAQ,CAAS;IAC1B,eAAe,GAA0C,IAAI,CAAC;IAEtE,YAAY,UAAuC,EAAE;QACnD,MAAM,IAAI,GAAG,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;QAEhD,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,CACzB,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,gBAAgB,CAAC,EAC5C,gBAAgB,CACjB,CAAC;QACF,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,CACtB,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,CAAC,EACtC,aAAa,CACd,CAAC;QACF,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,iBAAiB,EAAE,aAAa,CAAC,CAAC;QAExE,IAAI,CAAC,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;YACtC,IAAI,CAAC,OAAO,EAAE,CAAC;QACjB,CAAC,EAAE,eAAe,CAAC,CAAC;QACpB,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;IAED,OAAO;QACL,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YACpC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC9B,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IACrB,CAAC;IAED,UAAU;QACR,OAAO,CAAC,GAAY,EAAE,GAAa,EAAE,IAAkB,EAAQ,EAAE;YAC/D,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEvB,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAEhC,IAAI,CAAC,KAAK,IAAI,GAAG,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;gBACpC,KAAK,GAAG;oBACN,KAAK,EAAE,CAAC;oBACR,SAAS,EAAE,GAAG,GAAG,IAAI,CAAC,QAAQ;oBAC9B,YAAY,EAAE,GAAG;iBAClB,CAAC;gBACF,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YAC7B,CAAC;iBAAM,CAAC;gBACN,IAAI,KAAK,CAAC,KAAK,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;oBACpC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,C
AAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;oBAC7D,GAAG,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;oBAC3C,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;wBACnB,KAAK,EAAE,mBAAmB;wBAC1B,UAAU;qBACX,CAAC,CAAC;oBACH,OAAO;gBACT,CAAC;gBACD,KAAK,CAAC,KAAK,EAAE,CAAC;gBACd,KAAK,CAAC,YAAY,GAAG,GAAG,CAAC;YAC3B,CAAC;YAED,IAAI,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;gBACnC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;gBAC7D,GAAG,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;gBAC3C,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,mBAAmB;oBAC1B,UAAU;iBACX,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,GAAG,CAAC,GAAG,CAAC,mBAAmB,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;YACvD,GAAG,CAAC,GAAG,CAAC,uBAAuB,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;YACzE,GAAG,CAAC,GAAG,CAAC,mBAAmB,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAExE,IAAI,EAAE,CAAC;QACT,CAAC,CAAC;IACJ,CAAC;IAEO,MAAM,CAAC,GAAY;QACzB,MAAM,QAAQ,GAAG,GAAG,CAAC,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,aAAa,IAAI,SAAS,CAAC;QAEjE,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,aAAa,IAAI,EAAE,CAAC;QAChD,MAAM,cAAc,GAClB,eAAe,CAAC,IAAI,KAAK,CAAC,IAAI,eAAe,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAE9D,IAAI,EAAE,GAAW,QAAQ,CAAC;QAC1B,IAAI,cAAc,EAAE,CAAC;YACnB,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YACxC,MAAM,YAAY,GAAG,GAAG,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC;YAEpD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,EAAE,CAAC;gBACzC,EAAE,GAAG,MAAM,CAAC;YACd,CAAC;iBAAM,IAAI,OAAO,YAAY,KAAK,QAAQ,EAAE,CAAC;gBAC5C,MAAM,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;gBACnD,IAAI,OAAO;oBAAE,EAAE,GAAG,OAAO,CAAC;YAC5B,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACrE,OAAO,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC;IACtD,CAAC;IAEO,OAAO;QACb,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,aAAa,GAAG,OAAO,CAAC,CAAC,SAAS;QAExC,K
AAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACtC,IAAI,KAAK,CAAC,SAAS,GAAG,GAAG,IAAI,GAAG,GAAG,KAAK,CAAC,YAAY,GAAG,aAAa,EAAE,CAAC;gBACtE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;CACF;AAED,MAAM,CAAC,MAAM,WAAW,GAAG,IAAI,WAAW,CAAC;IACzC,WAAW,EAAE,GAAG;IAChB,QAAQ,EAAE,KAAK;CAChB,CAAC,CAAC"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base class for HTML element parsers that extract specific content types.
|
|
3
|
+
* Provides common selector querying and attribute extraction functionality.
|
|
4
|
+
*/
|
|
5
|
+
export declare abstract class BaseHTMLElementParser<T> {
|
|
6
|
+
/**
|
|
7
|
+
* Parse HTML content and extract structured data.
|
|
8
|
+
* @param document - The JSDOM document to parse
|
|
9
|
+
* @returns Parsed data of type T or null if extraction fails
|
|
10
|
+
*/
|
|
11
|
+
abstract parse(document: Document): T | null;
|
|
12
|
+
/**
|
|
13
|
+
* Validate the parsed data conforms to expected type.
|
|
14
|
+
* @param data - The data to validate
|
|
15
|
+
* @returns True if data is valid
|
|
16
|
+
*/
|
|
17
|
+
protected isValidData(data: T | null): data is T;
|
|
18
|
+
/**
|
|
19
|
+
* Query a single element using CSS selector.
|
|
20
|
+
*/
|
|
21
|
+
protected querySelector(parent: Document | Element, selector: string): Element | null;
|
|
22
|
+
/**
|
|
23
|
+
* Query multiple elements using CSS selector.
|
|
24
|
+
*/
|
|
25
|
+
protected querySelectorAll(parent: Document | Element, selector: string): Element[];
|
|
26
|
+
/**
|
|
27
|
+
* Safely extract text content from an element.
|
|
28
|
+
*/
|
|
29
|
+
protected getTextContent(element: Element | null): string | undefined;
|
|
30
|
+
/**
|
|
31
|
+
* Safely extract attribute value from an element.
|
|
32
|
+
*/
|
|
33
|
+
protected getAttribute(element: Element | null, attr: string): string | undefined;
|
|
34
|
+
/**
|
|
35
|
+
* Extract multiple attributes from an element.
|
|
36
|
+
*/
|
|
37
|
+
protected getAttributes(element: Element | null, attrs: string[]): Record<string, string>;
|
|
38
|
+
/**
|
|
39
|
+
* Check if element exists and has content.
|
|
40
|
+
*/
|
|
41
|
+
protected hasContent(element: Element | null): boolean;
|
|
42
|
+
}
|
|
43
|
+
//# sourceMappingURL=base-html-element-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base-html-element-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/base-html-element-parser.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,8BAAsB,qBAAqB,CAAC,CAAC;IAC3C;;;;OAIG;IACH,QAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,CAAC,GAAG,IAAI;IAE5C;;;;OAIG;IACH,SAAS,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,GAAG,IAAI,GAAG,IAAI,IAAI,CAAC;IAIhD;;OAEG;IACH,SAAS,CAAC,aAAa,CACrB,MAAM,EAAE,QAAQ,GAAG,OAAO,EAC1B,QAAQ,EAAE,MAAM,GACf,OAAO,GAAG,IAAI;IAIjB;;OAEG;IACH,SAAS,CAAC,gBAAgB,CACxB,MAAM,EAAE,QAAQ,GAAG,OAAO,EAC1B,QAAQ,EAAE,MAAM,GACf,OAAO,EAAE;IAIZ;;OAEG;IACH,SAAS,CAAC,cAAc,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,GAAG,MAAM,GAAG,SAAS;IAIrE;;OAEG;IACH,SAAS,CAAC,YAAY,CACpB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,IAAI,EAAE,MAAM,GACX,MAAM,GAAG,SAAS;IAIrB;;OAEG;IACH,SAAS,CAAC,aAAa,CACrB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,KAAK,EAAE,MAAM,EAAE,GACd,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;IAWzB;;OAEG;IACH,SAAS,CAAC,UAAU,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,GAAG,OAAO;CAGvD"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base class for HTML element parsers that extract specific content types.
|
|
3
|
+
* Provides common selector querying and attribute extraction functionality.
|
|
4
|
+
*/
|
|
5
|
+
export class BaseHTMLElementParser {
|
|
6
|
+
/**
|
|
7
|
+
* Validate the parsed data conforms to expected type.
|
|
8
|
+
* @param data - The data to validate
|
|
9
|
+
* @returns True if data is valid
|
|
10
|
+
*/
|
|
11
|
+
isValidData(data) {
|
|
12
|
+
return data !== null;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Query a single element using CSS selector.
|
|
16
|
+
*/
|
|
17
|
+
querySelector(parent, selector) {
|
|
18
|
+
return parent.querySelector(selector);
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Query multiple elements using CSS selector.
|
|
22
|
+
*/
|
|
23
|
+
querySelectorAll(parent, selector) {
|
|
24
|
+
return Array.from(parent.querySelectorAll(selector));
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Safely extract text content from an element.
|
|
28
|
+
*/
|
|
29
|
+
getTextContent(element) {
|
|
30
|
+
return element?.textContent.trim() ?? undefined;
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Safely extract attribute value from an element.
|
|
34
|
+
*/
|
|
35
|
+
getAttribute(element, attr) {
|
|
36
|
+
return element?.getAttribute(attr)?.trim() ?? undefined;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Extract multiple attributes from an element.
|
|
40
|
+
*/
|
|
41
|
+
getAttributes(element, attrs) {
|
|
42
|
+
const result = {};
|
|
43
|
+
if (!element)
|
|
44
|
+
return result;
|
|
45
|
+
for (const attr of attrs) {
|
|
46
|
+
const value = this.getAttribute(element, attr);
|
|
47
|
+
if (value)
|
|
48
|
+
result[attr] = value;
|
|
49
|
+
}
|
|
50
|
+
return result;
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Check if element exists and has content.
|
|
54
|
+
*/
|
|
55
|
+
hasContent(element) {
|
|
56
|
+
return !!element && !!element.textContent.trim();
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
//# sourceMappingURL=base-html-element-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"base-html-element-parser.js","sourceRoot":"","sources":["../../src/parsers/base-html-element-parser.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,OAAgB,qBAAqB;IAQzC;;;;OAIG;IACO,WAAW,CAAC,IAAc;QAClC,OAAO,IAAI,KAAK,IAAI,CAAC;IACvB,CAAC;IAED;;OAEG;IACO,aAAa,CACrB,MAA0B,EAC1B,QAAgB;QAEhB,OAAO,MAAM,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC;IAED;;OAEG;IACO,gBAAgB,CACxB,MAA0B,EAC1B,QAAgB;QAEhB,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;IACvD,CAAC;IAED;;OAEG;IACO,cAAc,CAAC,OAAuB;QAC9C,OAAO,OAAO,EAAE,WAAW,CAAC,IAAI,EAAE,IAAI,SAAS,CAAC;IAClD,CAAC;IAED;;OAEG;IACO,YAAY,CACpB,OAAuB,EACvB,IAAY;QAEZ,OAAO,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;IAC1D,CAAC;IAED;;OAEG;IACO,aAAa,CACrB,OAAuB,EACvB,KAAe;QAEf,MAAM,MAAM,GAA2B,EAAE,CAAC;QAC1C,IAAI,CAAC,OAAO;YAAE,OAAO,MAAM,CAAC;QAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;YAC/C,IAAI,KAAK;gBAAE,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC;QAClC,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACO,UAAU,CAAC,OAAuB;QAC1C,OAAO,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;IACnD,CAAC;CACF"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { BaseHTMLElementParser } from './base-html-element-parser.js';
|
|
2
|
+
interface HeadingData {
|
|
3
|
+
level: number;
|
|
4
|
+
text: string;
|
|
5
|
+
id?: string;
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* Parser for HTML heading elements (h1-h6) with hierarchy.
|
|
9
|
+
*/
|
|
10
|
+
export declare class HeadingElementParser extends BaseHTMLElementParser<HeadingData[]> {
|
|
11
|
+
parse(document: Document): HeadingData[] | null;
|
|
12
|
+
}
|
|
13
|
+
export {};
|
|
14
|
+
//# sourceMappingURL=heading-element-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"heading-element-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/heading-element-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,WAAW;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,CAAC,EAAE,MAAM,CAAC;CACb;AAED;;GAEG;AACH,qBAAa,oBAAqB,SAAQ,qBAAqB,CAAC,WAAW,EAAE,CAAC;IAC5E,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,WAAW,EAAE,GAAG,IAAI;CAuBhD"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { BaseHTMLElementParser } from './base-html-element-parser.js';
|
|
2
|
+
/**
|
|
3
|
+
* Parser for HTML heading elements (h1-h6) with hierarchy.
|
|
4
|
+
*/
|
|
5
|
+
export class HeadingElementParser extends BaseHTMLElementParser {
|
|
6
|
+
parse(document) {
|
|
7
|
+
const headings = [];
|
|
8
|
+
// Extract all heading elements
|
|
9
|
+
for (let level = 1; level <= 6; level++) {
|
|
10
|
+
const elements = this.querySelectorAll(document, `h${level}`);
|
|
11
|
+
for (const el of elements) {
|
|
12
|
+
const text = this.getTextContent(el);
|
|
13
|
+
if (!text)
|
|
14
|
+
continue;
|
|
15
|
+
const heading = { level, text };
|
|
16
|
+
// Optional id attribute
|
|
17
|
+
const id = this.getAttribute(el, 'id');
|
|
18
|
+
if (id)
|
|
19
|
+
heading.id = id;
|
|
20
|
+
headings.push(heading);
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
return headings.length > 0 ? headings : null;
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
//# sourceMappingURL=heading-element-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"heading-element-parser.js","sourceRoot":"","sources":["../../src/parsers/heading-element-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAQtE;;GAEG;AACH,MAAM,OAAO,oBAAqB,SAAQ,qBAAoC;IAC5E,KAAK,CAAC,QAAkB;QACtB,MAAM,QAAQ,GAAkB,EAAE,CAAC;QAEnC,+BAA+B;QAC/B,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,IAAI,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;YACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,IAAI,KAAK,EAAE,CAAC,CAAC;YAE9D,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;gBAC1B,MAAM,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;gBACrC,IAAI,CAAC,IAAI;oBAAE,SAAS;gBAEpB,MAAM,OAAO,GAAgB,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;gBAE7C,wBAAwB;gBACxB,MAAM,EAAE,GAAG,IAAI,CAAC,YAAY,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC;gBACvC,IAAI,EAAE;oBAAE,OAAO,CAAC,EAAE,GAAG,EAAE,CAAC;gBAExB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC;IAC/C,CAAC;CACF"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { BaseHTMLElementParser } from './base-html-element-parser.js';
|
|
2
|
+
interface ImageData {
|
|
3
|
+
src?: string;
|
|
4
|
+
alt?: string;
|
|
5
|
+
width?: string;
|
|
6
|
+
height?: string;
|
|
7
|
+
loading?: string;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Parser for HTML image elements with metadata.
|
|
11
|
+
*/
|
|
12
|
+
export declare class ImageElementParser extends BaseHTMLElementParser<ImageData[]> {
|
|
13
|
+
parse(document: Document): ImageData[] | null;
|
|
14
|
+
}
|
|
15
|
+
export {};
|
|
16
|
+
//# sourceMappingURL=image-element-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image-element-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/image-element-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,SAAS;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,qBAAa,kBAAmB,SAAQ,qBAAqB,CAAC,SAAS,EAAE,CAAC;IACxE,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,SAAS,EAAE,GAAG,IAAI;CA8B9C"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { BaseHTMLElementParser } from './base-html-element-parser.js';
|
|
2
|
+
/**
 * Collects attribute metadata for every `<img>` element that carries a `src`.
 */
export class ImageElementParser extends BaseHTMLElementParser {
  /**
   * Builds one record per matching image.
   *
   * @param {Document} document - Document to scan for `img[src]` elements.
   * @returns {Array<object> | null} Records holding `src` plus whichever of
   *   `alt`, `width`, `height` and `loading` had a truthy value, or `null`
   *   when no image produced a record.
   */
  parse(document) {
    // Read in this fixed order so record keys keep the same insertion order.
    const optionalAttributes = ['alt', 'width', 'height', 'loading'];
    const collected = [];

    for (const element of this.querySelectorAll(document, 'img[src]')) {
      const source = this.getAttribute(element, 'src');

      // The selector only guarantees the attribute exists, not that it has a value.
      if (source) {
        const record = { src: source };

        for (const attributeName of optionalAttributes) {
          const value = this.getAttribute(element, attributeName);
          if (value) {
            record[attributeName] = value;
          }
        }

        collected.push(record);
      }
    }

    return collected.length === 0 ? null : collected;
  }
}
|
|
33
|
+
//# sourceMappingURL=image-element-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image-element-parser.js","sourceRoot":"","sources":["../../src/parsers/image-element-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAUtE;;GAEG;AACH,MAAM,OAAO,kBAAmB,SAAQ,qBAAkC;IACxE,KAAK,CAAC,QAAkB;QACtB,MAAM,MAAM,GAAgB,EAAE,CAAC;QAE/B,oCAAoC;QACpC,MAAM,WAAW,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAEhE,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YAC1C,IAAI,CAAC,GAAG;gBAAE,SAAS;YAEnB,MAAM,SAAS,GAAc,EAAE,GAAG,EAAE,CAAC;YAErC,sBAAsB;YACtB,MAAM,GAAG,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YAC1C,IAAI,GAAG;gBAAE,SAAS,CAAC,GAAG,GAAG,GAAG,CAAC;YAE7B,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YAC9C,IAAI,KAAK;gBAAE,SAAS,CAAC,KAAK,GAAG,KAAK,CAAC;YAEnC,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;YAChD,IAAI,MAAM;gBAAE,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;YAEtC,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAClD,IAAI,OAAO;gBAAE,SAAS,CAAC,OAAO,GAAG,OAAO,CAAC;YAEzC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACzB,CAAC;QAED,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;IAC3C,CAAC;CACF"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { BaseHTMLElementParser } from './base-html-element-parser.js';
|
|
2
|
+
interface LinkData { // Attribute values recorded for a single <link> element.
|
|
3
|
+
rel?: string; // Value read from the `rel` attribute.
|
|
4
|
+
href?: string; // Value read from the `href` attribute.
|
|
5
|
+
type?: string; // Value read from `type`; only set when that value is truthy.
|
|
6
|
+
title?: string; // Value read from `title`; only set when that value is truthy.
|
|
7
|
+
}
|
|
8
|
+
/**
|
|
9
|
+
* Parser for HTML link elements (canonical, alternate, icon, etc.); it scans every `link[rel][href]` element.
|
|
10
|
+
*/
|
|
11
|
+
export declare class LinkElementParser extends BaseHTMLElementParser<LinkData[]> {
|
|
12
|
+
parse(document: Document): LinkData[] | null; // One entry per link whose `rel` and `href` are both truthy; `null` when there are none.
|
|
13
|
+
}
|
|
14
|
+
export {};
|
|
15
|
+
//# sourceMappingURL=link-element-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"link-element-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/link-element-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,QAAQ;IAChB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,qBAAqB,CAAC,QAAQ,EAAE,CAAC;IACtE,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,QAAQ,EAAE,GAAG,IAAI;CA0B7C"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { BaseHTMLElementParser } from './base-html-element-parser.js';
|
|
2
|
+
/**
 * Collects `<link>` elements (canonical, alternate, icon, etc.) that declare
 * both `rel` and `href`.
 */
export class LinkElementParser extends BaseHTMLElementParser {
  /**
   * Builds one record per matching link element.
   *
   * @param {Document} document - Document to scan for `link[rel][href]` elements.
   * @returns {Array<object> | null} Records holding `rel` and `href` plus
   *   `type` / `title` when those had a truthy value, or `null` when no link
   *   produced a record.
   */
  parse(document) {
    const found = [];

    for (const element of this.querySelectorAll(document, 'link[rel][href]')) {
      const rel = this.getAttribute(element, 'rel');
      const href = this.getAttribute(element, 'href');

      // Both values are required; an element missing either is skipped.
      if (rel && href) {
        const entry = { rel, href };

        // Optional attributes, copied only when they carry a value.
        for (const optionalName of ['type', 'title']) {
          const value = this.getAttribute(element, optionalName);
          if (value) {
            entry[optionalName] = value;
          }
        }

        found.push(entry);
      }
    }

    return found.length === 0 ? null : found;
  }
}
|
|
28
|
+
//# sourceMappingURL=link-element-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"link-element-parser.js","sourceRoot":"","sources":["../../src/parsers/link-element-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAStE;;GAEG;AACH,MAAM,OAAO,iBAAkB,SAAQ,qBAAiC;IACtE,KAAK,CAAC,QAAkB;QACtB,MAAM,KAAK,GAAe,EAAE,CAAC;QAE7B,8CAA8C;QAC9C,MAAM,YAAY,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,iBAAiB,CAAC,CAAC;QAExE,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;YAChC,MAAM,GAAG,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;YAC3C,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAE7C,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI;gBAAE,SAAS;YAE5B,MAAM,QAAQ,GAAa,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC;YAEzC,sBAAsB;YACtB,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC7C,IAAI,IAAI;gBAAE,QAAQ,CAAC,IAAI,GAAG,IAAI,CAAC;YAE/B,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAC/C,IAAI,KAAK;gBAAE,QAAQ,CAAC,KAAK,GAAG,KAAK,CAAC;YAElC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACvB,CAAC;QAED,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;IACzC,CAAC;CACF"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { BaseHTMLElementParser } from './base-html-element-parser.js';
|
|
2
|
+
interface OpenGraphData { // Open Graph values the parser recognises; each key is set only when its tag has content.
|
|
3
|
+
title?: string; // `content` of the `og:title` meta tag.
|
|
4
|
+
description?: string; // `content` of the `og:description` meta tag.
|
|
5
|
+
image?: string; // `content` of the `og:image` meta tag.
|
|
6
|
+
url?: string; // `content` of the `og:url` meta tag.
|
|
7
|
+
siteName?: string; // `content` of the `og:site_name` meta tag.
|
|
8
|
+
type?: string; // `content` of the `og:type` meta tag.
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Parser for Open Graph meta tags (og:*); it scans `meta[property^="og:"]` elements and keeps six known properties.
|
|
12
|
+
*/
|
|
13
|
+
export declare class OpenGraphParser extends BaseHTMLElementParser<OpenGraphData> {
|
|
14
|
+
parse(document: Document): OpenGraphData | null; // `null` when no recognised og:* tag had both a property and content.
|
|
15
|
+
}
|
|
16
|
+
export {};
|
|
17
|
+
//# sourceMappingURL=open-graph-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"open-graph-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/open-graph-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,aAAa;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,qBAAa,eAAgB,SAAQ,qBAAqB,CAAC,aAAa,CAAC;IACvE,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,aAAa,GAAG,IAAI;CAuChD"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { BaseHTMLElementParser } from './base-html-element-parser.js';
|
|
2
|
+
/**
 * Parser for Open Graph meta tags (og:*).
 *
 * Recognises `og:title`, `og:description`, `og:image`, `og:url`,
 * `og:site_name` and `og:type`; every other `og:*` property is ignored.
 */
export class OpenGraphParser extends BaseHTMLElementParser {
  /**
   * Extracts the recognised Open Graph properties from a document.
   *
   * @param {Document} document - Document to scan for `meta[property^="og:"]`.
   * @returns {object | null} Object with any of `title`, `description`,
   *   `image`, `url`, `siteName`, `type`, or `null` when none was found.
   */
  parse(document) {
    // Maps the part after "og:" to the output key. A Map avoids accidental hits
    // on Object.prototype members for properties such as "og:constructor".
    const fieldByProperty = new Map([
      ['title', 'title'],
      ['description', 'description'],
      ['image', 'image'],
      ['url', 'url'],
      ['site_name', 'siteName'],
      ['type', 'type'],
    ]);
    const data = {};

    // Extract all Open Graph meta tags
    const ogTags = this.querySelectorAll(document, 'meta[property^="og:"]');

    for (const tag of ogTags) {
      const property = this.getAttribute(tag, 'property');
      const content = this.getAttribute(tag, 'content');
      if (!property || !content) {
        continue;
      }

      // Extract property name (e.g., "og:title" → "title")
      const field = fieldByProperty.get(property.replace('og:', ''));

      // The Open Graph protocol gives the first tag (top to bottom) preference
      // when a property is repeated, so a later duplicate must not overwrite it.
      if (field !== undefined && data[field] === undefined) {
        data[field] = content;
      }
    }

    return Object.keys(data).length > 0 ? data : null;
  }
}
|
|
41
|
+
//# sourceMappingURL=open-graph-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"open-graph-parser.js","sourceRoot":"","sources":["../../src/parsers/open-graph-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAWtE;;GAEG;AACH,MAAM,OAAO,eAAgB,SAAQ,qBAAoC;IACvE,KAAK,CAAC,QAAkB;QACtB,MAAM,IAAI,GAAkB,EAAE,CAAC;QAE/B,mCAAmC;QACnC,MAAM,MAAM,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,uBAAuB,CAAC,CAAC;QAExE,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;YACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;YACpD,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAElD,IAAI,CAAC,QAAQ,IAAI,CAAC,OAAO;gBAAE,SAAS;YAEpC,qDAAqD;YACrD,MAAM,GAAG,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YAExC,QAAQ,GAAG,EAAE,CAAC;gBACZ,KAAK,OAAO;oBACV,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC;oBACrB,MAAM;gBACR,KAAK,aAAa;oBAChB,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC;oBAC3B,MAAM;gBACR,KAAK,OAAO;oBACV,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC;oBACrB,MAAM;gBACR,KAAK,KAAK;oBACR,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC;oBACnB,MAAM;gBACR,KAAK,WAAW;oBACd,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;oBACxB,MAAM;gBACR,KAAK,MAAM;oBACT,IAAI,CAAC,IAAI,GAAG,OAAO,CAAC;oBACpB,MAAM;YACV,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;IACpD,CAAC;CACF"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { BaseHTMLElementParser } from './base-html-element-parser.js';
|
|
2
|
+
interface SchemaOrgData { // Shape assumed for one parsed JSON-LD object; the parser does not validate values against it.
|
|
3
|
+
type?: string; // NOTE(review): the parser stores parsed JSON-LD unchanged and never maps "@type" to this key — confirm who populates it.
|
|
4
|
+
name?: string; // Carried through from the parsed JSON when present.
|
|
5
|
+
description?: string; // Carried through from the parsed JSON when present.
|
|
6
|
+
image?: string; // Declared as string; the parser does not check the JSON value's type.
|
|
7
|
+
url?: string; // Carried through from the parsed JSON when present.
|
|
8
|
+
[key: string]: unknown; // Any other property present on the parsed JSON-LD object.
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Parser for JSON-LD Schema.org structured data found in `script[type="application/ld+json"]` elements.
|
|
12
|
+
*/
|
|
13
|
+
export declare class SchemaOrgParser extends BaseHTMLElementParser<SchemaOrgData[]> {
|
|
14
|
+
parse(document: Document): SchemaOrgData[] | null; // Parsed script contents (top-level arrays flattened one level, invalid JSON skipped); `null` when nothing was collected.
|
|
15
|
+
}
|
|
16
|
+
export {};
|
|
17
|
+
//# sourceMappingURL=schema-org-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema-org-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/schema-org-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,aAAa;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED;;GAEG;AACH,qBAAa,eAAgB,SAAQ,qBAAqB,CAAC,aAAa,EAAE,CAAC;IACzE,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,aAAa,EAAE,GAAG,IAAI;CA8BlD"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { BaseHTMLElementParser } from './base-html-element-parser.js';
|
|
2
|
+
/**
 * Parser for JSON-LD Schema.org structured data.
 */
export class SchemaOrgParser extends BaseHTMLElementParser {
  /**
   * Collects the JSON-LD objects embedded in a document.
   *
   * @param {Document} document - Document to scan for
   *   `script[type="application/ld+json"]` elements.
   * @returns {Array<object> | null} Every JSON object found, with top-level
   *   arrays flattened one level, or `null` when no object was collected.
   *   Scripts holding invalid JSON are skipped.
   */
  parse(document) {
    const results = [];

    // Find all JSON-LD script tags
    const scripts = this.querySelectorAll(document, 'script[type="application/ld+json"]');

    for (const script of scripts) {
      const textContent = this.getTextContent(script);
      if (!textContent) {
        continue;
      }

      let data;
      try {
        data = JSON.parse(textContent);
      } catch {
        // Skip invalid JSON
        continue;
      }

      // Handle both single objects and arrays. Items are pushed one at a time
      // (not spread into push) so a very large array cannot overflow the
      // argument list.
      const candidates = Array.isArray(data) ? data : [data];
      for (const candidate of candidates) {
        // `null`, numbers, strings and booleans are valid JSON but are not
        // structured-data records, so they are left out of the result.
        if (typeof candidate === 'object' && candidate !== null && !Array.isArray(candidate)) {
          results.push(candidate);
        }
      }
    }

    return results.length > 0 ? results : null;
  }
}
|
|
32
|
+
//# sourceMappingURL=schema-org-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema-org-parser.js","sourceRoot":"","sources":["../../src/parsers/schema-org-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAWtE;;GAEG;AACH,MAAM,OAAO,eAAgB,SAAQ,qBAAsC;IACzE,KAAK,CAAC,QAAkB;QACtB,MAAM,OAAO,GAAoB,EAAE,CAAC;QAEpC,+BAA+B;QAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,gBAAgB,CACnC,QAAQ,EACR,oCAAoC,CACrC,CAAC;QAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;YAChD,IAAI,CAAC,WAAW;gBAAE,SAAS;YAE3B,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAoC,CAAC;gBAExE,wCAAwC;gBACxC,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;oBACxB,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;gBACxB,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACrB,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,oBAAoB;gBACpB,SAAS;YACX,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IAC7C,CAAC;CACF"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { BaseHTMLElementParser } from './base-html-element-parser.js';
|
|
2
|
+
interface StandardMetadata { // Values gathered from <meta name> tags, <meta charset> and <title>.
|
|
3
|
+
title?: string; // `content` of `<meta name="title">`; otherwise the text of the `<title>` element.
|
|
4
|
+
description?: string; // `content` of `<meta name="description">`.
|
|
5
|
+
keywords?: string; // `content` of `<meta name="keywords">`, stored as the raw string.
|
|
6
|
+
author?: string; // `content` of `<meta name="author">`.
|
|
7
|
+
viewport?: string; // `content` of `<meta name="viewport">`.
|
|
8
|
+
charset?: string; // `charset` attribute of the `meta[charset]` element.
|
|
9
|
+
[key: string]: string | undefined; // Any other `<meta name>` tag is stored under its own name.
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Parser for standard HTML meta tags (name="*" and charset), with the `<title>` element as a fallback title.
|
|
13
|
+
*/
|
|
14
|
+
export declare class StandardMetaParser extends BaseHTMLElementParser<StandardMetadata> {
|
|
15
|
+
parse(document: Document): StandardMetadata | null; // `null` when the collected object ends up with no keys.
|
|
16
|
+
}
|
|
17
|
+
export {};
|
|
18
|
+
//# sourceMappingURL=standard-meta-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"standard-meta-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/standard-meta-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,gBAAgB;IACxB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;CACnC;AAED;;GAEG;AACH,qBAAa,kBAAmB,SAAQ,qBAAqB,CAAC,gBAAgB,CAAC;IAC7E,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,gBAAgB,GAAG,IAAI;CA+BnD"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { BaseHTMLElementParser } from './base-html-element-parser.js';
|
|
2
|
+
/**
 * Parser for standard HTML meta tags (name="*" and charset).
 */
export class StandardMetaParser extends BaseHTMLElementParser {
  /**
   * Collects charset, named meta tags and the document title.
   *
   * @param {Document} document - Document to scan.
   * @returns {object | null} Object keyed by meta name (plus `charset` and
   *   `title`), holding only non-empty values, or `null` when nothing was
   *   found.
   */
  parse(document) {
    const data = {};

    // Extract charset. The value is checked before it is stored: an empty or
    // missing value would otherwise create a key with no usable content and
    // make the result look non-empty.
    const charsetMeta = this.querySelector(document, 'meta[charset]');
    if (charsetMeta) {
      const charset = this.getAttribute(charsetMeta, 'charset');
      if (charset) {
        data.charset = charset;
      }
    }

    // Extract standard meta tags
    const metaTags = this.querySelectorAll(document, 'meta[name][content]');
    for (const tag of metaTags) {
      const name = this.getAttribute(tag, 'name');
      const content = this.getAttribute(tag, 'content');
      if (name && content) {
        data[name] = content;
      }
    }

    // Extract title if not already present (a <meta name="title"> wins).
    if (!data.title) {
      const titleEl = this.querySelector(document, 'title');
      if (titleEl) {
        const title = this.getTextContent(titleEl);
        // Same guard as charset: an empty <title> must not create a key.
        if (title) {
          data.title = title;
        }
      }
    }

    return Object.keys(data).length > 0 ? data : null;
  }
}
|
|
32
|
+
//# sourceMappingURL=standard-meta-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"standard-meta-parser.js","sourceRoot":"","sources":["../../src/parsers/standard-meta-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAYtE;;GAEG;AACH,MAAM,OAAO,kBAAmB,SAAQ,qBAAuC;IAC7E,KAAK,CAAC,QAAkB;QACtB,MAAM,IAAI,GAAqB,EAAE,CAAC;QAElC,kBAAkB;QAClB,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,eAAe,CAAC,CAAC;QAClE,IAAI,WAAW,EAAE,CAAC;YAChB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;QAC3D,CAAC;QAED,6BAA6B;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,qBAAqB,CAAC,CAAC;QAExE,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;YAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAElD,IAAI,IAAI,IAAI,OAAO,EAAE,CAAC;gBACpB,IAAI,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC;YACvB,CAAC;QACH,CAAC;QAED,uCAAuC;QACvC,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YACtD,IAAI,OAAO,EAAE,CAAC;gBACZ,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAC5C,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;IACpD,CAAC;CACF"}
|