@j0hanz/superfetch 1.0.6 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/README.md +228 -36
  2. package/dist/config/index.d.ts +10 -5
  3. package/dist/config/index.d.ts.map +1 -1
  4. package/dist/config/index.js +73 -19
  5. package/dist/config/index.js.map +1 -1
  6. package/dist/config/types.d.ts +98 -57
  7. package/dist/config/types.d.ts.map +1 -1
  8. package/dist/errors/app-error.d.ts +4 -28
  9. package/dist/errors/app-error.d.ts.map +1 -1
  10. package/dist/errors/app-error.js +10 -51
  11. package/dist/errors/app-error.js.map +1 -1
  12. package/dist/index.js +10 -55
  13. package/dist/index.js.map +1 -1
  14. package/dist/middleware/error-handler.d.ts +2 -2
  15. package/dist/middleware/error-handler.d.ts.map +1 -1
  16. package/dist/middleware/error-handler.js +12 -14
  17. package/dist/middleware/error-handler.js.map +1 -1
  18. package/dist/middleware/rate-limiter.d.ts.map +1 -1
  19. package/dist/middleware/rate-limiter.js +0 -8
  20. package/dist/middleware/rate-limiter.js.map +1 -1
  21. package/dist/parsers/base-html-element-parser.d.ts +43 -0
  22. package/dist/parsers/base-html-element-parser.d.ts.map +1 -0
  23. package/dist/parsers/base-html-element-parser.js +59 -0
  24. package/dist/parsers/base-html-element-parser.js.map +1 -0
  25. package/dist/parsers/heading-element-parser.d.ts +14 -0
  26. package/dist/parsers/heading-element-parser.d.ts.map +1 -0
  27. package/dist/parsers/heading-element-parser.js +26 -0
  28. package/dist/parsers/heading-element-parser.js.map +1 -0
  29. package/dist/parsers/image-element-parser.d.ts +16 -0
  30. package/dist/parsers/image-element-parser.d.ts.map +1 -0
  31. package/dist/parsers/image-element-parser.js +33 -0
  32. package/dist/parsers/image-element-parser.js.map +1 -0
  33. package/dist/parsers/link-element-parser.d.ts +15 -0
  34. package/dist/parsers/link-element-parser.d.ts.map +1 -0
  35. package/dist/parsers/link-element-parser.js +28 -0
  36. package/dist/parsers/link-element-parser.js.map +1 -0
  37. package/dist/parsers/open-graph-parser.d.ts +17 -0
  38. package/dist/parsers/open-graph-parser.d.ts.map +1 -0
  39. package/dist/parsers/open-graph-parser.js +41 -0
  40. package/dist/parsers/open-graph-parser.js.map +1 -0
  41. package/dist/parsers/schema-org-parser.d.ts +17 -0
  42. package/dist/parsers/schema-org-parser.d.ts.map +1 -0
  43. package/dist/parsers/schema-org-parser.js +32 -0
  44. package/dist/parsers/schema-org-parser.js.map +1 -0
  45. package/dist/parsers/standard-meta-parser.d.ts +18 -0
  46. package/dist/parsers/standard-meta-parser.d.ts.map +1 -0
  47. package/dist/parsers/standard-meta-parser.js +32 -0
  48. package/dist/parsers/standard-meta-parser.js.map +1 -0
  49. package/dist/parsers/twitter-card-parser.d.ts +17 -0
  50. package/dist/parsers/twitter-card-parser.d.ts.map +1 -0
  51. package/dist/parsers/twitter-card-parser.js +41 -0
  52. package/dist/parsers/twitter-card-parser.js.map +1 -0
  53. package/dist/resources/cached-content.d.ts +0 -1
  54. package/dist/resources/cached-content.d.ts.map +1 -1
  55. package/dist/resources/cached-content.js +3 -9
  56. package/dist/resources/cached-content.js.map +1 -1
  57. package/dist/resources/index.d.ts.map +1 -1
  58. package/dist/resources/index.js +8 -8
  59. package/dist/resources/index.js.map +1 -1
  60. package/dist/server.d.ts.map +1 -1
  61. package/dist/server.js +10 -10
  62. package/dist/server.js.map +1 -1
  63. package/dist/services/cache.d.ts +0 -28
  64. package/dist/services/cache.d.ts.map +1 -1
  65. package/dist/services/cache.js +10 -173
  66. package/dist/services/cache.js.map +1 -1
  67. package/dist/services/extractor.d.ts +1 -11
  68. package/dist/services/extractor.d.ts.map +1 -1
  69. package/dist/services/extractor.js +86 -84
  70. package/dist/services/extractor.js.map +1 -1
  71. package/dist/services/fetcher.d.ts +2 -13
  72. package/dist/services/fetcher.d.ts.map +1 -1
  73. package/dist/services/fetcher.js +195 -211
  74. package/dist/services/fetcher.js.map +1 -1
  75. package/dist/services/logger.d.ts +5 -4
  76. package/dist/services/logger.d.ts.map +1 -1
  77. package/dist/services/logger.js +27 -42
  78. package/dist/services/logger.js.map +1 -1
  79. package/dist/services/parser.d.ts.map +1 -1
  80. package/dist/services/parser.js +35 -26
  81. package/dist/services/parser.js.map +1 -1
  82. package/dist/services/session-manager.d.ts +18 -0
  83. package/dist/services/session-manager.d.ts.map +1 -0
  84. package/dist/services/session-manager.js +73 -0
  85. package/dist/services/session-manager.js.map +1 -0
  86. package/dist/strategies/exponential-backoff-strategy.d.ts +13 -0
  87. package/dist/strategies/exponential-backoff-strategy.d.ts.map +1 -0
  88. package/dist/strategies/exponential-backoff-strategy.js +32 -0
  89. package/dist/strategies/exponential-backoff-strategy.js.map +1 -0
  90. package/dist/tools/handlers/fetch-links.tool.d.ts +2 -9
  91. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
  92. package/dist/tools/handlers/fetch-links.tool.js +0 -1
  93. package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
  94. package/dist/tools/handlers/fetch-markdown.tool.d.ts +5 -2
  95. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
  96. package/dist/tools/handlers/fetch-markdown.tool.js +23 -33
  97. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
  98. package/dist/tools/handlers/fetch-url.tool.d.ts +2 -9
  99. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
  100. package/dist/tools/handlers/fetch-url.tool.js +15 -20
  101. package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
  102. package/dist/tools/handlers/fetch-urls.tool.d.ts +2 -9
  103. package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -1
  104. package/dist/tools/handlers/fetch-urls.tool.js +124 -105
  105. package/dist/tools/handlers/fetch-urls.tool.js.map +1 -1
  106. package/dist/tools/index.d.ts.map +1 -1
  107. package/dist/tools/index.js +0 -4
  108. package/dist/tools/index.js.map +1 -1
  109. package/dist/tools/utils/common.d.ts +6 -7
  110. package/dist/tools/utils/common.d.ts.map +1 -1
  111. package/dist/tools/utils/common.js +8 -8
  112. package/dist/tools/utils/common.js.map +1 -1
  113. package/dist/tools/utils/fetch-pipeline.d.ts +8 -0
  114. package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
  115. package/dist/tools/utils/fetch-pipeline.js +47 -79
  116. package/dist/tools/utils/fetch-pipeline.js.map +1 -1
  117. package/dist/transformers/jsonl.transformer.d.ts +1 -1
  118. package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
  119. package/dist/transformers/jsonl.transformer.js +15 -10
  120. package/dist/transformers/jsonl.transformer.js.map +1 -1
  121. package/dist/transformers/markdown.transformer.d.ts.map +1 -1
  122. package/dist/transformers/markdown.transformer.js +58 -62
  123. package/dist/transformers/markdown.transformer.js.map +1 -1
  124. package/dist/utils/concurrency.d.ts +2 -5
  125. package/dist/utils/concurrency.d.ts.map +1 -1
  126. package/dist/utils/concurrency.js +19 -19
  127. package/dist/utils/concurrency.js.map +1 -1
  128. package/dist/utils/content-cleaner.d.ts +0 -25
  129. package/dist/utils/content-cleaner.d.ts.map +1 -1
  130. package/dist/utils/content-cleaner.js +12 -187
  131. package/dist/utils/content-cleaner.js.map +1 -1
  132. package/dist/utils/html-truncator.d.ts +2 -0
  133. package/dist/utils/html-truncator.d.ts.map +1 -0
  134. package/dist/utils/html-truncator.js +14 -0
  135. package/dist/utils/html-truncator.js.map +1 -0
  136. package/dist/utils/language-detector.d.ts +0 -3
  137. package/dist/utils/language-detector.d.ts.map +1 -1
  138. package/dist/utils/language-detector.js +0 -11
  139. package/dist/utils/language-detector.js.map +1 -1
  140. package/dist/utils/sanitizer.d.ts.map +1 -1
  141. package/dist/utils/sanitizer.js +7 -5
  142. package/dist/utils/sanitizer.js.map +1 -1
  143. package/dist/utils/tool-error-handler.d.ts.map +1 -1
  144. package/dist/utils/tool-error-handler.js +15 -42
  145. package/dist/utils/tool-error-handler.js.map +1 -1
  146. package/dist/utils/url-validator.d.ts +0 -6
  147. package/dist/utils/url-validator.d.ts.map +1 -1
  148. package/dist/utils/url-validator.js +12 -81
  149. package/dist/utils/url-validator.js.map +1 -1
  150. package/package.json +5 -6
@@ -1,26 +1,24 @@
1
- import { AppError, RateLimitError, ValidationError, } from '../errors/app-error.js';
1
+ import { FetchError } from '../errors/app-error.js';
2
2
  import { logError } from '../services/logger.js';
3
- export function errorHandler(err, _req, res,
4
- // eslint-disable-next-line @typescript-eslint/no-unused-vars -- Required by Express error handler signature
5
- _next) {
6
- const isAppError = err instanceof AppError;
7
- const statusCode = isAppError ? err.statusCode : 500;
8
- const code = isAppError ? err.code : 'INTERNAL_ERROR';
9
- const message = isAppError && err.isOperational ? err.message : 'Internal Server Error';
10
- logError(`HTTP ${statusCode}: ${err.message}`, err);
11
- if (err instanceof RateLimitError) {
12
- res.set('Retry-After', String(err.retryAfter));
3
+ export function errorHandler(err, req, res) {
4
+ const isFetchError = err instanceof FetchError;
5
+ const statusCode = isFetchError ? err.statusCode : 500;
6
+ const code = isFetchError ? err.code : 'INTERNAL_ERROR';
7
+ const message = isFetchError ? err.message : 'Internal Server Error';
8
+ logError(`HTTP ${statusCode}: ${err.message} - ${req.method} ${req.path}`, err);
9
+ if (isFetchError && err.code === 'RATE_LIMITED' && err.details.retryAfter) {
10
+ const retryAfter = err.details.retryAfter;
11
+ res.set('Retry-After', String(retryAfter));
13
12
  }
14
13
  const response = {
15
14
  error: {
16
15
  message,
17
16
  code,
18
17
  statusCode,
18
+ ...(isFetchError &&
19
+ Object.keys(err.details).length > 0 && { details: err.details }),
19
20
  },
20
21
  };
21
- if (err instanceof ValidationError && err.details) {
22
- response.error.details = err.details;
23
- }
24
22
  if (process.env.NODE_ENV === 'development') {
25
23
  response.error.stack = err.stack;
26
24
  }
@@ -1 +1 @@
1
- {"version":3,"file":"error-handler.js","sourceRoot":"","sources":["../../src/middleware/error-handler.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,QAAQ,EACR,cAAc,EACd,eAAe,GAChB,MAAM,wBAAwB,CAAC;AAEhC,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAEjD,MAAM,UAAU,YAAY,CAC1B,GAAU,EACV,IAAa,EACb,GAAa;AACb,4GAA4G;AAC5G,KAAmB;IAEnB,MAAM,UAAU,GAAG,GAAG,YAAY,QAAQ,CAAC;IAC3C,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC;IACrD,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,gBAAgB,CAAC;IACtD,MAAM,OAAO,GACX,UAAU,IAAI,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,uBAAuB,CAAC;IAE1E,QAAQ,CAAC,QAAQ,UAAU,KAAK,GAAG,CAAC,OAAO,EAAE,EAAE,GAAG,CAAC,CAAC;IAEpD,IAAI,GAAG,YAAY,cAAc,EAAE,CAAC;QAClC,GAAG,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC;IACjD,CAAC;IAED,MAAM,QAAQ,GAAkB;QAC9B,KAAK,EAAE;YACL,OAAO;YACP,IAAI;YACJ,UAAU;SACX;KACF,CAAC;IAEF,IAAI,GAAG,YAAY,eAAe,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC;QAClD,QAAQ,CAAC,KAAK,CAAC,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC;IACvC,CAAC;IAED,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,aAAa,EAAE,CAAC;QAC3C,QAAQ,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;IACnC,CAAC;IAED,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AACxC,CAAC"}
1
+ {"version":3,"file":"error-handler.js","sourceRoot":"","sources":["../../src/middleware/error-handler.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAEpD,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAEjD,MAAM,UAAU,YAAY,CAAC,GAAU,EAAE,GAAY,EAAE,GAAa;IAClE,MAAM,YAAY,GAAG,GAAG,YAAY,UAAU,CAAC;IAC/C,MAAM,UAAU,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC;IACvD,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,gBAAgB,CAAC;IACxD,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,uBAAuB,CAAC;IAErE,QAAQ,CACN,QAAQ,UAAU,KAAK,GAAG,CAAC,OAAO,MAAM,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,IAAI,EAAE,EAChE,GAAG,CACJ,CAAC;IAEF,IAAI,YAAY,IAAI,GAAG,CAAC,IAAI,KAAK,cAAc,IAAI,GAAG,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC;QAC1E,MAAM,UAAU,GAAG,GAAG,CAAC,OAAO,CAAC,UAAoB,CAAC;QACpD,GAAG,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;IAC7C,CAAC;IAED,MAAM,QAAQ,GAAkB;QAC9B,KAAK,EAAE;YACL,OAAO;YACP,IAAI;YACJ,UAAU;YACV,GAAG,CAAC,YAAY;gBACd,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,EAAE,CAAC;SACnE;KACF,CAAC;IAEF,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,aAAa,EAAE,CAAC;QAC3C,QAAQ,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC;IACnC,CAAC;IAED,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AACxC,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"rate-limiter.d.ts","sourceRoot":"","sources":["../../src/middleware/rate-limiter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAE/D,OAAO,KAAK,EAAkB,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAqB7E,cAAM,WAAW;IACf,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAqC;IAC3D,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,eAAe,CAA+C;gBAE1D,OAAO,GAAE,OAAO,CAAC,kBAAkB,CAAM;IAmBrD,OAAO,IAAI,IAAI;IAQf,UAAU,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,IAAI,EAAE,YAAY,KAAK,IAAI;IAkDvE,OAAO,CAAC,MAAM;IAyBd,OAAO,CAAC,OAAO;CAUhB;AAID,eAAO,MAAM,WAAW,aAGtB,CAAC"}
1
+ {"version":3,"file":"rate-limiter.d.ts","sourceRoot":"","sources":["../../src/middleware/rate-limiter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAE/D,OAAO,KAAK,EAAkB,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAoB7E,cAAM,WAAW;IACf,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAqC;IAC3D,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,eAAe,CAA+C;gBAE1D,OAAO,GAAE,OAAO,CAAC,kBAAkB,CAAM;IAmBrD,OAAO,IAAI,IAAI;IAQf,UAAU,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,IAAI,EAAE,YAAY,KAAK,IAAI;IA8CvE,OAAO,CAAC,MAAM;IAwBd,OAAO,CAAC,OAAO;CAUhB;AAED,eAAO,MAAM,WAAW,aAGtB,CAAC"}
@@ -7,7 +7,6 @@ const MIN_MAX_REQUESTS = 1;
7
7
  const MAX_MAX_REQUESTS = 10000;
8
8
  const MIN_WINDOW_MS = 1000;
9
9
  const MAX_WINDOW_MS = 3600000;
10
- // Trusted proxy IPs - only trust proxy headers from these sources
11
10
  const TRUSTED_PROXIES = new Set((process.env.TRUSTED_PROXIES ?? '')
12
11
  .split(',')
13
12
  .map((ip) => ip.trim())
@@ -40,7 +39,6 @@ class RateLimiter {
40
39
  const now = Date.now();
41
40
  let entry = this.store.get(key);
42
41
  if (!entry || now > entry.resetTime) {
43
- // New window: start with count=1 (this request)
44
42
  entry = {
45
43
  count: 1,
46
44
  resetTime: now + this.windowMs,
@@ -49,7 +47,6 @@ class RateLimiter {
49
47
  this.store.set(key, entry);
50
48
  }
51
49
  else {
52
- // Check limit BEFORE increment to prevent race condition allowing maxRequests+1
53
50
  if (entry.count >= this.maxRequests) {
54
51
  const retryAfter = Math.ceil((entry.resetTime - now) / 1000);
55
52
  res.set('Retry-After', String(retryAfter));
@@ -59,11 +56,9 @@ class RateLimiter {
59
56
  });
60
57
  return;
61
58
  }
62
- // Existing window: increment after check
63
59
  entry.count++;
64
60
  entry.lastAccessed = now;
65
61
  }
66
- // Check should never trigger now, but keep as defensive programming
67
62
  if (entry.count > this.maxRequests) {
68
63
  const retryAfter = Math.ceil((entry.resetTime - now) / 1000);
69
64
  res.set('Retry-After', String(retryAfter));
@@ -81,7 +76,6 @@ class RateLimiter {
81
76
  }
82
77
  getKey(req) {
83
78
  const fallback = req.ip ?? req.socket.remoteAddress ?? 'unknown';
84
- // Only trust proxy headers if request comes from trusted proxy
85
79
  const sourceIp = req.socket.remoteAddress ?? '';
86
80
  const isTrustedProxy = TRUSTED_PROXIES.size === 0 || TRUSTED_PROXIES.has(sourceIp);
87
81
  let ip = fallback;
@@ -110,8 +104,6 @@ class RateLimiter {
110
104
  }
111
105
  }
112
106
  }
113
- // Create default rate limiter instance
114
- // Override via RateLimiter constructor if different values needed
115
107
  export const rateLimiter = new RateLimiter({
116
108
  maxRequests: 100,
117
109
  windowMs: 60000,
@@ -1 +1 @@
1
- {"version":3,"file":"rate-limiter.js","sourceRoot":"","sources":["../../src/middleware/rate-limiter.ts"],"names":[],"mappings":"AAIA,MAAM,eAAe,GAAuB;IAC1C,WAAW,EAAE,GAAG;IAChB,QAAQ,EAAE,KAAK;IACf,iBAAiB,EAAE,KAAK;CACzB,CAAC;AAEF,MAAM,gBAAgB,GAAG,CAAC,CAAC;AAC3B,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAC/B,MAAM,aAAa,GAAG,IAAI,CAAC;AAC3B,MAAM,aAAa,GAAG,OAAO,CAAC;AAE9B,kEAAkE;AAClE,MAAM,eAAe,GAAG,IAAI,GAAG,CAC7B,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,EAAE,CAAC;KAChC,KAAK,CAAC,GAAG,CAAC;KACV,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,EAAE,CAAC;KACtB,MAAM,CAAC,OAAO,CAAC,CACnB,CAAC;AAEF,MAAM,WAAW;IACE,KAAK,GAAG,IAAI,GAAG,EAA0B,CAAC;IAC1C,WAAW,CAAS;IACpB,QAAQ,CAAS;IAC1B,eAAe,GAA0C,IAAI,CAAC;IAEtE,YAAY,UAAuC,EAAE;QACnD,MAAM,IAAI,GAAG,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;QAEhD,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,CACzB,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,gBAAgB,CAAC,EAC5C,gBAAgB,CACjB,CAAC;QACF,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,CACtB,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,CAAC,EACtC,aAAa,CACd,CAAC;QACF,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,iBAAiB,EAAE,aAAa,CAAC,CAAC;QAExE,IAAI,CAAC,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;YACtC,IAAI,CAAC,OAAO,EAAE,CAAC;QACjB,CAAC,EAAE,eAAe,CAAC,CAAC;QACpB,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;IAED,OAAO;QACL,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YACpC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC9B,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IACrB,CAAC;IAED,UAAU;QACR,OAAO,CAAC,GAAY,EAAE,GAAa,EAAE,IAAkB,EAAQ,EAAE;YAC/D,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEvB,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAEhC,IAAI,CAAC,KAAK,IAAI,GAAG,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;gBACpC,gDAAgD;gBAChD,KAAK,GAAG;oBACN,KAAK,EAAE,CAAC;oBACR,SAAS,EAAE,GAAG,GAAG,IAAI,CAAC,QAAQ;oBAC9B,YAAY,EAAE,GAAG;iBAClB,CAAC;gBACF,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YAC7B,CAAC;iBAAM,CAAC;gBACN,gFAAgF;gBAChF,IAAI,KAAK,CAAC,KAAK,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;oBACpC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;oBAC7D,GAAG,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;oBAC3C,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;wBACnB,KAAK,EAAE,mBAAmB;wBAC1B,UAAU;qBACX,CAAC,CAAC;oBACH,OAAO;gBACT,CAAC;gBACD,yCAAyC;gBACzC,KAAK,CAAC,KAAK,EAAE,CAAC;gBACd,KAAK,CAAC,YAAY,GAAG,GAAG,CAAC;YAC3B,CAAC;YAED,oEAAoE;YACpE,IAAI,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;gBACnC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;gBAC7D,GAAG,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;gBAC3C,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,mBAAmB;oBAC1B,UAAU;iBACX,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,GAAG,CAAC,GAAG,CAAC,mBAAmB,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;YACvD,GAAG,CAAC,GAAG,CAAC,uBAAuB,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;YACzE,GAAG,CAAC,GAAG,CAAC,mBAAmB,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAExE,IAAI,EAAE,CAAC;QACT,CAAC,CAAC;IACJ,CAAC;IAEO,MAAM,CAAC,GAAY;QACzB,MAAM,QAAQ,GAAG,GAAG,CAAC,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,aAAa,IAAI,SAAS,CAAC;QAEjE,+DAA+D;QAC/D,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,aAAa,IAAI,EAAE,CAAC;QAChD,MAAM,cAAc,GAClB,eAAe,CAAC,IAAI,KAAK,CAAC,IAAI,eAAe,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAE9D,IAAI,EAAE,GAAW,QAAQ,CAAC;QAC1B,IAAI,cAAc,EAAE,CAAC;YACnB,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YACxC,MAAM,YAAY,GAAG,GAAG,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC;YAEpD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,EAAE,CAAC;gBACzC,EAAE,GAAG,MAAM,CAAC;YACd,CAAC;iBAAM,IAAI,OAAO,YAAY,KAAK,QAAQ,EAAE,CAAC;gBAC5C,MAAM,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;gBACnD,IAAI,OAAO;oBAAE,EAAE,GAAG,OAAO,CAAC;YAC5B,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACrE,OAAO,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC;IACtD,CAAC;IAEO,OAAO;QACb,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,aAAa,GAAG,OAAO,CAAC,CAAC,SAAS;QAExC,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACtC,IAAI,KAAK,CAAC,SAAS,GAAG,GAAG,IAAI,GAAG,GAAG,KAAK,CAAC,YAAY,GAAG,aAAa,EAAE,CAAC;gBACtE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;CACF;AAED,uCAAuC;AACvC,kEAAkE;AAClE,MAAM,CAAC,MAAM,WAAW,GAAG,IAAI,WAAW,CAAC;IACzC,WAAW,EAAE,GAAG;IAChB,QAAQ,EAAE,KAAK;CAChB,CAAC,CAAC"}
1
+ {"version":3,"file":"rate-limiter.js","sourceRoot":"","sources":["../../src/middleware/rate-limiter.ts"],"names":[],"mappings":"AAIA,MAAM,eAAe,GAAuB;IAC1C,WAAW,EAAE,GAAG;IAChB,QAAQ,EAAE,KAAK;IACf,iBAAiB,EAAE,KAAK;CACzB,CAAC;AAEF,MAAM,gBAAgB,GAAG,CAAC,CAAC;AAC3B,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAC/B,MAAM,aAAa,GAAG,IAAI,CAAC;AAC3B,MAAM,aAAa,GAAG,OAAO,CAAC;AAE9B,MAAM,eAAe,GAAG,IAAI,GAAG,CAC7B,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,EAAE,CAAC;KAChC,KAAK,CAAC,GAAG,CAAC;KACV,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,EAAE,CAAC;KACtB,MAAM,CAAC,OAAO,CAAC,CACnB,CAAC;AAEF,MAAM,WAAW;IACE,KAAK,GAAG,IAAI,GAAG,EAA0B,CAAC;IAC1C,WAAW,CAAS;IACpB,QAAQ,CAAS;IAC1B,eAAe,GAA0C,IAAI,CAAC;IAEtE,YAAY,UAAuC,EAAE;QACnD,MAAM,IAAI,GAAG,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;QAEhD,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,CACzB,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,gBAAgB,CAAC,EAC5C,gBAAgB,CACjB,CAAC;QACF,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,CACtB,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,aAAa,CAAC,EACtC,aAAa,CACd,CAAC;QACF,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,iBAAiB,EAAE,aAAa,CAAC,CAAC;QAExE,IAAI,CAAC,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;YACtC,IAAI,CAAC,OAAO,EAAE,CAAC;QACjB,CAAC,EAAE,eAAe,CAAC,CAAC;QACpB,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;IAED,OAAO;QACL,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YACpC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC9B,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IACrB,CAAC;IAED,UAAU;QACR,OAAO,CAAC,GAAY,EAAE,GAAa,EAAE,IAAkB,EAAQ,EAAE;YAC/D,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEvB,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAEhC,IAAI,CAAC,KAAK,IAAI,GAAG,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;gBACpC,KAAK,GAAG;oBACN,KAAK,EAAE,CAAC;oBACR,SAAS,EAAE,GAAG,GAAG,IAAI,CAAC,QAAQ;oBAC9B,YAAY,EAAE,GAAG;iBAClB,CAAC;gBACF,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YAC7B,CAAC;iBAAM,CAAC;gBACN,IAAI,KAAK,CAAC,KAAK,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;oBACpC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;oBAC7D,GAAG,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;oBAC3C,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;wBACnB,KAAK,EAAE,mBAAmB;wBAC1B,UAAU;qBACX,CAAC,CAAC;oBACH,OAAO;gBACT,CAAC;gBACD,KAAK,CAAC,KAAK,EAAE,CAAC;gBACd,KAAK,CAAC,YAAY,GAAG,GAAG,CAAC;YAC3B,CAAC;YAED,IAAI,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;gBACnC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;gBAC7D,GAAG,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC;gBAC3C,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;oBACnB,KAAK,EAAE,mBAAmB;oBAC1B,UAAU;iBACX,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,GAAG,CAAC,GAAG,CAAC,mBAAmB,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;YACvD,GAAG,CAAC,GAAG,CAAC,uBAAuB,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;YACzE,GAAG,CAAC,GAAG,CAAC,mBAAmB,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAExE,IAAI,EAAE,CAAC;QACT,CAAC,CAAC;IACJ,CAAC;IAEO,MAAM,CAAC,GAAY;QACzB,MAAM,QAAQ,GAAG,GAAG,CAAC,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,aAAa,IAAI,SAAS,CAAC;QAEjE,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,aAAa,IAAI,EAAE,CAAC;QAChD,MAAM,cAAc,GAClB,eAAe,CAAC,IAAI,KAAK,CAAC,IAAI,eAAe,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAE9D,IAAI,EAAE,GAAW,QAAQ,CAAC;QAC1B,IAAI,cAAc,EAAE,CAAC;YACnB,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YACxC,MAAM,YAAY,GAAG,GAAG,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC;YAEpD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,EAAE,CAAC;gBACzC,EAAE,GAAG,MAAM,CAAC;YACd,CAAC;iBAAM,IAAI,OAAO,YAAY,KAAK,QAAQ,EAAE,CAAC;gBAC5C,MAAM,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;gBACnD,IAAI,OAAO;oBAAE,EAAE,GAAG,OAAO,CAAC;YAC5B,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACrE,OAAO,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC;IACtD,CAAC;IAEO,OAAO;QACb,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,MAAM,aAAa,GAAG,OAAO,CAAC,CAAC,SAAS;QAExC,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACtC,IAAI,KAAK,CAAC,SAAS,GAAG,GAAG,IAAI,GAAG,GAAG,KAAK,CAAC,YAAY,GAAG,aAAa,EAAE,CAAC;gBACtE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;CACF;AAED,MAAM,CAAC,MAAM,WAAW,GAAG,IAAI,WAAW,CAAC;IACzC,WAAW,EAAE,GAAG;IAChB,QAAQ,EAAE,KAAK;CAChB,CAAC,CAAC"}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Base class for HTML element parsers that extract specific content types.
3
+ * Provides common selector querying and attribute extraction functionality.
4
+ */
5
+ export declare abstract class BaseHTMLElementParser<T> {
6
+ /**
7
+ * Parse HTML content and extract structured data.
8
+ * @param document - The JSDOM document to parse
9
+ * @returns Parsed data of type T or null if extraction fails
10
+ */
11
+ abstract parse(document: Document): T | null;
12
+ /**
13
+ * Validate the parsed data conforms to expected type.
14
+ * @param data - The data to validate
15
+ * @returns True if data is valid
16
+ */
17
+ protected isValidData(data: T | null): data is T;
18
+ /**
19
+ * Query a single element using CSS selector.
20
+ */
21
+ protected querySelector(parent: Document | Element, selector: string): Element | null;
22
+ /**
23
+ * Query multiple elements using CSS selector.
24
+ */
25
+ protected querySelectorAll(parent: Document | Element, selector: string): Element[];
26
+ /**
27
+ * Safely extract text content from an element.
28
+ */
29
+ protected getTextContent(element: Element | null): string | undefined;
30
+ /**
31
+ * Safely extract attribute value from an element.
32
+ */
33
+ protected getAttribute(element: Element | null, attr: string): string | undefined;
34
+ /**
35
+ * Extract multiple attributes from an element.
36
+ */
37
+ protected getAttributes(element: Element | null, attrs: string[]): Record<string, string>;
38
+ /**
39
+ * Check if element exists and has content.
40
+ */
41
+ protected hasContent(element: Element | null): boolean;
42
+ }
43
+ //# sourceMappingURL=base-html-element-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base-html-element-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/base-html-element-parser.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,8BAAsB,qBAAqB,CAAC,CAAC;IAC3C;;;;OAIG;IACH,QAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,CAAC,GAAG,IAAI;IAE5C;;;;OAIG;IACH,SAAS,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,GAAG,IAAI,GAAG,IAAI,IAAI,CAAC;IAIhD;;OAEG;IACH,SAAS,CAAC,aAAa,CACrB,MAAM,EAAE,QAAQ,GAAG,OAAO,EAC1B,QAAQ,EAAE,MAAM,GACf,OAAO,GAAG,IAAI;IAIjB;;OAEG;IACH,SAAS,CAAC,gBAAgB,CACxB,MAAM,EAAE,QAAQ,GAAG,OAAO,EAC1B,QAAQ,EAAE,MAAM,GACf,OAAO,EAAE;IAIZ;;OAEG;IACH,SAAS,CAAC,cAAc,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,GAAG,MAAM,GAAG,SAAS;IAIrE;;OAEG;IACH,SAAS,CAAC,YAAY,CACpB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,IAAI,EAAE,MAAM,GACX,MAAM,GAAG,SAAS;IAIrB;;OAEG;IACH,SAAS,CAAC,aAAa,CACrB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,KAAK,EAAE,MAAM,EAAE,GACd,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;IAWzB;;OAEG;IACH,SAAS,CAAC,UAAU,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,GAAG,OAAO;CAGvD"}
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Base class for HTML element parsers that extract specific content types.
3
+ * Provides common selector querying and attribute extraction functionality.
4
+ */
5
+ export class BaseHTMLElementParser {
6
+ /**
7
+ * Validate the parsed data conforms to expected type.
8
+ * @param data - The data to validate
9
+ * @returns True if data is valid
10
+ */
11
+ isValidData(data) {
12
+ return data !== null;
13
+ }
14
+ /**
15
+ * Query a single element using CSS selector.
16
+ */
17
+ querySelector(parent, selector) {
18
+ return parent.querySelector(selector);
19
+ }
20
+ /**
21
+ * Query multiple elements using CSS selector.
22
+ */
23
+ querySelectorAll(parent, selector) {
24
+ return Array.from(parent.querySelectorAll(selector));
25
+ }
26
+ /**
27
+ * Safely extract text content from an element.
28
+ */
29
+ getTextContent(element) {
30
+ return element?.textContent.trim() ?? undefined;
31
+ }
32
+ /**
33
+ * Safely extract attribute value from an element.
34
+ */
35
+ getAttribute(element, attr) {
36
+ return element?.getAttribute(attr)?.trim() ?? undefined;
37
+ }
38
+ /**
39
+ * Extract multiple attributes from an element.
40
+ */
41
+ getAttributes(element, attrs) {
42
+ const result = {};
43
+ if (!element)
44
+ return result;
45
+ for (const attr of attrs) {
46
+ const value = this.getAttribute(element, attr);
47
+ if (value)
48
+ result[attr] = value;
49
+ }
50
+ return result;
51
+ }
52
+ /**
53
+ * Check if element exists and has content.
54
+ */
55
+ hasContent(element) {
56
+ return !!element && !!element.textContent.trim();
57
+ }
58
+ }
59
+ //# sourceMappingURL=base-html-element-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"base-html-element-parser.js","sourceRoot":"","sources":["../../src/parsers/base-html-element-parser.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,OAAgB,qBAAqB;IAQzC;;;;OAIG;IACO,WAAW,CAAC,IAAc;QAClC,OAAO,IAAI,KAAK,IAAI,CAAC;IACvB,CAAC;IAED;;OAEG;IACO,aAAa,CACrB,MAA0B,EAC1B,QAAgB;QAEhB,OAAO,MAAM,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC;IAED;;OAEG;IACO,gBAAgB,CACxB,MAA0B,EAC1B,QAAgB;QAEhB,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;IACvD,CAAC;IAED;;OAEG;IACO,cAAc,CAAC,OAAuB;QAC9C,OAAO,OAAO,EAAE,WAAW,CAAC,IAAI,EAAE,IAAI,SAAS,CAAC;IAClD,CAAC;IAED;;OAEG;IACO,YAAY,CACpB,OAAuB,EACvB,IAAY;QAEZ,OAAO,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;IAC1D,CAAC;IAED;;OAEG;IACO,aAAa,CACrB,OAAuB,EACvB,KAAe;QAEf,MAAM,MAAM,GAA2B,EAAE,CAAC;QAC1C,IAAI,CAAC,OAAO;YAAE,OAAO,MAAM,CAAC;QAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;YAC/C,IAAI,KAAK;gBAAE,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC;QAClC,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACO,UAAU,CAAC,OAAuB;QAC1C,OAAO,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;IACnD,CAAC;CACF"}
@@ -0,0 +1,14 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ interface HeadingData {
3
+ level: number;
4
+ text: string;
5
+ id?: string;
6
+ }
7
+ /**
8
+ * Parser for HTML heading elements (h1-h6) with hierarchy.
9
+ */
10
+ export declare class HeadingElementParser extends BaseHTMLElementParser<HeadingData[]> {
11
+ parse(document: Document): HeadingData[] | null;
12
+ }
13
+ export {};
14
+ //# sourceMappingURL=heading-element-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"heading-element-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/heading-element-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,WAAW;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,CAAC,EAAE,MAAM,CAAC;CACb;AAED;;GAEG;AACH,qBAAa,oBAAqB,SAAQ,qBAAqB,CAAC,WAAW,EAAE,CAAC;IAC5E,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,WAAW,EAAE,GAAG,IAAI;CAuBhD"}
@@ -0,0 +1,26 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ /**
3
+ * Parser for HTML heading elements (h1-h6) with hierarchy.
4
+ */
5
+ export class HeadingElementParser extends BaseHTMLElementParser {
6
+ parse(document) {
7
+ const headings = [];
8
+ // Extract all heading elements
9
+ for (let level = 1; level <= 6; level++) {
10
+ const elements = this.querySelectorAll(document, `h${level}`);
11
+ for (const el of elements) {
12
+ const text = this.getTextContent(el);
13
+ if (!text)
14
+ continue;
15
+ const heading = { level, text };
16
+ // Optional id attribute
17
+ const id = this.getAttribute(el, 'id');
18
+ if (id)
19
+ heading.id = id;
20
+ headings.push(heading);
21
+ }
22
+ }
23
+ return headings.length > 0 ? headings : null;
24
+ }
25
+ }
26
+ //# sourceMappingURL=heading-element-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"heading-element-parser.js","sourceRoot":"","sources":["../../src/parsers/heading-element-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAQtE;;GAEG;AACH,MAAM,OAAO,oBAAqB,SAAQ,qBAAoC;IAC5E,KAAK,CAAC,QAAkB;QACtB,MAAM,QAAQ,GAAkB,EAAE,CAAC;QAEnC,+BAA+B;QAC/B,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,IAAI,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;YACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,IAAI,KAAK,EAAE,CAAC,CAAC;YAE9D,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;gBAC1B,MAAM,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC;gBACrC,IAAI,CAAC,IAAI;oBAAE,SAAS;gBAEpB,MAAM,OAAO,GAAgB,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;gBAE7C,wBAAwB;gBACxB,MAAM,EAAE,GAAG,IAAI,CAAC,YAAY,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC;gBACvC,IAAI,EAAE;oBAAE,OAAO,CAAC,EAAE,GAAG,EAAE,CAAC;gBAExB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC;IAC/C,CAAC;CACF"}
@@ -0,0 +1,16 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ interface ImageData {
3
+ src?: string;
4
+ alt?: string;
5
+ width?: string;
6
+ height?: string;
7
+ loading?: string;
8
+ }
9
+ /**
10
+ * Parser for HTML image elements with metadata.
11
+ */
12
+ export declare class ImageElementParser extends BaseHTMLElementParser<ImageData[]> {
13
+ parse(document: Document): ImageData[] | null;
14
+ }
15
+ export {};
16
+ //# sourceMappingURL=image-element-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image-element-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/image-element-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,SAAS;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,qBAAa,kBAAmB,SAAQ,qBAAqB,CAAC,SAAS,EAAE,CAAC;IACxE,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,SAAS,EAAE,GAAG,IAAI;CA8B9C"}
@@ -0,0 +1,33 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ /**
3
+ * Parser for HTML image elements with metadata.
4
+ */
5
+ export class ImageElementParser extends BaseHTMLElementParser {
6
+ parse(document) {
7
+ const images = [];
8
+ // Extract all img elements with src
9
+ const imgElements = this.querySelectorAll(document, 'img[src]');
10
+ for (const img of imgElements) {
11
+ const src = this.getAttribute(img, 'src');
12
+ if (!src)
13
+ continue;
14
+ const imageData = { src };
15
+ // Optional attributes
16
+ const alt = this.getAttribute(img, 'alt');
17
+ if (alt)
18
+ imageData.alt = alt;
19
+ const width = this.getAttribute(img, 'width');
20
+ if (width)
21
+ imageData.width = width;
22
+ const height = this.getAttribute(img, 'height');
23
+ if (height)
24
+ imageData.height = height;
25
+ const loading = this.getAttribute(img, 'loading');
26
+ if (loading)
27
+ imageData.loading = loading;
28
+ images.push(imageData);
29
+ }
30
+ return images.length > 0 ? images : null;
31
+ }
32
+ }
33
+ //# sourceMappingURL=image-element-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image-element-parser.js","sourceRoot":"","sources":["../../src/parsers/image-element-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAUtE;;GAEG;AACH,MAAM,OAAO,kBAAmB,SAAQ,qBAAkC;IACxE,KAAK,CAAC,QAAkB;QACtB,MAAM,MAAM,GAAgB,EAAE,CAAC;QAE/B,oCAAoC;QACpC,MAAM,WAAW,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAEhE,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YAC1C,IAAI,CAAC,GAAG;gBAAE,SAAS;YAEnB,MAAM,SAAS,GAAc,EAAE,GAAG,EAAE,CAAC;YAErC,sBAAsB;YACtB,MAAM,GAAG,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YAC1C,IAAI,GAAG;gBAAE,SAAS,CAAC,GAAG,GAAG,GAAG,CAAC;YAE7B,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YAC9C,IAAI,KAAK;gBAAE,SAAS,CAAC,KAAK,GAAG,KAAK,CAAC;YAEnC,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;YAChD,IAAI,MAAM;gBAAE,SAAS,CAAC,MAAM,GAAG,MAAM,CAAC;YAEtC,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAClD,IAAI,OAAO;gBAAE,SAAS,CAAC,OAAO,GAAG,OAAO,CAAC;YAEzC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACzB,CAAC;QAED,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;IAC3C,CAAC;CACF"}
@@ -0,0 +1,15 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ interface LinkData {
3
+ rel?: string;
4
+ href?: string;
5
+ type?: string;
6
+ title?: string;
7
+ }
8
+ /**
9
+ * Parser for HTML link elements (canonical, alternate, icon, etc.).
10
+ */
11
+ export declare class LinkElementParser extends BaseHTMLElementParser<LinkData[]> {
12
+ parse(document: Document): LinkData[] | null;
13
+ }
14
+ export {};
15
+ //# sourceMappingURL=link-element-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"link-element-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/link-element-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,QAAQ;IAChB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,qBAAqB,CAAC,QAAQ,EAAE,CAAC;IACtE,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,QAAQ,EAAE,GAAG,IAAI;CA0B7C"}
@@ -0,0 +1,28 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ /**
3
+ * Parser for HTML link elements (canonical, alternate, icon, etc.).
4
+ */
5
+ export class LinkElementParser extends BaseHTMLElementParser {
6
+ parse(document) {
7
+ const links = [];
8
+ // Extract all link elements with rel and href
9
+ const linkElements = this.querySelectorAll(document, 'link[rel][href]');
10
+ for (const link of linkElements) {
11
+ const rel = this.getAttribute(link, 'rel');
12
+ const href = this.getAttribute(link, 'href');
13
+ if (!rel || !href)
14
+ continue;
15
+ const linkData = { rel, href };
16
+ // Optional attributes
17
+ const type = this.getAttribute(link, 'type');
18
+ if (type)
19
+ linkData.type = type;
20
+ const title = this.getAttribute(link, 'title');
21
+ if (title)
22
+ linkData.title = title;
23
+ links.push(linkData);
24
+ }
25
+ return links.length > 0 ? links : null;
26
+ }
27
+ }
28
+ //# sourceMappingURL=link-element-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"link-element-parser.js","sourceRoot":"","sources":["../../src/parsers/link-element-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAStE;;GAEG;AACH,MAAM,OAAO,iBAAkB,SAAQ,qBAAiC;IACtE,KAAK,CAAC,QAAkB;QACtB,MAAM,KAAK,GAAe,EAAE,CAAC;QAE7B,8CAA8C;QAC9C,MAAM,YAAY,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,iBAAiB,CAAC,CAAC;QAExE,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;YAChC,MAAM,GAAG,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;YAC3C,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAE7C,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI;gBAAE,SAAS;YAE5B,MAAM,QAAQ,GAAa,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC;YAEzC,sBAAsB;YACtB,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC7C,IAAI,IAAI;gBAAE,QAAQ,CAAC,IAAI,GAAG,IAAI,CAAC;YAE/B,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAC/C,IAAI,KAAK;gBAAE,QAAQ,CAAC,KAAK,GAAG,KAAK,CAAC;YAElC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACvB,CAAC;QAED,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;IACzC,CAAC;CACF"}
@@ -0,0 +1,17 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ interface OpenGraphData {
3
+ title?: string;
4
+ description?: string;
5
+ image?: string;
6
+ url?: string;
7
+ siteName?: string;
8
+ type?: string;
9
+ }
10
+ /**
11
+ * Parser for Open Graph meta tags (og:*).
12
+ */
13
+ export declare class OpenGraphParser extends BaseHTMLElementParser<OpenGraphData> {
14
+ parse(document: Document): OpenGraphData | null;
15
+ }
16
+ export {};
17
+ //# sourceMappingURL=open-graph-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"open-graph-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/open-graph-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,aAAa;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,qBAAa,eAAgB,SAAQ,qBAAqB,CAAC,aAAa,CAAC;IACvE,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,aAAa,GAAG,IAAI;CAuChD"}
@@ -0,0 +1,41 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ /**
3
+ * Parser for Open Graph meta tags (og:*).
4
+ */
5
+ export class OpenGraphParser extends BaseHTMLElementParser {
6
+ parse(document) {
7
+ const data = {};
8
+ // Extract all Open Graph meta tags
9
+ const ogTags = this.querySelectorAll(document, 'meta[property^="og:"]');
10
+ for (const tag of ogTags) {
11
+ const property = this.getAttribute(tag, 'property');
12
+ const content = this.getAttribute(tag, 'content');
13
+ if (!property || !content)
14
+ continue;
15
+ // Extract property name (e.g., "og:title" → "title")
16
+ const key = property.replace('og:', '');
17
+ switch (key) {
18
+ case 'title':
19
+ data.title = content;
20
+ break;
21
+ case 'description':
22
+ data.description = content;
23
+ break;
24
+ case 'image':
25
+ data.image = content;
26
+ break;
27
+ case 'url':
28
+ data.url = content;
29
+ break;
30
+ case 'site_name':
31
+ data.siteName = content;
32
+ break;
33
+ case 'type':
34
+ data.type = content;
35
+ break;
36
+ }
37
+ }
38
+ return Object.keys(data).length > 0 ? data : null;
39
+ }
40
+ }
41
+ //# sourceMappingURL=open-graph-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"open-graph-parser.js","sourceRoot":"","sources":["../../src/parsers/open-graph-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAWtE;;GAEG;AACH,MAAM,OAAO,eAAgB,SAAQ,qBAAoC;IACvE,KAAK,CAAC,QAAkB;QACtB,MAAM,IAAI,GAAkB,EAAE,CAAC;QAE/B,mCAAmC;QACnC,MAAM,MAAM,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,uBAAuB,CAAC,CAAC;QAExE,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;YACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;YACpD,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAElD,IAAI,CAAC,QAAQ,IAAI,CAAC,OAAO;gBAAE,SAAS;YAEpC,qDAAqD;YACrD,MAAM,GAAG,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YAExC,QAAQ,GAAG,EAAE,CAAC;gBACZ,KAAK,OAAO;oBACV,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC;oBACrB,MAAM;gBACR,KAAK,aAAa;oBAChB,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC;oBAC3B,MAAM;gBACR,KAAK,OAAO;oBACV,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC;oBACrB,MAAM;gBACR,KAAK,KAAK;oBACR,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC;oBACnB,MAAM;gBACR,KAAK,WAAW;oBACd,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;oBACxB,MAAM;gBACR,KAAK,MAAM;oBACT,IAAI,CAAC,IAAI,GAAG,OAAO,CAAC;oBACpB,MAAM;YACV,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;IACpD,CAAC;CACF"}
@@ -0,0 +1,17 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ interface SchemaOrgData {
3
+ type?: string;
4
+ name?: string;
5
+ description?: string;
6
+ image?: string;
7
+ url?: string;
8
+ [key: string]: unknown;
9
+ }
10
+ /**
11
+ * Parser for JSON-LD Schema.org structured data.
12
+ */
13
+ export declare class SchemaOrgParser extends BaseHTMLElementParser<SchemaOrgData[]> {
14
+ parse(document: Document): SchemaOrgData[] | null;
15
+ }
16
+ export {};
17
+ //# sourceMappingURL=schema-org-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema-org-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/schema-org-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,aAAa;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED;;GAEG;AACH,qBAAa,eAAgB,SAAQ,qBAAqB,CAAC,aAAa,EAAE,CAAC;IACzE,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,aAAa,EAAE,GAAG,IAAI;CA8BlD"}
@@ -0,0 +1,32 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ /**
3
+ * Parser for JSON-LD Schema.org structured data.
4
+ */
5
+ export class SchemaOrgParser extends BaseHTMLElementParser {
6
+ parse(document) {
7
+ const results = [];
8
+ // Find all JSON-LD script tags
9
+ const scripts = this.querySelectorAll(document, 'script[type="application/ld+json"]');
10
+ for (const script of scripts) {
11
+ const textContent = this.getTextContent(script);
12
+ if (!textContent)
13
+ continue;
14
+ try {
15
+ const data = JSON.parse(textContent);
16
+ // Handle both single objects and arrays
17
+ if (Array.isArray(data)) {
18
+ results.push(...data);
19
+ }
20
+ else {
21
+ results.push(data);
22
+ }
23
+ }
24
+ catch {
25
+ // Skip invalid JSON
26
+ continue;
27
+ }
28
+ }
29
+ return results.length > 0 ? results : null;
30
+ }
31
+ }
32
+ //# sourceMappingURL=schema-org-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema-org-parser.js","sourceRoot":"","sources":["../../src/parsers/schema-org-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAWtE;;GAEG;AACH,MAAM,OAAO,eAAgB,SAAQ,qBAAsC;IACzE,KAAK,CAAC,QAAkB;QACtB,MAAM,OAAO,GAAoB,EAAE,CAAC;QAEpC,+BAA+B;QAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,gBAAgB,CACnC,QAAQ,EACR,oCAAoC,CACrC,CAAC;QAEF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;YAChD,IAAI,CAAC,WAAW;gBAAE,SAAS;YAE3B,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAoC,CAAC;gBAExE,wCAAwC;gBACxC,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;oBACxB,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;gBACxB,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACrB,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,oBAAoB;gBACpB,SAAS;YACX,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IAC7C,CAAC;CACF"}
@@ -0,0 +1,18 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ interface StandardMetadata {
3
+ title?: string;
4
+ description?: string;
5
+ keywords?: string;
6
+ author?: string;
7
+ viewport?: string;
8
+ charset?: string;
9
+ [key: string]: string | undefined;
10
+ }
11
+ /**
12
+ * Parser for standard HTML meta tags (name="*" and charset).
13
+ */
14
+ export declare class StandardMetaParser extends BaseHTMLElementParser<StandardMetadata> {
15
+ parse(document: Document): StandardMetadata | null;
16
+ }
17
+ export {};
18
+ //# sourceMappingURL=standard-meta-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"standard-meta-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/standard-meta-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,gBAAgB;IACxB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;CACnC;AAED;;GAEG;AACH,qBAAa,kBAAmB,SAAQ,qBAAqB,CAAC,gBAAgB,CAAC;IAC7E,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,gBAAgB,GAAG,IAAI;CA+BnD"}
@@ -0,0 +1,32 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ /**
3
+ * Parser for standard HTML meta tags (name="*" and charset).
4
+ */
5
+ export class StandardMetaParser extends BaseHTMLElementParser {
6
+ parse(document) {
7
+ const data = {};
8
+ // Extract charset
9
+ const charsetMeta = this.querySelector(document, 'meta[charset]');
10
+ if (charsetMeta) {
11
+ data.charset = this.getAttribute(charsetMeta, 'charset');
12
+ }
13
+ // Extract standard meta tags
14
+ const metaTags = this.querySelectorAll(document, 'meta[name][content]');
15
+ for (const tag of metaTags) {
16
+ const name = this.getAttribute(tag, 'name');
17
+ const content = this.getAttribute(tag, 'content');
18
+ if (name && content) {
19
+ data[name] = content;
20
+ }
21
+ }
22
+ // Extract title if not already present
23
+ if (!data.title) {
24
+ const titleEl = this.querySelector(document, 'title');
25
+ if (titleEl) {
26
+ data.title = this.getTextContent(titleEl);
27
+ }
28
+ }
29
+ return Object.keys(data).length > 0 ? data : null;
30
+ }
31
+ }
32
+ //# sourceMappingURL=standard-meta-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"standard-meta-parser.js","sourceRoot":"","sources":["../../src/parsers/standard-meta-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAYtE;;GAEG;AACH,MAAM,OAAO,kBAAmB,SAAQ,qBAAuC;IAC7E,KAAK,CAAC,QAAkB;QACtB,MAAM,IAAI,GAAqB,EAAE,CAAC;QAElC,kBAAkB;QAClB,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,eAAe,CAAC,CAAC;QAClE,IAAI,WAAW,EAAE,CAAC;YAChB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;QAC3D,CAAC;QAED,6BAA6B;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,qBAAqB,CAAC,CAAC;QAExE,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;YAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;YAElD,IAAI,IAAI,IAAI,OAAO,EAAE,CAAC;gBACpB,IAAI,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC;YACvB,CAAC;QACH,CAAC;QAED,uCAAuC;QACvC,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YACtD,IAAI,OAAO,EAAE,CAAC;gBACZ,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAC5C,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;IACpD,CAAC;CACF"}
@@ -0,0 +1,17 @@
1
+ import { BaseHTMLElementParser } from './base-html-element-parser.js';
2
+ interface TwitterCardData {
3
+ card?: string;
4
+ site?: string;
5
+ title?: string;
6
+ description?: string;
7
+ image?: string;
8
+ creator?: string;
9
+ }
10
+ /**
11
+ * Parser for Twitter Card meta tags (twitter:*).
12
+ */
13
+ export declare class TwitterCardParser extends BaseHTMLElementParser<TwitterCardData> {
14
+ parse(document: Document): TwitterCardData | null;
15
+ }
16
+ export {};
17
+ //# sourceMappingURL=twitter-card-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"twitter-card-parser.d.ts","sourceRoot":"","sources":["../../src/parsers/twitter-card-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,UAAU,eAAe;IACvB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,qBAAqB,CAAC,eAAe,CAAC;IAC3E,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,eAAe,GAAG,IAAI;CA0ClD"}