magpie-html 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -3069,4 +3069,187 @@ declare class PluckContentTypeError extends PluckError {
3069
3069
  */
3070
3070
  declare function pluck(input: string | URL | Request, init?: PluckInit): Promise<PluckResponse>;
3071
3071
 
3072
- export { type AlternateLink, type AnalyticsMetadata, type AppLinks, type AppleTouchIcon, type Article, type AssetsMetadata, type CanonicalMetadata, type ConnectionHint, type ContentExtractionOptions, type ContentQuality, type ContentResult, type CopyrightMetadata, type DiscoveredFeed, type DublinCoreMetadata, type ExtractedContent, type ExtractedLink, type ExtractionErrorType, type ExtractionFailure, type Feed, type FeedAuthor, type FeedDiscoveryMetadata, type FeedEnclosure, type FeedFormat, type FeedItem, type GeoMetadata, type GeoPosition, type HTMLDocument, type HtmlToTextOptions, type IconsMetadata, type JsonLdBlock, type LanguageMetadata, type LinksExtractionOptions, type LinksMetadata, type MSTile, type MaskIcon, type MonetizationMetadata, type NewsMetadata, type OpenGraphArticle, type OpenGraphAudio, type OpenGraphBook, type OpenGraphImage, type OpenGraphMetadata, type OpenGraphProfile, type OpenGraphVideo, type PaginationMetadata, type ParseResult, PluckContentTypeError, PluckEncodingError, PluckError, PluckHttpError, type PluckInit, PluckNetworkError, PluckRedirectError, type PluckResponse, PluckSizeError, PluckTimeoutError, type PreloadResource, type RobotDirectives, type RobotsMetadata, type SEOMetadata, type SchemaOrgMetadata, type SecurityMetadata, type SitemapDiscoveryMetadata, type SocialProfilesMetadata, type TwitterApp, type TwitterAppPlatform, type TwitterCardMetadata, type TwitterPlayer, type VerificationMetadata, type Website, assessContentQuality, calculateReadingTime, countWords, detectFormat, extractAnalytics, extractAssets, extractCanonical, extractContent, extractCopyright, extractDublinCore, extractFeedDiscovery, extractGeo, extractIcons, extractLanguage, extractLinks, extractMonetization, extractNews, extractOpenGraph, extractPagination, extractRobots, extractSEO, extractSchemaOrg, extractSecurity, extractSitemapDiscovery, extractSocialProfiles, extractTwitterCard, extractVerification, gatherArticle, gatherFeed, gatherWebsite, 
htmlToText, isAtom, isFeed, isJSONFeed, isProbablyReaderable, isRSS, parseFeed, parseHTML, pluck };
3072
+ /**
3073
+ * Base error type for `swoop()`.
3074
+ */
3075
+ declare class SwoopError extends Error {
3076
+ name: string;
3077
+ }
3078
+ /**
3079
+ * Thrown when the current runtime cannot execute `swoop()`.
3080
+ */
3081
+ declare class SwoopEnvironmentError extends SwoopError {
3082
+ name: string;
3083
+ }
3084
+ /**
3085
+ * Thrown when `swoop()` exceeds its configured timeout.
3086
+ */
3087
+ declare class SwoopTimeoutError extends SwoopError {
3088
+ name: string;
3089
+ }
3090
+ /**
3091
+ * Thrown when script execution fails in a non-recoverable way.
3092
+ */
3093
+ declare class SwoopExecutionError extends SwoopError {
3094
+ name: string;
3095
+ }
3096
+ /**
3097
+ * Thrown when `swoop()` is asked to execute potentially unsafe scripts
3098
+ * in a context where the caller should explicitly acknowledge the risk.
3099
+ */
3100
+ declare class SwoopSecurityError extends SwoopError {
3101
+ name: string;
3102
+ }
3103
+
3104
+ /**
3105
+ * How `swoop()` decides when a client-rendered page is "done enough" to snapshot.
3106
+ *
3107
+ * @remarks
3108
+ * This is DOM-only rendering (no layout/paint). "Done" is best-effort.
3109
+ */
3110
+ type SwoopWaitStrategy = 'timeout' | 'networkidle';
3111
+ type SwoopEngine = 'vm';
3112
+ interface SwoopInit {
3113
+ /**
3114
+ * Execution engine used for running third-party scripts.
3115
+ *
3116
+ * @remarks
3117
+ * - `vm` (default): practical, supports `fetch` by reusing host globals
3118
+ *
3119
+ * @defaultValue 'vm'
3120
+ */
3121
+ engine?: SwoopEngine;
3122
+ /**
3123
+ * Pluck options used for the initial HTML request and external script fetching.
3124
+ */
3125
+ pluck?: PluckInit;
3126
+ /**
3127
+ * Execute inline and external scripts found in the HTML.
3128
+ *
3129
+ * @defaultValue true
3130
+ */
3131
+ executeScripts?: boolean;
3132
+ /**
3133
+ * Maximum time to wait for the page to "settle" before taking a snapshot.
3134
+ *
3135
+ * @defaultValue 3000
3136
+ */
3137
+ timeout?: number;
3138
+ /**
3139
+ * Which waiting strategy to use.
3140
+ *
3141
+ * - `timeout`: sleep for `timeout` and snapshot
3142
+ * - `networkidle`: wait until no tracked fetches are pending for `idleTime`
3143
+ *
3144
+ * @defaultValue 'networkidle'
3145
+ */
3146
+ waitStrategy?: SwoopWaitStrategy;
3147
+ /**
3148
+ * Required quiet period (ms) for `networkidle`.
3149
+ *
3150
+ * @defaultValue 250
3151
+ */
3152
+ idleTime?: number;
3153
+ /**
3154
+ * Poll interval (ms) for `networkidle`.
3155
+ *
3156
+ * @defaultValue 25
3157
+ */
3158
+ pollInterval?: number;
3159
+ /**
3160
+ * How many scripts to load/execute at most.
3161
+ *
3162
+ * @defaultValue 64
3163
+ */
3164
+ maxScripts?: number;
3165
+ /**
3166
+ * If true, forward console output from the isolated realm to the host console.
3167
+ *
3168
+ * @defaultValue false
3169
+ */
3170
+ forwardConsole?: boolean;
3171
+ /**
3172
+ * If true, installs permissive Proxy-based stubs for common missing browser APIs.
3173
+ *
3174
+ * @remarks
3175
+ * This may hide some failures (by turning hard crashes into no-ops), but improves
3176
+ * compatibility for a best-effort snapshotter.
3177
+ *
3178
+ * @defaultValue true
3179
+ */
3180
+ permissiveShims?: boolean;
3181
+ /**
3182
+ * Record all sandbox `fetch()` calls into the captured console output.
3183
+ *
3184
+ * @defaultValue false
3185
+ */
3186
+ debugFetch?: boolean;
3187
+ /**
3188
+ * Enable additional sandbox probes to help iterative shim development.
3189
+ *
3190
+ * @remarks
3191
+ * Collects lightweight runtime stats (DOM ops/mutations, app-root growth, etc.)
3192
+ * and emits them via captured console.
3193
+ *
3194
+ * @defaultValue false
3195
+ */
3196
+ debugProbes?: boolean;
3197
+ }
3198
+ interface SwoopConsoleEntry {
3199
+ level: 'debug' | 'info' | 'warn' | 'error' | 'log';
3200
+ message: string;
3201
+ args?: string[];
3202
+ time: number;
3203
+ }
3204
+ interface SwoopScriptError {
3205
+ stage: 'bootstrap' | 'script' | 'wait';
3206
+ scriptUrl?: string;
3207
+ message: string;
3208
+ stack?: string;
3209
+ }
3210
+ interface SwoopResult {
3211
+ /**
3212
+ * Final URL after redirects.
3213
+ */
3214
+ url: string;
3215
+ /**
3216
+ * Snapshot HTML (best-effort).
3217
+ */
3218
+ html: string;
3219
+ /**
3220
+ * Console output captured from the isolated execution environment.
3221
+ */
3222
+ console: SwoopConsoleEntry[];
3223
+ /**
3224
+ * Script/bootstrap errors captured during execution.
3225
+ */
3226
+ errors: SwoopScriptError[];
3227
+ /**
3228
+ * Timing metadata (ms).
3229
+ */
3230
+ timing: {
3231
+ start: number;
3232
+ end: number;
3233
+ duration: number;
3234
+ };
3235
+ }
3236
+
3237
+ /**
3238
+ * Execute client-side JavaScript against a DOM-only environment and snapshot the resulting HTML.
3239
+ *
3240
+ * @remarks
3241
+ * **Experimental feature**.
3242
+ *
3243
+ *
3244
+ * - Default engine (`vm`) works on regular Node.js.
3245
+ *
3246
+ * This is *not* a real browser engine:
3247
+ * - No layout/paint/CSS correctness
3248
+ * - No true navigation lifecycle
3249
+ * - Best-effort shims for browser APIs
3250
+ *
3251
+ * ⚠️ **Security**: This executes third-party JavaScript. Only use on trusted sources or in an OS sandbox.
3252
+ */
3253
+ declare function swoop(url: string | URL, init?: SwoopInit): Promise<SwoopResult>;
3254
+
3255
+ export { type AlternateLink, type AnalyticsMetadata, type AppLinks, type AppleTouchIcon, type Article, type AssetsMetadata, type CanonicalMetadata, type ConnectionHint, type ContentExtractionOptions, type ContentQuality, type ContentResult, type CopyrightMetadata, type DiscoveredFeed, type DublinCoreMetadata, type ExtractedContent, type ExtractedLink, type ExtractionErrorType, type ExtractionFailure, type Feed, type FeedAuthor, type FeedDiscoveryMetadata, type FeedEnclosure, type FeedFormat, type FeedItem, type GeoMetadata, type GeoPosition, type HTMLDocument, type HtmlToTextOptions, type IconsMetadata, type JsonLdBlock, type LanguageMetadata, type LinksExtractionOptions, type LinksMetadata, type MSTile, type MaskIcon, type MonetizationMetadata, type NewsMetadata, type OpenGraphArticle, type OpenGraphAudio, type OpenGraphBook, type OpenGraphImage, type OpenGraphMetadata, type OpenGraphProfile, type OpenGraphVideo, type PaginationMetadata, type ParseResult, PluckContentTypeError, PluckEncodingError, PluckError, PluckHttpError, type PluckInit, PluckNetworkError, PluckRedirectError, type PluckResponse, PluckSizeError, PluckTimeoutError, type PreloadResource, type RobotDirectives, type RobotsMetadata, type SEOMetadata, type SchemaOrgMetadata, type SecurityMetadata, type SitemapDiscoveryMetadata, type SocialProfilesMetadata, SwoopEnvironmentError, SwoopError, SwoopExecutionError, type SwoopInit, type SwoopResult, SwoopSecurityError, SwoopTimeoutError, type SwoopWaitStrategy, type TwitterApp, type TwitterAppPlatform, type TwitterCardMetadata, type TwitterPlayer, type VerificationMetadata, type Website, assessContentQuality, calculateReadingTime, countWords, detectFormat, extractAnalytics, extractAssets, extractCanonical, extractContent, extractCopyright, extractDublinCore, extractFeedDiscovery, extractGeo, extractIcons, extractLanguage, extractLinks, extractMonetization, extractNews, extractOpenGraph, extractPagination, extractRobots, extractSEO, extractSchemaOrg, 
extractSecurity, extractSitemapDiscovery, extractSocialProfiles, extractTwitterCard, extractVerification, gatherArticle, gatherFeed, gatherWebsite, htmlToText, isAtom, isFeed, isJSONFeed, isProbablyReaderable, isRSS, parseFeed, parseHTML, pluck, swoop };
package/dist/index.d.ts CHANGED
@@ -3069,4 +3069,187 @@ declare class PluckContentTypeError extends PluckError {
3069
3069
  */
3070
3070
  declare function pluck(input: string | URL | Request, init?: PluckInit): Promise<PluckResponse>;
3071
3071
 
3072
- export { type AlternateLink, type AnalyticsMetadata, type AppLinks, type AppleTouchIcon, type Article, type AssetsMetadata, type CanonicalMetadata, type ConnectionHint, type ContentExtractionOptions, type ContentQuality, type ContentResult, type CopyrightMetadata, type DiscoveredFeed, type DublinCoreMetadata, type ExtractedContent, type ExtractedLink, type ExtractionErrorType, type ExtractionFailure, type Feed, type FeedAuthor, type FeedDiscoveryMetadata, type FeedEnclosure, type FeedFormat, type FeedItem, type GeoMetadata, type GeoPosition, type HTMLDocument, type HtmlToTextOptions, type IconsMetadata, type JsonLdBlock, type LanguageMetadata, type LinksExtractionOptions, type LinksMetadata, type MSTile, type MaskIcon, type MonetizationMetadata, type NewsMetadata, type OpenGraphArticle, type OpenGraphAudio, type OpenGraphBook, type OpenGraphImage, type OpenGraphMetadata, type OpenGraphProfile, type OpenGraphVideo, type PaginationMetadata, type ParseResult, PluckContentTypeError, PluckEncodingError, PluckError, PluckHttpError, type PluckInit, PluckNetworkError, PluckRedirectError, type PluckResponse, PluckSizeError, PluckTimeoutError, type PreloadResource, type RobotDirectives, type RobotsMetadata, type SEOMetadata, type SchemaOrgMetadata, type SecurityMetadata, type SitemapDiscoveryMetadata, type SocialProfilesMetadata, type TwitterApp, type TwitterAppPlatform, type TwitterCardMetadata, type TwitterPlayer, type VerificationMetadata, type Website, assessContentQuality, calculateReadingTime, countWords, detectFormat, extractAnalytics, extractAssets, extractCanonical, extractContent, extractCopyright, extractDublinCore, extractFeedDiscovery, extractGeo, extractIcons, extractLanguage, extractLinks, extractMonetization, extractNews, extractOpenGraph, extractPagination, extractRobots, extractSEO, extractSchemaOrg, extractSecurity, extractSitemapDiscovery, extractSocialProfiles, extractTwitterCard, extractVerification, gatherArticle, gatherFeed, gatherWebsite, 
htmlToText, isAtom, isFeed, isJSONFeed, isProbablyReaderable, isRSS, parseFeed, parseHTML, pluck };
3072
+ /**
3073
+ * Base error type for `swoop()`.
3074
+ */
3075
+ declare class SwoopError extends Error {
3076
+ name: string;
3077
+ }
3078
+ /**
3079
+ * Thrown when the current runtime cannot execute `swoop()`.
3080
+ */
3081
+ declare class SwoopEnvironmentError extends SwoopError {
3082
+ name: string;
3083
+ }
3084
+ /**
3085
+ * Thrown when `swoop()` exceeds its configured timeout.
3086
+ */
3087
+ declare class SwoopTimeoutError extends SwoopError {
3088
+ name: string;
3089
+ }
3090
+ /**
3091
+ * Thrown when script execution fails in a non-recoverable way.
3092
+ */
3093
+ declare class SwoopExecutionError extends SwoopError {
3094
+ name: string;
3095
+ }
3096
+ /**
3097
+ * Thrown when `swoop()` is asked to execute potentially unsafe scripts
3098
+ * in a context where the caller should explicitly acknowledge the risk.
3099
+ */
3100
+ declare class SwoopSecurityError extends SwoopError {
3101
+ name: string;
3102
+ }
3103
+
3104
+ /**
3105
+ * How `swoop()` decides when a client-rendered page is "done enough" to snapshot.
3106
+ *
3107
+ * @remarks
3108
+ * This is DOM-only rendering (no layout/paint). "Done" is best-effort.
3109
+ */
3110
+ type SwoopWaitStrategy = 'timeout' | 'networkidle';
3111
+ type SwoopEngine = 'vm';
3112
+ interface SwoopInit {
3113
+ /**
3114
+ * Execution engine used for running third-party scripts.
3115
+ *
3116
+ * @remarks
3117
+ * - `vm` (default): practical, supports `fetch` by reusing host globals
3118
+ *
3119
+ * @defaultValue 'vm'
3120
+ */
3121
+ engine?: SwoopEngine;
3122
+ /**
3123
+ * Pluck options used for the initial HTML request and external script fetching.
3124
+ */
3125
+ pluck?: PluckInit;
3126
+ /**
3127
+ * Execute inline and external scripts found in the HTML.
3128
+ *
3129
+ * @defaultValue true
3130
+ */
3131
+ executeScripts?: boolean;
3132
+ /**
3133
+ * Maximum time to wait for the page to "settle" before taking a snapshot.
3134
+ *
3135
+ * @defaultValue 3000
3136
+ */
3137
+ timeout?: number;
3138
+ /**
3139
+ * Which waiting strategy to use.
3140
+ *
3141
+ * - `timeout`: sleep for `timeout` and snapshot
3142
+ * - `networkidle`: wait until no tracked fetches are pending for `idleTime`
3143
+ *
3144
+ * @defaultValue 'networkidle'
3145
+ */
3146
+ waitStrategy?: SwoopWaitStrategy;
3147
+ /**
3148
+ * Required quiet period (ms) for `networkidle`.
3149
+ *
3150
+ * @defaultValue 250
3151
+ */
3152
+ idleTime?: number;
3153
+ /**
3154
+ * Poll interval (ms) for `networkidle`.
3155
+ *
3156
+ * @defaultValue 25
3157
+ */
3158
+ pollInterval?: number;
3159
+ /**
3160
+ * How many scripts to load/execute at most.
3161
+ *
3162
+ * @defaultValue 64
3163
+ */
3164
+ maxScripts?: number;
3165
+ /**
3166
+ * If true, forward console output from the isolated realm to the host console.
3167
+ *
3168
+ * @defaultValue false
3169
+ */
3170
+ forwardConsole?: boolean;
3171
+ /**
3172
+ * If true, installs permissive Proxy-based stubs for common missing browser APIs.
3173
+ *
3174
+ * @remarks
3175
+ * This may hide some failures (by turning hard crashes into no-ops), but improves
3176
+ * compatibility for a best-effort snapshotter.
3177
+ *
3178
+ * @defaultValue true
3179
+ */
3180
+ permissiveShims?: boolean;
3181
+ /**
3182
+ * Record all sandbox `fetch()` calls into the captured console output.
3183
+ *
3184
+ * @defaultValue false
3185
+ */
3186
+ debugFetch?: boolean;
3187
+ /**
3188
+ * Enable additional sandbox probes to help iterative shim development.
3189
+ *
3190
+ * @remarks
3191
+ * Collects lightweight runtime stats (DOM ops/mutations, app-root growth, etc.)
3192
+ * and emits them via captured console.
3193
+ *
3194
+ * @defaultValue false
3195
+ */
3196
+ debugProbes?: boolean;
3197
+ }
3198
+ interface SwoopConsoleEntry {
3199
+ level: 'debug' | 'info' | 'warn' | 'error' | 'log';
3200
+ message: string;
3201
+ args?: string[];
3202
+ time: number;
3203
+ }
3204
+ interface SwoopScriptError {
3205
+ stage: 'bootstrap' | 'script' | 'wait';
3206
+ scriptUrl?: string;
3207
+ message: string;
3208
+ stack?: string;
3209
+ }
3210
+ interface SwoopResult {
3211
+ /**
3212
+ * Final URL after redirects.
3213
+ */
3214
+ url: string;
3215
+ /**
3216
+ * Snapshot HTML (best-effort).
3217
+ */
3218
+ html: string;
3219
+ /**
3220
+ * Console output captured from the isolated execution environment.
3221
+ */
3222
+ console: SwoopConsoleEntry[];
3223
+ /**
3224
+ * Script/bootstrap errors captured during execution.
3225
+ */
3226
+ errors: SwoopScriptError[];
3227
+ /**
3228
+ * Timing metadata (ms).
3229
+ */
3230
+ timing: {
3231
+ start: number;
3232
+ end: number;
3233
+ duration: number;
3234
+ };
3235
+ }
3236
+
3237
+ /**
3238
+ * Execute client-side JavaScript against a DOM-only environment and snapshot the resulting HTML.
3239
+ *
3240
+ * @remarks
3241
+ * **Experimental feature**.
3242
+ *
3243
+ *
3244
+ * - Default engine (`vm`) works on regular Node.js.
3245
+ *
3246
+ * This is *not* a real browser engine:
3247
+ * - No layout/paint/CSS correctness
3248
+ * - No true navigation lifecycle
3249
+ * - Best-effort shims for browser APIs
3250
+ *
3251
+ * ⚠️ **Security**: This executes third-party JavaScript. Only use on trusted sources or in an OS sandbox.
3252
+ */
3253
+ declare function swoop(url: string | URL, init?: SwoopInit): Promise<SwoopResult>;
3254
+
3255
+ export { type AlternateLink, type AnalyticsMetadata, type AppLinks, type AppleTouchIcon, type Article, type AssetsMetadata, type CanonicalMetadata, type ConnectionHint, type ContentExtractionOptions, type ContentQuality, type ContentResult, type CopyrightMetadata, type DiscoveredFeed, type DublinCoreMetadata, type ExtractedContent, type ExtractedLink, type ExtractionErrorType, type ExtractionFailure, type Feed, type FeedAuthor, type FeedDiscoveryMetadata, type FeedEnclosure, type FeedFormat, type FeedItem, type GeoMetadata, type GeoPosition, type HTMLDocument, type HtmlToTextOptions, type IconsMetadata, type JsonLdBlock, type LanguageMetadata, type LinksExtractionOptions, type LinksMetadata, type MSTile, type MaskIcon, type MonetizationMetadata, type NewsMetadata, type OpenGraphArticle, type OpenGraphAudio, type OpenGraphBook, type OpenGraphImage, type OpenGraphMetadata, type OpenGraphProfile, type OpenGraphVideo, type PaginationMetadata, type ParseResult, PluckContentTypeError, PluckEncodingError, PluckError, PluckHttpError, type PluckInit, PluckNetworkError, PluckRedirectError, type PluckResponse, PluckSizeError, PluckTimeoutError, type PreloadResource, type RobotDirectives, type RobotsMetadata, type SEOMetadata, type SchemaOrgMetadata, type SecurityMetadata, type SitemapDiscoveryMetadata, type SocialProfilesMetadata, SwoopEnvironmentError, SwoopError, SwoopExecutionError, type SwoopInit, type SwoopResult, SwoopSecurityError, SwoopTimeoutError, type SwoopWaitStrategy, type TwitterApp, type TwitterAppPlatform, type TwitterCardMetadata, type TwitterPlayer, type VerificationMetadata, type Website, assessContentQuality, calculateReadingTime, countWords, detectFormat, extractAnalytics, extractAssets, extractCanonical, extractContent, extractCopyright, extractDublinCore, extractFeedDiscovery, extractGeo, extractIcons, extractLanguage, extractLinks, extractMonetization, extractNews, extractOpenGraph, extractPagination, extractRobots, extractSEO, extractSchemaOrg, 
extractSecurity, extractSitemapDiscovery, extractSocialProfiles, extractTwitterCard, extractVerification, gatherArticle, gatherFeed, gatherWebsite, htmlToText, isAtom, isFeed, isJSONFeed, isProbablyReaderable, isRSS, parseFeed, parseHTML, pluck, swoop };