metanova 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +49 -0
- package/README.md +4 -0
- package/dist/index.cjs +993 -59
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +32 -1
- package/dist/index.d.ts +32 -1
- package/dist/index.js +992 -59
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
package/dist/index.d.cts
CHANGED
|
@@ -105,7 +105,11 @@ interface ExtractionDiagnostics {
|
|
|
105
105
|
contentType?: string;
|
|
106
106
|
redirects: RedirectEntry[];
|
|
107
107
|
sourcesUsed: string[];
|
|
108
|
+
adapterUsed?: string;
|
|
109
|
+
sourcePriority?: string[];
|
|
110
|
+
extractionMethod?: string;
|
|
108
111
|
warnings: string[];
|
|
112
|
+
fallbacksAttempted?: ExtractionFallbackAttempt[];
|
|
109
113
|
trace: string[];
|
|
110
114
|
adapter?: {
|
|
111
115
|
matched: boolean;
|
|
@@ -113,10 +117,35 @@ interface ExtractionDiagnostics {
|
|
|
113
117
|
confidence?: number;
|
|
114
118
|
};
|
|
115
119
|
errors?: string[];
|
|
120
|
+
retryInfo?: ExtractionRetryInfo;
|
|
116
121
|
selectedImageReason?: string;
|
|
122
|
+
confidenceBreakdown?: ConfidenceBreakdown;
|
|
117
123
|
fetchDurationMs?: number;
|
|
118
124
|
extractedAt: string;
|
|
119
125
|
}
|
|
126
|
+
interface ExtractionFallbackAttempt {
|
|
127
|
+
method: string;
|
|
128
|
+
url?: string;
|
|
129
|
+
ok: boolean;
|
|
130
|
+
statusCode?: number;
|
|
131
|
+
blocked?: boolean;
|
|
132
|
+
error?: string;
|
|
133
|
+
retryAfter?: string;
|
|
134
|
+
}
|
|
135
|
+
interface ExtractionRetryInfo {
|
|
136
|
+
retryable: boolean;
|
|
137
|
+
reason?: string;
|
|
138
|
+
retryAfter?: string;
|
|
139
|
+
retryAfterMs?: number;
|
|
140
|
+
attempts?: number;
|
|
141
|
+
}
|
|
142
|
+
interface ConfidenceBreakdown {
|
|
143
|
+
title: number;
|
|
144
|
+
description: number;
|
|
145
|
+
image: number;
|
|
146
|
+
structuredData: number;
|
|
147
|
+
adapter: number;
|
|
148
|
+
}
|
|
120
149
|
interface UnifiedMetadata {
|
|
121
150
|
ok: boolean;
|
|
122
151
|
url: string;
|
|
@@ -124,6 +153,7 @@ interface UnifiedMetadata {
|
|
|
124
153
|
type: MetadataType;
|
|
125
154
|
title?: string;
|
|
126
155
|
description?: string;
|
|
156
|
+
publishDate?: string;
|
|
127
157
|
siteName?: string;
|
|
128
158
|
canonicalUrl?: string;
|
|
129
159
|
confidence: number;
|
|
@@ -406,6 +436,7 @@ interface ReliabilityInput {
|
|
|
406
436
|
warnings: string[];
|
|
407
437
|
}
|
|
408
438
|
declare function calculateConfidence(input: ConfidenceEngineInput): number;
|
|
439
|
+
declare function calculateConfidenceBreakdown(input: ConfidenceEngineInput): ConfidenceBreakdown;
|
|
409
440
|
declare function calculateCompleteness(input: CompletenessInput): number;
|
|
410
441
|
declare function calculateReliability(input: ReliabilityInput): number;
|
|
411
442
|
|
|
@@ -490,4 +521,4 @@ interface MetaNovaRegistry {
|
|
|
490
521
|
declare function registerGlobalPlugin(plugin: MetaNovaPlugin): void;
|
|
491
522
|
declare function createRegistry(options?: ParseMetadataOptions): MetaNovaRegistry;
|
|
492
523
|
|
|
493
|
-
export { type AdapterContext, type AdapterExtractionResult, type AdapterRawData, type ApplicationMetadata, type ArticleMetadata, type CompletenessInput, type ConfidenceEngineInput, type CustomExtractor, DEFAULT_ACCEPT, DEFAULT_ACCEPT_ENCODING, DEFAULT_ACCEPT_LANGUAGE, DEFAULT_BROWSER_USER_AGENT, type EmbeddedDataItem, type EmbeddedDataMetadata, type Entity, type ExtractionDiagnostics, type ExtractorContext, type FetchMetadataOptions, type FetchedPage, type HtmlMetadata, type ImageScorer, type ImageScoringContext, type JsonLdMetadata, type JsonLdNode, type MediaAsset, type MediaDiscoveryResult, type MediaKind, MetaNova, type MetaNovaCache, type MetaNovaCacheEntry, type MetaNovaPlugin, type MetaNovaPluginApi, type MetaNovaRegistry, type MetadataSource, type MetadataSourceAttribution, type MetadataType, type OEmbedData, type OEmbedLink, type OEmbedMetadata, type OpenGraphMetadata, type ParseMetadataOptions, type PlaylistMetadata, type PlaylistVideo, type PluginExtractionResult, type PreviewCard, type ProductMetadata, type RawMetadataSources, type RedirectEntry, type RedirectResolution, type ReliabilityInput, SecurityError, type SiteAdapter, type TwitterMetadata, type UnifiedMetadata, type VideoMetadata, addWarning, assertSafeRequestUrl, behanceAdapter, calculateCompleteness, calculateConfidence, calculateReliability, createDiagnostics, createPreviewCard, createRegistry, MetaNova as default, defaultAdapters, detectShortUrl, discoverMedia, extractAudio, extractEmbeddedData, extractHtmlMetadata, extractImages, extractJsonLd, extractOEmbed, extractOpenGraph, extractTwitterCards, extractVideos, facebookAdapter, fetchMetadata, fetchPage, instagramAdapter, normalizeMetadata, normalizeUrl, parseMetadata, parseMetadataAsync, pinterestAdapter, redditAdapter, registerGlobalPlugin, resolveCanonicalUrl, resolveRedirects, resolveUrl, scoreImages, tiktokAdapter, twitterAdapter, validateUrl, youtubeAdapter };
|
|
524
|
+
export { type AdapterContext, type AdapterExtractionResult, type AdapterRawData, type ApplicationMetadata, type ArticleMetadata, type CompletenessInput, type ConfidenceBreakdown, type ConfidenceEngineInput, type CustomExtractor, DEFAULT_ACCEPT, DEFAULT_ACCEPT_ENCODING, DEFAULT_ACCEPT_LANGUAGE, DEFAULT_BROWSER_USER_AGENT, type EmbeddedDataItem, type EmbeddedDataMetadata, type Entity, type ExtractionDiagnostics, type ExtractionFallbackAttempt, type ExtractionRetryInfo, type ExtractorContext, type FetchMetadataOptions, type FetchedPage, type HtmlMetadata, type ImageScorer, type ImageScoringContext, type JsonLdMetadata, type JsonLdNode, type MediaAsset, type MediaDiscoveryResult, type MediaKind, MetaNova, type MetaNovaCache, type MetaNovaCacheEntry, type MetaNovaPlugin, type MetaNovaPluginApi, type MetaNovaRegistry, type MetadataSource, type MetadataSourceAttribution, type MetadataType, type OEmbedData, type OEmbedLink, type OEmbedMetadata, type OpenGraphMetadata, type ParseMetadataOptions, type PlaylistMetadata, type PlaylistVideo, type PluginExtractionResult, type PreviewCard, type ProductMetadata, type RawMetadataSources, type RedirectEntry, type RedirectResolution, type ReliabilityInput, SecurityError, type SiteAdapter, type TwitterMetadata, type UnifiedMetadata, type VideoMetadata, addWarning, assertSafeRequestUrl, behanceAdapter, calculateCompleteness, calculateConfidence, calculateConfidenceBreakdown, calculateReliability, createDiagnostics, createPreviewCard, createRegistry, MetaNova as default, defaultAdapters, detectShortUrl, discoverMedia, extractAudio, extractEmbeddedData, extractHtmlMetadata, extractImages, extractJsonLd, extractOEmbed, extractOpenGraph, extractTwitterCards, extractVideos, facebookAdapter, fetchMetadata, fetchPage, instagramAdapter, normalizeMetadata, normalizeUrl, parseMetadata, parseMetadataAsync, pinterestAdapter, redditAdapter, registerGlobalPlugin, resolveCanonicalUrl, resolveRedirects, resolveUrl, scoreImages, tiktokAdapter, twitterAdapter, validateUrl, youtubeAdapter };
|
package/dist/index.d.ts
CHANGED
|
@@ -105,7 +105,11 @@ interface ExtractionDiagnostics {
|
|
|
105
105
|
contentType?: string;
|
|
106
106
|
redirects: RedirectEntry[];
|
|
107
107
|
sourcesUsed: string[];
|
|
108
|
+
adapterUsed?: string;
|
|
109
|
+
sourcePriority?: string[];
|
|
110
|
+
extractionMethod?: string;
|
|
108
111
|
warnings: string[];
|
|
112
|
+
fallbacksAttempted?: ExtractionFallbackAttempt[];
|
|
109
113
|
trace: string[];
|
|
110
114
|
adapter?: {
|
|
111
115
|
matched: boolean;
|
|
@@ -113,10 +117,35 @@ interface ExtractionDiagnostics {
|
|
|
113
117
|
confidence?: number;
|
|
114
118
|
};
|
|
115
119
|
errors?: string[];
|
|
120
|
+
retryInfo?: ExtractionRetryInfo;
|
|
116
121
|
selectedImageReason?: string;
|
|
122
|
+
confidenceBreakdown?: ConfidenceBreakdown;
|
|
117
123
|
fetchDurationMs?: number;
|
|
118
124
|
extractedAt: string;
|
|
119
125
|
}
|
|
126
|
+
interface ExtractionFallbackAttempt {
|
|
127
|
+
method: string;
|
|
128
|
+
url?: string;
|
|
129
|
+
ok: boolean;
|
|
130
|
+
statusCode?: number;
|
|
131
|
+
blocked?: boolean;
|
|
132
|
+
error?: string;
|
|
133
|
+
retryAfter?: string;
|
|
134
|
+
}
|
|
135
|
+
interface ExtractionRetryInfo {
|
|
136
|
+
retryable: boolean;
|
|
137
|
+
reason?: string;
|
|
138
|
+
retryAfter?: string;
|
|
139
|
+
retryAfterMs?: number;
|
|
140
|
+
attempts?: number;
|
|
141
|
+
}
|
|
142
|
+
interface ConfidenceBreakdown {
|
|
143
|
+
title: number;
|
|
144
|
+
description: number;
|
|
145
|
+
image: number;
|
|
146
|
+
structuredData: number;
|
|
147
|
+
adapter: number;
|
|
148
|
+
}
|
|
120
149
|
interface UnifiedMetadata {
|
|
121
150
|
ok: boolean;
|
|
122
151
|
url: string;
|
|
@@ -124,6 +153,7 @@ interface UnifiedMetadata {
|
|
|
124
153
|
type: MetadataType;
|
|
125
154
|
title?: string;
|
|
126
155
|
description?: string;
|
|
156
|
+
publishDate?: string;
|
|
127
157
|
siteName?: string;
|
|
128
158
|
canonicalUrl?: string;
|
|
129
159
|
confidence: number;
|
|
@@ -406,6 +436,7 @@ interface ReliabilityInput {
|
|
|
406
436
|
warnings: string[];
|
|
407
437
|
}
|
|
408
438
|
declare function calculateConfidence(input: ConfidenceEngineInput): number;
|
|
439
|
+
declare function calculateConfidenceBreakdown(input: ConfidenceEngineInput): ConfidenceBreakdown;
|
|
409
440
|
declare function calculateCompleteness(input: CompletenessInput): number;
|
|
410
441
|
declare function calculateReliability(input: ReliabilityInput): number;
|
|
411
442
|
|
|
@@ -490,4 +521,4 @@ interface MetaNovaRegistry {
|
|
|
490
521
|
declare function registerGlobalPlugin(plugin: MetaNovaPlugin): void;
|
|
491
522
|
declare function createRegistry(options?: ParseMetadataOptions): MetaNovaRegistry;
|
|
492
523
|
|
|
493
|
-
export { type AdapterContext, type AdapterExtractionResult, type AdapterRawData, type ApplicationMetadata, type ArticleMetadata, type CompletenessInput, type ConfidenceEngineInput, type CustomExtractor, DEFAULT_ACCEPT, DEFAULT_ACCEPT_ENCODING, DEFAULT_ACCEPT_LANGUAGE, DEFAULT_BROWSER_USER_AGENT, type EmbeddedDataItem, type EmbeddedDataMetadata, type Entity, type ExtractionDiagnostics, type ExtractorContext, type FetchMetadataOptions, type FetchedPage, type HtmlMetadata, type ImageScorer, type ImageScoringContext, type JsonLdMetadata, type JsonLdNode, type MediaAsset, type MediaDiscoveryResult, type MediaKind, MetaNova, type MetaNovaCache, type MetaNovaCacheEntry, type MetaNovaPlugin, type MetaNovaPluginApi, type MetaNovaRegistry, type MetadataSource, type MetadataSourceAttribution, type MetadataType, type OEmbedData, type OEmbedLink, type OEmbedMetadata, type OpenGraphMetadata, type ParseMetadataOptions, type PlaylistMetadata, type PlaylistVideo, type PluginExtractionResult, type PreviewCard, type ProductMetadata, type RawMetadataSources, type RedirectEntry, type RedirectResolution, type ReliabilityInput, SecurityError, type SiteAdapter, type TwitterMetadata, type UnifiedMetadata, type VideoMetadata, addWarning, assertSafeRequestUrl, behanceAdapter, calculateCompleteness, calculateConfidence, calculateReliability, createDiagnostics, createPreviewCard, createRegistry, MetaNova as default, defaultAdapters, detectShortUrl, discoverMedia, extractAudio, extractEmbeddedData, extractHtmlMetadata, extractImages, extractJsonLd, extractOEmbed, extractOpenGraph, extractTwitterCards, extractVideos, facebookAdapter, fetchMetadata, fetchPage, instagramAdapter, normalizeMetadata, normalizeUrl, parseMetadata, parseMetadataAsync, pinterestAdapter, redditAdapter, registerGlobalPlugin, resolveCanonicalUrl, resolveRedirects, resolveUrl, scoreImages, tiktokAdapter, twitterAdapter, validateUrl, youtubeAdapter };
|
|
524
|
+
export { type AdapterContext, type AdapterExtractionResult, type AdapterRawData, type ApplicationMetadata, type ArticleMetadata, type CompletenessInput, type ConfidenceBreakdown, type ConfidenceEngineInput, type CustomExtractor, DEFAULT_ACCEPT, DEFAULT_ACCEPT_ENCODING, DEFAULT_ACCEPT_LANGUAGE, DEFAULT_BROWSER_USER_AGENT, type EmbeddedDataItem, type EmbeddedDataMetadata, type Entity, type ExtractionDiagnostics, type ExtractionFallbackAttempt, type ExtractionRetryInfo, type ExtractorContext, type FetchMetadataOptions, type FetchedPage, type HtmlMetadata, type ImageScorer, type ImageScoringContext, type JsonLdMetadata, type JsonLdNode, type MediaAsset, type MediaDiscoveryResult, type MediaKind, MetaNova, type MetaNovaCache, type MetaNovaCacheEntry, type MetaNovaPlugin, type MetaNovaPluginApi, type MetaNovaRegistry, type MetadataSource, type MetadataSourceAttribution, type MetadataType, type OEmbedData, type OEmbedLink, type OEmbedMetadata, type OpenGraphMetadata, type ParseMetadataOptions, type PlaylistMetadata, type PlaylistVideo, type PluginExtractionResult, type PreviewCard, type ProductMetadata, type RawMetadataSources, type RedirectEntry, type RedirectResolution, type ReliabilityInput, SecurityError, type SiteAdapter, type TwitterMetadata, type UnifiedMetadata, type VideoMetadata, addWarning, assertSafeRequestUrl, behanceAdapter, calculateCompleteness, calculateConfidence, calculateConfidenceBreakdown, calculateReliability, createDiagnostics, createPreviewCard, createRegistry, MetaNova as default, defaultAdapters, detectShortUrl, discoverMedia, extractAudio, extractEmbeddedData, extractHtmlMetadata, extractImages, extractJsonLd, extractOEmbed, extractOpenGraph, extractTwitterCards, extractVideos, facebookAdapter, fetchMetadata, fetchPage, instagramAdapter, normalizeMetadata, normalizeUrl, parseMetadata, parseMetadataAsync, pinterestAdapter, redditAdapter, registerGlobalPlugin, resolveCanonicalUrl, resolveRedirects, resolveUrl, scoreImages, tiktokAdapter, twitterAdapter, validateUrl, youtubeAdapter };
|