gdelt-ts-client 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +224 -4
- package/dist/client.d.ts +59 -1
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +188 -4
- package/dist/client.js.map +1 -1
- package/dist/config/content-fetcher-config.d.ts +16 -0
- package/dist/config/content-fetcher-config.d.ts.map +1 -0
- package/dist/config/content-fetcher-config.js +79 -0
- package/dist/config/content-fetcher-config.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/dist/interfaces/api-parameters.d.ts +5 -0
- package/dist/interfaces/api-parameters.d.ts.map +1 -1
- package/dist/interfaces/content-fetcher.d.ts +121 -0
- package/dist/interfaces/content-fetcher.d.ts.map +1 -0
- package/dist/interfaces/content-fetcher.js +6 -0
- package/dist/interfaces/content-fetcher.js.map +1 -0
- package/dist/interfaces/content-responses.d.ts +183 -0
- package/dist/interfaces/content-responses.d.ts.map +1 -0
- package/dist/interfaces/content-responses.js +6 -0
- package/dist/interfaces/content-responses.js.map +1 -0
- package/dist/services/content-fetcher.d.ts +83 -0
- package/dist/services/content-fetcher.d.ts.map +1 -0
- package/dist/services/content-fetcher.js +280 -0
- package/dist/services/content-fetcher.js.map +1 -0
- package/dist/services/content-parser.d.ts +78 -0
- package/dist/services/content-parser.d.ts.map +1 -0
- package/dist/services/content-parser.js +317 -0
- package/dist/services/content-parser.js.map +1 -0
- package/dist/types/query-builder.d.ts +864 -0
- package/dist/types/query-builder.d.ts.map +1 -1
- package/dist/types/query-builder.js +988 -13
- package/dist/types/query-builder.js.map +1 -1
- package/dist/utils/content-scraper.d.ts +61 -0
- package/dist/utils/content-scraper.d.ts.map +1 -0
- package/dist/utils/content-scraper.js +217 -0
- package/dist/utils/content-scraper.js.map +1 -0
- package/dist/utils/rate-limiter.d.ts +76 -0
- package/dist/utils/rate-limiter.d.ts.map +1 -0
- package/dist/utils/rate-limiter.js +164 -0
- package/dist/utils/rate-limiter.js.map +1 -0
- package/package.json +14 -2
package/README.md
CHANGED
|
@@ -7,6 +7,7 @@ A comprehensive TypeScript client for the GDELT API that provides strongly-typed
|
|
|
7
7
|
- **Complete API Coverage**: Access all GDELT API endpoints including articles, images, timelines, tone analysis, and more
|
|
8
8
|
- **Type Safety**: Comprehensive TypeScript definitions for all parameters, responses, and configuration options
|
|
9
9
|
- **Query Building**: Fluent API for constructing complex search queries with type safety
|
|
10
|
+
- **Article Content**: Fetch and parse the full content of articles returned by GDELT API calls
|
|
10
11
|
- **Validation**: Built-in parameter validation with helpful error messages
|
|
11
12
|
- **Retry Logic**: Configurable retry mechanism for handling transient network errors
|
|
12
13
|
- **Response Validation**: Type guards and validation for ensuring data integrity
|
|
@@ -167,12 +168,88 @@ const query = client.query()
|
|
|
167
168
|
const articles = await client.getArticles(query);
|
|
168
169
|
```
|
|
169
170
|
|
|
171
|
+
#### Advanced Query Builder Capabilities
|
|
172
|
+
|
|
173
|
+
The query builder provides a comprehensive set of methods for constructing complex queries:
|
|
174
|
+
|
|
175
|
+
```typescript
|
|
176
|
+
// Basic query operations
|
|
177
|
+
const query = client.query()
|
|
178
|
+
.search('climate') // Simple search term
|
|
179
|
+
.phrase('global warming') // Exact phrase match
|
|
180
|
+
.anyOf('policy', 'agreement', 'treaty') // Match any of these terms
|
|
181
|
+
.allOf('emissions', 'reduction', 'targets') // Match all of these terms
|
|
182
|
+
.not('opinion') // Exclude this term
|
|
183
|
+
.build();
|
|
184
|
+
|
|
185
|
+
// Source filtering
|
|
186
|
+
const sourceQuery = client.query()
|
|
187
|
+
.phrase('artificial intelligence')
|
|
188
|
+
.fromDomain('techcrunch.com', true) // Exact domain match
|
|
189
|
+
.fromCountry('US') // Filter by country
|
|
190
|
+
.inLanguage('english') // Filter by language
|
|
191
|
+
.build();
|
|
192
|
+
|
|
193
|
+
// Tone analysis
|
|
194
|
+
const toneQuery = client.query()
|
|
195
|
+
.phrase('economic outlook')
|
|
196
|
+
.withTone('>', 5) // Articles with positive tone
|
|
197
|
+
.withAbsoluteTone('>', 7) // High emotional content
|
|
198
|
+
.withPositiveTone(3) // Alternative for positive tone
|
|
199
|
+
.withNegativeTone(-3) // Articles with negative tone
|
|
200
|
+
.withNeutralTone(1) // Articles with neutral tone
|
|
201
|
+
.withHighEmotion(5) // Articles with high emotional content
|
|
202
|
+
.build();
|
|
203
|
+
|
|
204
|
+
// Content filtering
|
|
205
|
+
const themeQuery = client.query()
|
|
206
|
+
.phrase('climate change')
|
|
207
|
+
.withTheme('ENV_CLIMATE') // Filter by GDELT theme
|
|
208
|
+
.build();
|
|
209
|
+
|
|
210
|
+
// Image-specific queries
|
|
211
|
+
const imageQuery = client.query()
|
|
212
|
+
.phrase('natural disaster')
|
|
213
|
+
.withImageTag('flood') // Filter by image tag
|
|
214
|
+
.withImageWebTag('emergency') // Filter by web image tag
|
|
215
|
+
.withImageOCR('rescue') // Filter by text in image
|
|
216
|
+
.withImageFaceCount('>', 3) // Images with more than 3 faces
|
|
217
|
+
.withImageFaceTone('<', 0) // Images with negative face expressions
|
|
218
|
+
.withImageWebCount('>', 10) // Images with high web presence
|
|
219
|
+
.withNovelImages(0.7) // Filter for novel images
|
|
220
|
+
.withPopularImages(0.8) // Filter for popular images
|
|
221
|
+
.build();
|
|
222
|
+
|
|
223
|
+
// Advanced text operations
|
|
224
|
+
const textQuery = client.query()
|
|
225
|
+
.withProximity(5, ['climate', 'action']) // Terms within 5 words of each other
|
|
226
|
+
.withRepeat(3, 'urgent') // Term appears at least 3 times
|
|
227
|
+
.build();
|
|
228
|
+
|
|
229
|
+
// Custom query components
|
|
230
|
+
const customQuery = client.query()
|
|
231
|
+
.custom('domain:nytimes.com OR domain:washingtonpost.com') // Custom query string
|
|
232
|
+
.build();
|
|
233
|
+
|
|
234
|
+
// Grouping for complex logic
|
|
235
|
+
const groupedQuery = client.query()
|
|
236
|
+
.phrase('climate change')
|
|
237
|
+
.group() // Group the previous terms
|
|
238
|
+
.anyOf('policy', 'legislation', 'regulation') // Match any of these terms
|
|
239
|
+
.build();
|
|
240
|
+
```
|
|
241
|
+
|
|
170
242
|
#### Article-specific Query Builder
|
|
171
243
|
|
|
244
|
+
The specialized ArticleQueryBuilder provides methods tailored for article searches:
|
|
245
|
+
|
|
172
246
|
```typescript
|
|
173
247
|
const articleQuery = client.articleQuery()
|
|
174
|
-
.breakingNews()
|
|
248
|
+
.breakingNews() // Filter for breaking news
|
|
249
|
+
.opinions() // Filter for opinion pieces
|
|
250
|
+
.localNews('New York') // Filter for local news
|
|
175
251
|
.fromDomain('cnn.com')
|
|
252
|
+
.withPositiveTone(2)
|
|
176
253
|
.build();
|
|
177
254
|
|
|
178
255
|
const articles = await client.getArticles(articleQuery);
|
|
@@ -180,26 +257,41 @@ const articles = await client.getArticles(articleQuery);
|
|
|
180
257
|
|
|
181
258
|
#### Image-specific Query Builder
|
|
182
259
|
|
|
260
|
+
The specialized ImageQueryBuilder provides methods tailored for image searches:
|
|
261
|
+
|
|
183
262
|
```typescript
|
|
184
263
|
const imageQuery = client.imageQuery()
|
|
185
|
-
.disasters()
|
|
264
|
+
.disasters() // Filter for disaster images
|
|
265
|
+
.politicalEvents() // Filter for political events
|
|
266
|
+
.medicalContent() // Filter for medical content
|
|
267
|
+
.positiveImages() // Filter for positive images
|
|
268
|
+
.negativeImages() // Filter for negative images
|
|
186
269
|
.withNovelImages()
|
|
187
270
|
.build();
|
|
188
271
|
|
|
189
272
|
const images = await client.getImages(imageQuery);
|
|
190
273
|
```
|
|
191
274
|
|
|
192
|
-
|
|
275
|
+
#### Query Validation and Optimization
|
|
193
276
|
|
|
194
|
-
|
|
277
|
+
The client provides methods to validate and optimize queries:
|
|
195
278
|
|
|
196
279
|
```typescript
|
|
280
|
+
// Validate a query before execution
|
|
197
281
|
const validation = client.validateQuery('climate change AND (weather OR temperature)');
|
|
198
282
|
|
|
199
283
|
if (!validation.valid) {
|
|
200
284
|
console.error('Query errors:', validation.errors);
|
|
201
285
|
}
|
|
202
286
|
|
|
287
|
+
// Check query complexity
|
|
288
|
+
const complexity = client.getQueryComplexity('climate change AND (weather OR temperature)');
|
|
289
|
+
console.log(`Query complexity: ${complexity}`);
|
|
290
|
+
|
|
291
|
+
// Check for balanced quotes
|
|
292
|
+
const balanced = client.hasBalancedQuotes('climate "change');
|
|
293
|
+
console.log(`Query has balanced quotes: ${balanced}`);
|
|
294
|
+
|
|
203
295
|
// Get optimization suggestions
|
|
204
296
|
const suggestions = client.getQueryOptimizations('very complex query here');
|
|
205
297
|
suggestions.forEach(suggestion => {
|
|
@@ -212,6 +304,8 @@ suggestions.forEach(suggestion => {
|
|
|
212
304
|
| Method | Description |
|
|
213
305
|
|--------|-------------|
|
|
214
306
|
| `getArticles()` | Search for news articles |
|
|
307
|
+
| `getArticlesWithContent()` | Search for news articles and fetch their full content |
|
|
308
|
+
| `fetchContentForArticles()` | Fetch full content for existing articles |
|
|
215
309
|
| `getImages()` | Search for images and photos |
|
|
216
310
|
| `getTimeline()` | Get timeline of coverage volume |
|
|
217
311
|
| `getTimelineWithArticles()` | Get timeline with article details |
|
|
@@ -354,6 +448,132 @@ response.articles.forEach(article => {
|
|
|
354
448
|
});
|
|
355
449
|
```
|
|
356
450
|
|
|
451
|
+
### Article Content Fetching
|
|
452
|
+
|
|
453
|
+
The client provides methods to fetch and parse the full content of articles returned by GDELT API calls:
|
|
454
|
+
|
|
455
|
+
```typescript
|
|
456
|
+
import { GdeltClient } from 'gdelt-ts-client';
|
|
457
|
+
|
|
458
|
+
const client = new GdeltClient({
|
|
459
|
+
contentFetcher: {
|
|
460
|
+
concurrencyLimit: 3, // Maximum number of concurrent requests
|
|
461
|
+
requestDelay: 1500, // Delay between requests to the same domain (ms)
|
|
462
|
+
userAgent: 'MyApp/1.0.0', // Custom user agent
|
|
463
|
+
respectRobotsTxt: true // Whether to respect robots.txt
|
|
464
|
+
}
|
|
465
|
+
});
|
|
466
|
+
|
|
467
|
+
// Fetch articles with content in one call
|
|
468
|
+
const articlesWithContent = await client.getArticlesWithContent({
|
|
469
|
+
query: 'climate change',
|
|
470
|
+
timespan: '1d',
|
|
471
|
+
maxrecords: 10
|
|
472
|
+
});
|
|
473
|
+
|
|
474
|
+
console.log(`Fetched ${articlesWithContent.contentStats.successCount} articles with content`);
|
|
475
|
+
|
|
476
|
+
articlesWithContent.articles.forEach(article => {
|
|
477
|
+
if (article.content) {
|
|
478
|
+
console.log(`Title: ${article.title}`);
|
|
479
|
+
console.log(`Word Count: ${article.content.wordCount}`);
|
|
480
|
+
console.log(`Content Preview: ${article.content.text.substring(0, 200)}...`);
|
|
481
|
+
}
|
|
482
|
+
});
|
|
483
|
+
```
|
|
484
|
+
|
|
485
|
+
#### Fetching Content for Existing Articles
|
|
486
|
+
|
|
487
|
+
You can also fetch content for articles you've already retrieved:
|
|
488
|
+
|
|
489
|
+
```typescript
|
|
490
|
+
// Get articles first
|
|
491
|
+
const articles = await client.getArticles({
|
|
492
|
+
query: 'technology news',
|
|
493
|
+
timespan: '2h',
|
|
494
|
+
maxrecords: 20
|
|
495
|
+
});
|
|
496
|
+
|
|
497
|
+
// Then selectively fetch content
|
|
498
|
+
const articlesWithContent = await client.fetchContentForArticles(
|
|
499
|
+
articles.articles,
|
|
500
|
+
{
|
|
501
|
+
allowedDomains: ['bbc.com', 'reuters.com', 'apnews.com'], // Only fetch from these domains
|
|
502
|
+
concurrencyLimit: 2, // Limit concurrent requests
|
|
503
|
+
onProgress: (completed, total) => { // Track progress
|
|
504
|
+
console.log(`Progress: ${completed}/${total} articles processed`);
|
|
505
|
+
},
|
|
506
|
+
includeFailures: true, // Include articles where content fetching failed
|
|
507
|
+
parseContent: true, // Parse and clean the content
|
|
508
|
+
includeRawHTML: false // Don't include raw HTML in the response
|
|
509
|
+
}
|
|
510
|
+
);
|
|
511
|
+
|
|
512
|
+
// Filter successful content fetches
|
|
513
|
+
const successfulArticles = articlesWithContent.filter(
|
|
514
|
+
article => article.contentResult.success
|
|
515
|
+
);
|
|
516
|
+
|
|
517
|
+
console.log(`Successfully fetched content for ${successfulArticles.length} articles`);
|
|
518
|
+
```
|
|
519
|
+
|
|
520
|
+
#### Content Fetching Configuration
|
|
521
|
+
|
|
522
|
+
The content fetcher can be configured with various options:
|
|
523
|
+
|
|
524
|
+
```typescript
|
|
525
|
+
const client = new GdeltClient({
|
|
526
|
+
contentFetcher: {
|
|
527
|
+
// Request control
|
|
528
|
+
concurrencyLimit: 5, // Maximum concurrent requests
|
|
529
|
+
requestDelay: 1000, // Delay between requests to same domain (ms)
|
|
530
|
+
timeout: 10000, // Request timeout (ms)
|
|
531
|
+
maxRetries: 2, // Maximum retries per request
|
|
532
|
+
|
|
533
|
+
// Rate limiting
|
|
534
|
+
maxRequestsPerSecond: 1, // Maximum requests per second per domain
|
|
535
|
+
maxRequestsPerMinute: 30, // Maximum requests per minute per domain
|
|
536
|
+
|
|
537
|
+
// Ethical scraping
|
|
538
|
+
respectRobotsTxt: true, // Respect robots.txt files
|
|
539
|
+
userAgent: 'MyApp/1.0.0', // User agent string
|
|
540
|
+
|
|
541
|
+
// Domain filtering
|
|
542
|
+
skipDomains: ['paywall.com', 'subscription-only.com'], // Skip these domains
|
|
543
|
+
|
|
544
|
+
// Request behavior
|
|
545
|
+
followRedirects: true, // Follow redirects
|
|
546
|
+
maxRedirects: 5, // Maximum redirects to follow
|
|
547
|
+
customHeaders: { // Custom headers for requests
|
|
548
|
+
'Accept-Language': 'en-US,en;q=0.9'
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
});
|
|
552
|
+
```
|
|
553
|
+
|
|
554
|
+
#### Content Fetching Statistics
|
|
555
|
+
|
|
556
|
+
You can access statistics about the content fetching process:
|
|
557
|
+
|
|
558
|
+
```typescript
|
|
559
|
+
const result = await client.getArticlesWithContent({
|
|
560
|
+
query: 'breaking news',
|
|
561
|
+
timespan: '1h',
|
|
562
|
+
maxrecords: 50
|
|
563
|
+
});
|
|
564
|
+
|
|
565
|
+
// Check content fetching statistics
|
|
566
|
+
console.log('Content Fetching Statistics:');
|
|
567
|
+
console.log(`Success Rate: ${(result.contentStats.successCount / result.count * 100).toFixed(1)}%`);
|
|
568
|
+
console.log(`Average Fetch Time: ${result.contentStats.averageFetchTime}ms`);
|
|
569
|
+
console.log(`Total Time: ${result.contentStats.totalFetchTime}ms`);
|
|
570
|
+
|
|
571
|
+
// Log failure reasons
|
|
572
|
+
Object.entries(result.contentStats.failureReasons).forEach(([reason, count]) => {
|
|
573
|
+
console.log(`${reason}: ${count} failures`);
|
|
574
|
+
});
|
|
575
|
+
```
|
|
576
|
+
|
|
357
577
|
### Timeline Analysis
|
|
358
578
|
|
|
359
579
|
```typescript
|
package/dist/client.d.ts
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import { ETimespanUnit } from './constants';
|
|
2
2
|
import { IGdeltApiBaseParams, IGdeltClientConfig } from './interfaces/api-parameters';
|
|
3
|
-
import { IArticleListResponse, IImageCollageResponse, ITimelineResponse, ITimelineBreakdownResponse, IToneChartResponse, IWordCloudResponse } from './interfaces/api-responses';
|
|
3
|
+
import { IArticleListResponse, IImageCollageResponse, ITimelineResponse, ITimelineBreakdownResponse, IToneChartResponse, IWordCloudResponse, IArticle } from './interfaces/api-responses';
|
|
4
|
+
import { IArticleListWithContentResponse, IArticleWithContent } from './interfaces/content-responses';
|
|
5
|
+
import { IFetchContentOptions } from './interfaces/content-fetcher';
|
|
4
6
|
import { TimespanUnitType, TimespanString, ComplexQuery } from './types/enhanced-types';
|
|
5
7
|
import { QueryBuilder, ArticleQueryBuilder, ImageQueryBuilder } from './types/query-builder';
|
|
8
|
+
import { ContentFetcherService } from './services/content-fetcher';
|
|
6
9
|
/**
|
|
7
10
|
* GDELT API Client
|
|
8
11
|
* A strongly-typed client for interacting with the GDELT API
|
|
@@ -38,6 +41,11 @@ export declare class GdeltClient {
|
|
|
38
41
|
* @private
|
|
39
42
|
*/
|
|
40
43
|
private readonly _retryDelay;
|
|
44
|
+
/**
|
|
45
|
+
* The content fetcher service for article content retrieval
|
|
46
|
+
* @private
|
|
47
|
+
*/
|
|
48
|
+
private readonly _contentFetcher;
|
|
41
49
|
/**
|
|
42
50
|
* Creates a new GDELT API client
|
|
43
51
|
* @param config - The client configuration
|
|
@@ -59,6 +67,7 @@ export declare class GdeltClient {
|
|
|
59
67
|
/**
|
|
60
68
|
* Transforms API response data without mutating the original
|
|
61
69
|
* @param data - The original response data
|
|
70
|
+
* @param mode - The API mode used for the request
|
|
62
71
|
* @returns The transformed response data
|
|
63
72
|
* @private
|
|
64
73
|
*/
|
|
@@ -228,5 +237,54 @@ export declare class GdeltClient {
|
|
|
228
237
|
* Enhanced response validation and transformation
|
|
229
238
|
*/
|
|
230
239
|
private _transformAndValidateResponse;
|
|
240
|
+
/**
|
|
241
|
+
* Fetch articles with their full content
|
|
242
|
+
* @param params - Standard GDELT API parameters
|
|
243
|
+
* @param fetchOptions - Content fetching options
|
|
244
|
+
* @returns Articles with full content included
|
|
245
|
+
*/
|
|
246
|
+
getArticlesWithContent(params: IGdeltApiBaseParams, fetchOptions?: IFetchContentOptions): Promise<IArticleListWithContentResponse>;
|
|
247
|
+
/**
|
|
248
|
+
* Fetch articles with their full content (overload with query string)
|
|
249
|
+
* @param query - Query string or ComplexQuery
|
|
250
|
+
* @param fetchOptions - Content fetching options
|
|
251
|
+
* @returns Articles with full content included
|
|
252
|
+
*/
|
|
253
|
+
getArticlesWithContent(query: string | ComplexQuery, fetchOptions?: IFetchContentOptions): Promise<IArticleListWithContentResponse>;
|
|
254
|
+
/**
|
|
255
|
+
* Fetch content for existing articles
|
|
256
|
+
* @param articles - Array of article objects
|
|
257
|
+
* @param options - Content fetching options
|
|
258
|
+
* @returns Articles with content added
|
|
259
|
+
*/
|
|
260
|
+
fetchContentForArticles(articles: IArticle[], options?: IFetchContentOptions): Promise<IArticleWithContent[]>;
|
|
261
|
+
/**
|
|
262
|
+
* Get content fetcher service instance
|
|
263
|
+
* @returns Content fetcher service
|
|
264
|
+
*/
|
|
265
|
+
getContentFetcher(): ContentFetcherService;
|
|
266
|
+
/**
|
|
267
|
+
* Merge articles with content results
|
|
268
|
+
* @param articles - Original article response
|
|
269
|
+
* @param contentResults - Content fetch results
|
|
270
|
+
* @returns Merged response with content
|
|
271
|
+
* @private
|
|
272
|
+
*/
|
|
273
|
+
private _mergeArticlesWithContent;
|
|
274
|
+
/**
|
|
275
|
+
* Merge articles array with content results
|
|
276
|
+
* @param articles - Original articles array
|
|
277
|
+
* @param contentResults - Content fetch results
|
|
278
|
+
* @returns Merged articles with content
|
|
279
|
+
* @private
|
|
280
|
+
*/
|
|
281
|
+
private _mergeArticlesWithContentArray;
|
|
282
|
+
/**
|
|
283
|
+
* Calculate content fetching statistics
|
|
284
|
+
* @param contentResults - Content fetch results
|
|
285
|
+
* @returns Content statistics
|
|
286
|
+
* @private
|
|
287
|
+
*/
|
|
288
|
+
private _calculateContentStats;
|
|
231
289
|
}
|
|
232
290
|
//# sourceMappingURL=client.d.ts.map
|
package/dist/client.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../src/client.ts"],"names":[],"mappings":"AACA,OAAO,EAIL,aAAa,EACd,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,mBAAmB,EACnB,kBAAkB,EAEnB,MAAM,6BAA6B,CAAC;AACrC,OAAO,EACL,oBAAoB,EACpB,qBAAqB,EACrB,iBAAiB,EACjB,0BAA0B,EAC1B,kBAAkB,EAClB,kBAAkB,
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../src/client.ts"],"names":[],"mappings":"AACA,OAAO,EAIL,aAAa,EACd,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,mBAAmB,EACnB,kBAAkB,EAEnB,MAAM,6BAA6B,CAAC;AACrC,OAAO,EACL,oBAAoB,EACpB,qBAAqB,EACrB,iBAAiB,EACjB,0BAA0B,EAC1B,kBAAkB,EAClB,kBAAkB,EAClB,QAAQ,EACT,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACL,+BAA+B,EAC/B,mBAAmB,EAEpB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAC;AACpE,OAAO,EAEL,gBAAgB,EAChB,cAAc,EACd,YAAY,EAKb,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,YAAY,EACZ,mBAAmB,EACnB,iBAAiB,EAElB,MAAM,uBAAuB,CAAC;AAY/B,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AAEnE;;;GAGG;AACH,qBAAa,WAAW;IACtB;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAgB;IAE/C;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAElC;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;IAEzC;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IAEjC;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IAErC;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IAErC;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAwB;IAExD;;;OAGG;gBACgB,MAAM,CAAC,EAAE,kBAAkB;IAuB9C;;;;;OAKG;IACH,OAAO,CAAC,qBAAqB;IAI7B;;;;OAIG;IACH,OAAO,CAAC,eAAe;IAsCvB;;;;;;OAMG;IACH,OAAO,CAAC,kBAAkB;IAkE1B;;;;;OAKG;IACH,OAAO,CAAC,iBAAiB;IAsDzB;;;;;OAKG;YACW,YAAY;IAmC1B;;;;OAIG;IACU,WAAW,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,oBAAoB,CAAC;IACpF;;;;;OAKG;IACU,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,YAAY,EAAE,OAAO,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GAAG,OAAO,CAAC,oBAAoB,CAAC;IA8B7H;;;;OAIG;IACU,SAAS,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,qBAAqB,CAAC;IACnF;;;;;OAKG;IACU,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,YAAY,EAAE,OAAO,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GAAG,OAAO,CAAC,qBAAqB,CAAC;IA8B5H;;;;OAIG;IACU,WAAW,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IACjF;;;;;OAKG;IACU,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,YAAY,EAAE,iBAAiB,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,mBAAmB,CAAC,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAqCrJ;;;;OAIG;IACU,uBAAuB,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAC7F;;;;;OAKG;IACU,uBAAuB,CAAC,KAAK,EAAE,MAAM,GAAG,YAAY,EAAE,OAAO,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GAAG,OAAO,CAAC,iBAAiB,CAAC;
IA6BtI;;;;OAIG;IACU,qBAAqB,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,0BAA0B,CAAC;IAQpG;;;;OAIG;IACU,oBAAoB,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,0BAA0B,CAAC;IAQnG;;;;OAIG;IACU,eAAe,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IACrF;;;;;OAKG;IACU,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,YAAY,EAAE,OAAO,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GAAG,OAAO,CAAC,iBAAiB,CAAC;IA6B9H;;;;OAIG;IACU,YAAY,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,kBAAkB,CAAC;IACnF;;;;;OAKG;IACU,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,YAAY,EAAE,OAAO,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAsC5H;;;;OAIG;IACU,gBAAgB,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAQvF;;;;OAIG;IACU,mBAAmB,CAAC,MAAM,EAAE,mBAAmB,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAQ1F;;;;;OAKG;IACI,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,aAAa,GAAG,MAAM;IACjE;;;;;OAKG;IACI,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,gBAAgB,GAAG,cAAc;IAkB5E;;OAEG;IACI,KAAK,IAAI,YAAY;IAI5B;;OAEG;IACI,YAAY,IAAI,mBAAmB;IAI1C;;OAEG;IACI,UAAU,IAAI,iBAAiB;IAItC;;OAEG;IACI,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG;QAAE,KAAK,EAAE,OAAO,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAA;KAAE;IA0BzE;;OAEG;IACI,qBAAqB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE;IAyBrD;;OAEG;IACH,OAAO,CAAC,uBAAuB;IA+B/B;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAuE7B;;OAEG;IACH,OAAO,CAAC,6BAA6B;IAarC;;;;;OAKG;IACU,sBAAsB,CACjC,MAAM,EAAE,mBAAmB,EAC3B,YAAY,CAAC,EAAE,oBAAoB,GAClC,OAAO,CAAC,+BAA+B,CAAC;IAC3C;;;;;OAKG;IACU,sBAAsB,CACjC,KAAK,EAAE,MAAM,GAAG,YAAY,EAC5B,YAAY,CAAC,EAAE,oBAAoB,GAClC,OAAO,CAAC,+BAA+B,CAAC;IAgC3C;;;;;OAKG;IACU,uBAAuB,CAClC,QAAQ,EAAE,QAAQ,EAAE,EACpB,OAAO,CAAC,EAAE,oBAAoB,GAC7B,OAAO,CAAC,mBAAmB,EAAE,CAAC;IAcjC;;;OAGG;IACI,iBAAiB,IAAI,qBAAqB;IAIjD;;;;;;OAMG;IACH,OAAO,CAAC,yBAAyB;IA2CjC;;;;;;OAMG;IACH,OAAO,CAAC,8BAA8B;IAyCtC;;;;;OAKG;IACH,OAAO,CAAC,sBAAsB;CA2C/B"}
|
package/dist/client.js
CHANGED
|
@@ -19,6 +19,7 @@ const enhanced_types_1 = require("./types/enhanced-types");
|
|
|
19
19
|
const query_builder_1 = require("./types/query-builder");
|
|
20
20
|
const type_guards_1 = require("./types/type-guards");
|
|
21
21
|
const lookups_1 = require("./types/lookups");
|
|
22
|
+
const content_fetcher_1 = require("./services/content-fetcher");
|
|
22
23
|
/**
|
|
23
24
|
* GDELT API Client
|
|
24
25
|
* A strongly-typed client for interacting with the GDELT API
|
|
@@ -46,6 +47,8 @@ class GdeltClient {
|
|
|
46
47
|
'Content-Type': 'application/json'
|
|
47
48
|
}
|
|
48
49
|
});
|
|
50
|
+
// Initialize content fetcher service
|
|
51
|
+
this._contentFetcher = new content_fetcher_1.ContentFetcherService(config === null || config === void 0 ? void 0 : config.contentFetcher);
|
|
49
52
|
}
|
|
50
53
|
/**
|
|
51
54
|
* Creates a timespan string from a timespan object
|
|
@@ -97,11 +100,18 @@ class GdeltClient {
|
|
|
97
100
|
/**
|
|
98
101
|
* Transforms API response data without mutating the original
|
|
99
102
|
* @param data - The original response data
|
|
103
|
+
* @param mode - The API mode used for the request
|
|
100
104
|
* @returns The transformed response data
|
|
101
105
|
* @private
|
|
102
106
|
*/
|
|
103
|
-
_transformResponse(data) {
|
|
104
|
-
|
|
107
|
+
_transformResponse(data, mode) {
|
|
108
|
+
// For backward compatibility with tests, still throw error for null/undefined
|
|
109
|
+
if (!data) {
|
|
110
|
+
throw new Error('Invalid response data: expected object');
|
|
111
|
+
}
|
|
112
|
+
// Empty object responses are handled gracefully: missing result arrays are defaulted per mode further below
|
|
113
|
+
// For non-objects, throw error for backward compatibility
|
|
114
|
+
if (typeof data !== 'object') {
|
|
105
115
|
throw new Error('Invalid response data: expected object');
|
|
106
116
|
}
|
|
107
117
|
// Create a new object without mutating the original
|
|
@@ -110,6 +120,28 @@ class GdeltClient {
|
|
|
110
120
|
if (!('status' in transformedData)) {
|
|
111
121
|
transformedData['status'] = 'ok';
|
|
112
122
|
}
|
|
123
|
+
// Ensure expected array properties exist based on the mode
|
|
124
|
+
if (mode === constants_1.EMode.articleList && !('articles' in transformedData)) {
|
|
125
|
+
transformedData['articles'] = [];
|
|
126
|
+
}
|
|
127
|
+
if (mode === constants_1.EMode.imageCollageInfo && !('images' in transformedData)) {
|
|
128
|
+
transformedData['images'] = [];
|
|
129
|
+
}
|
|
130
|
+
if ((mode === constants_1.EMode.timelineVolume || mode === constants_1.EMode.timelineVolumeInfo || mode === constants_1.EMode.timelineTone)
|
|
131
|
+
&& !('timeline' in transformedData)) {
|
|
132
|
+
transformedData['timeline'] = [];
|
|
133
|
+
}
|
|
134
|
+
if ((mode === constants_1.EMode.timelineLanguage || mode === constants_1.EMode.timelineSourceCountry)
|
|
135
|
+
&& !('data' in transformedData)) {
|
|
136
|
+
transformedData['data'] = [];
|
|
137
|
+
}
|
|
138
|
+
if (mode === constants_1.EMode.toneChart && !('tonechart' in transformedData)) {
|
|
139
|
+
transformedData['tonechart'] = [];
|
|
140
|
+
}
|
|
141
|
+
if ((mode === constants_1.EMode.wordCloudImageTags || mode === constants_1.EMode.wordCloudImageWebTags)
|
|
142
|
+
&& !('wordcloud' in transformedData)) {
|
|
143
|
+
transformedData['wordcloud'] = [];
|
|
144
|
+
}
|
|
113
145
|
// Add count property for article list responses
|
|
114
146
|
if ('articles' in transformedData && Array.isArray(transformedData["articles"]) && !('count' in transformedData)) {
|
|
115
147
|
transformedData['count'] = transformedData["articles"].length;
|
|
@@ -205,7 +237,7 @@ class GdeltClient {
|
|
|
205
237
|
throw new Error(response.data);
|
|
206
238
|
}
|
|
207
239
|
// Transform response data without mutating original
|
|
208
|
-
const transformedData = this._transformResponse(response.data);
|
|
240
|
+
const transformedData = this._transformResponse(response.data, params.mode);
|
|
209
241
|
return transformedData;
|
|
210
242
|
});
|
|
211
243
|
}
|
|
@@ -342,7 +374,7 @@ class GdeltClient {
|
|
|
342
374
|
const response = yield this._makeRequest(Object.assign(Object.assign({}, finalParams), { mode: constants_1.EMode.toneChart, format: constants_1.EFormat.json }));
|
|
343
375
|
// Enhanced validation with type guard
|
|
344
376
|
const validatedResponse = this._transformAndValidateResponse(response, type_guards_1.TypeGuards.isToneChartResponse);
|
|
345
|
-
// Ensure the response has the expected structure
|
|
377
|
+
// Ensure the response has the expected structure with tonechart property
|
|
346
378
|
if (!validatedResponse.tonechart) {
|
|
347
379
|
throw new Error('Invalid response format from GDELT API: missing tonechart property');
|
|
348
380
|
}
|
|
@@ -552,6 +584,158 @@ class GdeltClient {
|
|
|
552
584
|
}
|
|
553
585
|
return transformedData;
|
|
554
586
|
}
|
|
587
|
+
getArticlesWithContent(paramsOrQuery, fetchOptions) {
|
|
588
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
589
|
+
// 1. Get articles from GDELT API
|
|
590
|
+
let articles;
|
|
591
|
+
if (typeof paramsOrQuery === 'string') {
|
|
592
|
+
// Call the string overload
|
|
593
|
+
articles = yield this.getArticles(paramsOrQuery);
|
|
594
|
+
}
|
|
595
|
+
else if ('mode' in paramsOrQuery) {
|
|
596
|
+
// Call the IGdeltApiBaseParams overload
|
|
597
|
+
articles = yield this.getArticles(paramsOrQuery);
|
|
598
|
+
}
|
|
599
|
+
else {
|
|
600
|
+
// Call the ComplexQuery overload (which is handled by the string overload)
|
|
601
|
+
articles = yield this.getArticles(paramsOrQuery);
|
|
602
|
+
}
|
|
603
|
+
// 2. Extract URLs from articles
|
|
604
|
+
const urls = articles.articles.map(article => article.url);
|
|
605
|
+
// 3. Fetch content for all URLs
|
|
606
|
+
const contentResults = yield this._contentFetcher.fetchMultipleArticleContent(urls, fetchOptions);
|
|
607
|
+
// 4. Merge content with article metadata
|
|
608
|
+
return this._mergeArticlesWithContent(articles, contentResults);
|
|
609
|
+
});
|
|
610
|
+
}
|
|
611
|
+
/**
|
|
612
|
+
* Fetch content for existing articles
|
|
613
|
+
* @param articles - Array of article objects
|
|
614
|
+
* @param options - Content fetching options
|
|
615
|
+
* @returns Articles with content added
|
|
616
|
+
*/
|
|
617
|
+
fetchContentForArticles(articles, options) {
|
|
618
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
619
|
+
// Extract URLs from articles
|
|
620
|
+
const urls = articles.map(article => article.url);
|
|
621
|
+
// Fetch content for all URLs
|
|
622
|
+
const contentResults = yield this._contentFetcher.fetchMultipleArticleContent(urls, options);
|
|
623
|
+
// Merge content with article metadata
|
|
624
|
+
return this._mergeArticlesWithContentArray(articles, contentResults);
|
|
625
|
+
});
|
|
626
|
+
}
|
|
627
|
+
/**
|
|
628
|
+
* Get content fetcher service instance
|
|
629
|
+
* @returns Content fetcher service
|
|
630
|
+
*/
|
|
631
|
+
getContentFetcher() {
|
|
632
|
+
return this._contentFetcher;
|
|
633
|
+
}
|
|
634
|
+
/**
|
|
635
|
+
* Merge articles with content results
|
|
636
|
+
* @param articles - Original article response
|
|
637
|
+
* @param contentResults - Content fetch results
|
|
638
|
+
* @returns Merged response with content
|
|
639
|
+
* @private
|
|
640
|
+
*/
|
|
641
|
+
_mergeArticlesWithContent(articles, contentResults) {
|
|
642
|
+
var _a;
|
|
643
|
+
const articlesWithContent = [];
|
|
644
|
+
const resultMap = new Map();
|
|
645
|
+
// Create map of URL to content result
|
|
646
|
+
for (const result of contentResults) {
|
|
647
|
+
resultMap.set(result.url, result);
|
|
648
|
+
}
|
|
649
|
+
// Merge articles with content
|
|
650
|
+
for (const article of articles.articles) {
|
|
651
|
+
const contentResult = resultMap.get(article.url);
|
|
652
|
+
if (contentResult) {
|
|
653
|
+
const articleWithContent = Object.assign(Object.assign({}, article), { content: contentResult.success ? (_a = contentResult.content) !== null && _a !== void 0 ? _a : null : null });
|
|
654
|
+
if (!contentResult.success && contentResult.error) {
|
|
655
|
+
articleWithContent.contentError = contentResult.error;
|
|
656
|
+
}
|
|
657
|
+
if (contentResult.timing) {
|
|
658
|
+
articleWithContent.contentTiming = contentResult.timing;
|
|
659
|
+
}
|
|
660
|
+
articlesWithContent.push(articleWithContent);
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
// Calculate statistics
|
|
664
|
+
const contentStats = this._calculateContentStats(contentResults);
|
|
665
|
+
return Object.assign(Object.assign({}, articles), { articles: articlesWithContent, contentStats });
|
|
666
|
+
}
|
|
667
|
+
/**
|
|
668
|
+
* Merge articles array with content results
|
|
669
|
+
* @param articles - Original articles array
|
|
670
|
+
* @param contentResults - Content fetch results
|
|
671
|
+
* @returns Merged articles with content
|
|
672
|
+
* @private
|
|
673
|
+
*/
|
|
674
|
+
_mergeArticlesWithContentArray(articles, contentResults) {
|
|
675
|
+
var _a;
|
|
676
|
+
// If there are no articles or no content results, return empty array
|
|
677
|
+
if (!articles.length || !(contentResults === null || contentResults === void 0 ? void 0 : contentResults.length)) {
|
|
678
|
+
return [];
|
|
679
|
+
}
|
|
680
|
+
const articlesWithContent = [];
|
|
681
|
+
const resultMap = new Map();
|
|
682
|
+
// Create map of URL to content result
|
|
683
|
+
for (const result of contentResults) {
|
|
684
|
+
resultMap.set(result.url, result);
|
|
685
|
+
}
|
|
686
|
+
// Merge articles with content
|
|
687
|
+
for (const article of articles) {
|
|
688
|
+
const contentResult = resultMap.get(article.url);
|
|
689
|
+
if (contentResult) {
|
|
690
|
+
const articleWithContent = Object.assign(Object.assign({}, article), { content: contentResult.success ? ((_a = contentResult.content) !== null && _a !== void 0 ? _a : null) : null });
|
|
691
|
+
if (!contentResult.success && contentResult.error) {
|
|
692
|
+
articleWithContent.contentError = contentResult.error;
|
|
693
|
+
}
|
|
694
|
+
if (contentResult.timing) {
|
|
695
|
+
articleWithContent.contentTiming = contentResult.timing;
|
|
696
|
+
}
|
|
697
|
+
articlesWithContent.push(articleWithContent);
|
|
698
|
+
}
|
|
699
|
+
}
|
|
700
|
+
return articlesWithContent;
|
|
701
|
+
}
|
|
702
|
+
/**
|
|
703
|
+
* Calculate content fetching statistics
|
|
704
|
+
* @param contentResults - Content fetch results
|
|
705
|
+
* @returns Content statistics
|
|
706
|
+
* @private
|
|
707
|
+
*/
|
|
708
|
+
_calculateContentStats(contentResults) {
|
|
709
|
+
var _a;
|
|
710
|
+
const totalArticles = contentResults.length;
|
|
711
|
+
const successfulFetches = contentResults.filter(r => r.success).length;
|
|
712
|
+
const failedFetches = contentResults.filter(r => !r.success).length;
|
|
713
|
+
const totalFetchTime = Math.max(...contentResults.map(r => r.timing.totalTime));
|
|
714
|
+
const averageFetchTime = contentResults.length > 0 ?
|
|
715
|
+
contentResults.reduce((sum, r) => sum + r.timing.fetchTime, 0) / contentResults.length : 0;
|
|
716
|
+
const averageParseTime = contentResults.length > 0 ?
|
|
717
|
+
contentResults.reduce((sum, r) => sum + r.timing.parseTime, 0) / contentResults.length : 0;
|
|
718
|
+
const totalWords = contentResults
|
|
719
|
+
.filter(r => r.success && r.content)
|
|
720
|
+
.reduce((sum, r) => { var _a, _b; return sum + ((_b = (_a = r.content) === null || _a === void 0 ? void 0 : _a.wordCount) !== null && _b !== void 0 ? _b : 0); }, 0);
|
|
721
|
+
const failureReasons = {};
|
|
722
|
+
for (const result of contentResults) {
|
|
723
|
+
if (!result.success && result.error) {
|
|
724
|
+
const reason = result.error.code || 'UNKNOWN';
|
|
725
|
+
failureReasons[reason] = ((_a = failureReasons[reason]) !== null && _a !== void 0 ? _a : 0) + 1;
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
return {
|
|
729
|
+
totalArticles,
|
|
730
|
+
successfulFetches,
|
|
731
|
+
failedFetches,
|
|
732
|
+
averageFetchTime,
|
|
733
|
+
averageParseTime,
|
|
734
|
+
totalFetchTime,
|
|
735
|
+
totalWords,
|
|
736
|
+
failureReasons
|
|
737
|
+
};
|
|
738
|
+
}
|
|
555
739
|
}
|
|
556
740
|
exports.GdeltClient = GdeltClient;
|
|
557
741
|
//# sourceMappingURL=client.js.map
|