adstxt-validator 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1273 @@
1
+ "use strict";
2
// Module-interop helpers (this is the shape TypeScript emits for `import * as ns`).
// Each helper defers to an existing implementation on `this` when one is present.

/**
 * Expose `source[key]` on `target` under `alias` (defaults to `key`).
 * A live getter is installed unless the source property is already an accessor
 * on an ES module or a non-writable, non-configurable data property.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        if (alias === undefined) alias = key;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter = !descriptor ||
            ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    : function (target, source, key, alias) {
        if (alias === undefined) alias = key;
        target[alias] = source[key];
    });

/**
 * Attach `value` as the enumerable `default` property of `target`.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        target["default"] = value;
    });

/**
 * Wrap a CommonJS module in a namespace-like object: every own property except
 * `default` is re-bound onto the result, and the module itself becomes `default`.
 * Modules already flagged `__esModule` are returned untouched.
 */
var __importStar = (this && this.__importStar) || (function () {
    var listOwnKeys = Object.getOwnPropertyNames || function (obj) {
        var names = [];
        for (var name in obj) {
            if (Object.prototype.hasOwnProperty.call(obj, name)) names.push(name);
        }
        return names;
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            for (var index = 0; index < names.length; index++) {
                if (names[index] === "default") continue;
                __createBinding(namespace, mod, names[index]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.getSupportedLocales = exports.isSupportedLocale = exports.configureMessages = exports.createValidationMessage = exports.getMessageProvider = exports.setMessageProvider = exports.DefaultMessageProvider = exports.ERROR_KEYS = exports.VALIDATION_KEYS = exports.Severity = void 0;
37
+ exports.isAdsTxtRecord = isAdsTxtRecord;
38
+ exports.isAdsTxtVariable = isAdsTxtVariable;
39
+ exports.parseAdsTxtVariable = parseAdsTxtVariable;
40
+ exports.parseAdsTxtLine = parseAdsTxtLine;
41
+ exports.parseAdsTxtContent = parseAdsTxtContent;
42
+ exports.crossCheckAdsTxtRecords = crossCheckAdsTxtRecords;
43
+ exports.checkForDuplicates = checkForDuplicates;
44
+ exports.optimizeAdsTxt = optimizeAdsTxt;
45
+ exports.isValidEmail = isValidEmail;
46
+ const psl = __importStar(require("psl"));
47
+ /**
48
+ * Utility to validate and parse Ads.txt data
49
+ */
50
// Severity levels attached to validation results.
// The same object is published as `exports.Severity`.
var Severity;
if (!Severity) {
    Severity = {};
    exports.Severity = Severity;
}
Object.assign(Severity, {
    ERROR: "error",
    WARNING: "warning",
    INFO: "info",
});
57
+ // Validation keys without namespace prefixes for cleaner structure
58
+ exports.VALIDATION_KEYS = {
59
+ MISSING_FIELDS: 'missingFields',
60
+ INVALID_FORMAT: 'invalidFormat',
61
+ INVALID_RELATIONSHIP: 'invalidRelationship',
62
+ INVALID_DOMAIN: 'invalidDomain',
63
+ EMPTY_ACCOUNT_ID: 'emptyAccountId',
64
+ IMPLIMENTED: 'implimentedEntry',
65
+ NO_SELLERS_JSON: 'noSellersJson',
66
+ DIRECT_ACCOUNT_ID_NOT_IN_SELLERS_JSON: 'directAccountIdNotInSellersJson',
67
+ RESELLER_ACCOUNT_ID_NOT_IN_SELLERS_JSON: 'resellerAccountIdNotInSellersJson',
68
+ DOMAIN_MISMATCH: 'domainMismatch',
69
+ DIRECT_NOT_PUBLISHER: 'directNotPublisher',
70
+ SELLER_ID_NOT_UNIQUE: 'sellerIdNotUnique',
71
+ RESELLER_NOT_INTERMEDIARY: 'resellerNotIntermediary',
72
+ SELLERS_JSON_VALIDATION_ERROR: 'sellersJsonValidationError',
73
+ // Content validation
74
+ EMPTY_FILE: 'emptyFile',
75
+ INVALID_CHARACTERS: 'invalidCharacters',
76
+ };
77
+ // For backward compatibility
78
+ exports.ERROR_KEYS = exports.VALIDATION_KEYS;
79
+ // Export message system
80
+ var messages_1 = require("./messages");
81
+ Object.defineProperty(exports, "DefaultMessageProvider", { enumerable: true, get: function () { return messages_1.DefaultMessageProvider; } });
82
+ Object.defineProperty(exports, "setMessageProvider", { enumerable: true, get: function () { return messages_1.setMessageProvider; } });
83
+ Object.defineProperty(exports, "getMessageProvider", { enumerable: true, get: function () { return messages_1.getMessageProvider; } });
84
+ Object.defineProperty(exports, "createValidationMessage", { enumerable: true, get: function () { return messages_1.createValidationMessage; } });
85
+ Object.defineProperty(exports, "configureMessages", { enumerable: true, get: function () { return messages_1.configureMessages; } });
86
+ Object.defineProperty(exports, "isSupportedLocale", { enumerable: true, get: function () { return messages_1.isSupportedLocale; } });
87
+ Object.defineProperty(exports, "getSupportedLocales", { enumerable: true, get: function () { return messages_1.getSupportedLocales; } });
88
/**
 * Type guard: an entry counts as a record when it has the three record keys
 * (`domain`, `account_id`, `account_type`). Only key presence is checked, not values.
 *
 * @param entry - A parsed ads.txt entry (must be an object; `in` throws otherwise)
 * @returns true when all three keys are present
 */
function isAdsTxtRecord(entry) {
    const recordKeys = ['domain', 'account_id', 'account_type'];
    return recordKeys.every((key) => key in entry);
}
94
/**
 * Type guard: an entry counts as a variable when it has `variable_type`, `value`
 * and `is_variable`, and `is_variable` is strictly `true`.
 *
 * @param entry - A parsed ads.txt entry (must be an object; `in` throws otherwise)
 * @returns true when the entry is a variable declaration
 */
function isAdsTxtVariable(entry) {
    const variableKeys = ['variable_type', 'value', 'is_variable'];
    const hasAllKeys = variableKeys.every((key) => key in entry);
    return hasAllKeys && entry.is_variable === true;
}
103
/**
 * Build an invalid record carrying the given validation issue.
 *
 * Defaults are filled in for the standard record fields, then every own property
 * of `partialRecord` is copied on top. Because the copy happens last, any key
 * present on `partialRecord` wins - including `is_valid`, `error`, or a key whose
 * value is explicitly `undefined`.
 *
 * @param partialRecord - Known fields of the record
 * @param validationKey - Validation key describing the problem
 * @param severity - Severity of the issue (defaults to ERROR)
 * @returns The assembled invalid record
 */
function createInvalidRecord(partialRecord, validationKey, severity = Severity.ERROR) {
    const { domain, account_id, account_type, relationship, line_number, raw_line } = partialRecord;
    const base = {
        domain: domain || '',
        account_id: account_id || '',
        account_type: account_type || '',
        relationship: relationship || 'DIRECT',
        line_number: line_number || 0,
        raw_line: raw_line || '',
        is_valid: false,
        error: validationKey, // legacy field, same value as validation_key
        validation_key: validationKey,
        severity,
        is_variable: false,
    };
    // Caller-supplied fields override the defaults above.
    return Object.assign(base, partialRecord);
}
123
/**
 * Recognise an ads.txt variable declaration of the form `NAME=value`.
 *
 * The name must be one of CONTACT, SUBDOMAIN, INVENTORYPARTNERDOMAIN, OWNERDOMAIN
 * or MANAGERDOMAIN (case-insensitive) and the value must be non-empty.
 *
 * @param line - The raw line from the file
 * @param lineNumber - The line number in the file (for error reporting)
 * @returns The parsed variable (name upper-cased, value trimmed), or null if the line is not a variable
 */
function parseAdsTxtVariable(line, lineNumber) {
    const declaration = /^(CONTACT|SUBDOMAIN|INVENTORYPARTNERDOMAIN|OWNERDOMAIN|MANAGERDOMAIN)=(.+)$/i.exec(line.trim());
    if (declaration === null) {
        return null;
    }
    const [, name, rawValue] = declaration;
    return {
        variable_type: name.toUpperCase(),
        value: rawValue.trim(),
        line_number: lineNumber,
        raw_line: line,
        is_variable: true,
        is_valid: true, // a recognised variable is always reported as valid
    };
}
148
/**
 * Report whether a line contains characters this validator rejects.
 *
 * Rejected code points:
 *   - U+0000-U+0008, U+000B, U+000C, U+000E-U+001F (control characters other
 *     than tab, line feed and carriage return)
 *   - U+007F-U+009F
 *   - U+2028-U+202F
 *   - U+205F-U+206F
 *   - U+FEFF
 * Everything else is accepted, including the whole U+2000-U+200F range.
 *
 * @param line - The raw line from the file
 * @returns true if line contains invalid characters
 */
function hasInvalidCharacters(line) {
    // One character class covers both the ASCII control set and the wider Unicode set.
    const rejected = /[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F\u2028-\u202F\u205F-\u206F\uFEFF]/;
    return rejected.test(line);
}
164
/**
 * Parse and validate a single line of an ads.txt file.
 *
 * Steps, in order (the first failure wins):
 *   1. Invalid characters - checked on the raw line, comments included.
 *   2. Comment stripping - everything from the first `#` is dropped; a line that
 *      is blank afterwards yields null.
 *   3. Variable declarations (`NAME=value`) - parsed from the comment-stripped
 *      text so an inline comment never leaks into the variable value.
 *   4. Record validation - field count, relationship, domain (via psl), account id.
 *
 * `raw_line` on the result is always the untouched input line.
 *
 * @param line - The raw line from the file
 * @param lineNumber - The line number in the file (for error reporting)
 * @returns A parsed record or variable, or null for comments and empty lines
 */
function parseAdsTxtLine(line, lineNumber) {
    if (hasInvalidCharacters(line)) {
        return createInvalidRecord({
            line_number: lineNumber,
            raw_line: line,
        }, exports.VALIDATION_KEYS.INVALID_CHARACTERS, Severity.ERROR);
    }
    // Drop the comment part (if any) and surrounding whitespace.
    const commentIndex = line.indexOf('#');
    const content = (commentIndex === -1 ? line : line.substring(0, commentIndex)).trim();
    if (!content) {
        return null;
    }
    // Variables are matched against the comment-free text; the original line is
    // restored as raw_line afterwards.
    const variableRecord = parseAdsTxtVariable(content, lineNumber);
    if (variableRecord) {
        return { ...variableRecord, raw_line: line };
    }
    // Format: domain, account_id, type, [certification_authority_id]
    const parts = content.split(',').map((part) => part.trim());
    // Must have at least domain, account ID, and type.
    // NOTE(review): a line with no comma at all is also reported here as
    // MISSING_FIELDS. The former INVALID_FORMAT check sat after this one and could
    // never run, so it was removed rather than reordered to keep results unchanged.
    if (parts.length < 3) {
        return createInvalidRecord({
            domain: parts[0] || '',
            account_id: parts[1] || '',
            account_type: parts[2] || '',
            line_number: lineNumber,
            raw_line: line,
        }, exports.VALIDATION_KEYS.MISSING_FIELDS, Severity.ERROR);
    }
    const [domain, accountId, accountType, ...rest] = parts;
    const { relationship, certAuthorityId, error } = processRelationship(accountType, rest);
    // Every outcome below reports the same set of parsed fields.
    const fields = {
        domain,
        account_id: accountId,
        account_type: accountType,
        certification_authority_id: certAuthorityId,
        relationship,
        line_number: lineNumber,
        raw_line: line,
    };
    if (error) {
        return createInvalidRecord(fields, error, Severity.ERROR);
    }
    if (!psl.isValid(domain)) {
        return createInvalidRecord(fields, exports.VALIDATION_KEYS.INVALID_DOMAIN, Severity.ERROR);
    }
    if (!accountId) {
        return createInvalidRecord(fields, exports.VALIDATION_KEYS.EMPTY_ACCOUNT_ID, Severity.ERROR);
    }
    return { ...fields, is_valid: true };
}
269
/**
 * Work out the relationship (DIRECT / RESELLER) and the optional certification
 * authority ID from the third field of a record and whatever follows it.
 *
 * Accepted layouts:
 *   - third field is the relationship, next field (if any) is the cert authority ID
 *   - third field is something else, next field is the relationship and the one
 *     after that (if any) is the cert authority ID
 * If the field after the third one is itself a relationship it always wins.
 * When neither position holds a relationship, an INVALID_RELATIONSHIP error is
 * returned together with the default relationship 'DIRECT'.
 *
 * @param accountType - Third comma-separated field of the record
 * @param rest - Remaining fields after the third one
 * @returns `{ relationship, certAuthorityId }` or `{ relationship, error }`
 */
function processRelationship(accountType, rest) {
    const KNOWN_RELATIONSHIPS = ['DIRECT', 'RESELLER'];
    const typeUpper = accountType.toUpperCase();
    const nextUpper = rest[0]?.toUpperCase();
    const typeIsRelationship = KNOWN_RELATIONSHIPS.includes(typeUpper);
    const nextIsRelationship = KNOWN_RELATIONSHIPS.includes(nextUpper);
    if (!typeIsRelationship && !nextIsRelationship) {
        return {
            relationship: 'DIRECT',
            error: exports.VALIDATION_KEYS.INVALID_RELATIONSHIP,
        };
    }
    let relationship = typeIsRelationship ? typeUpper : 'DIRECT';
    let certAuthorityId;
    if (nextIsRelationship) {
        // Layout with an explicit relationship field after the third field.
        relationship = nextUpper;
        if (rest.length > 1) {
            certAuthorityId = rest[1];
        }
    }
    else if (rest.length > 0) {
        certAuthorityId = rest[0];
    }
    return { relationship, certAuthorityId };
}
305
/**
 * Parse and validate a complete ads.txt file.
 *
 * Lines may be terminated by CRLF, a lone CR, or LF; the terminator is never part
 * of a line's `raw_line`. Blank lines and comments produce no entry but still
 * count towards line numbers.
 *
 * When `publisherDomain` is given and the file declares no OWNERDOMAIN variable,
 * a generated OWNERDOMAIN entry (line_number -1) holding the registrable domain
 * of `publisherDomain` is appended.
 *
 * @param content - The full content of the ads.txt file
 * @param publisherDomain - Optional publisher domain for creating default OWNERDOMAIN if missing
 * @returns Array of parsed records and variables with validation status; a single
 *          EMPTY_FILE error entry when the content is empty or whitespace only
 */
function parseAdsTxtContent(content, publisherDomain) {
    if (!content || content.trim().length === 0) {
        return [
            {
                line_number: 1,
                raw_line: '',
                is_valid: false,
                error: exports.VALIDATION_KEYS.EMPTY_FILE,
                validation_key: exports.VALIDATION_KEYS.EMPTY_FILE,
                severity: Severity.ERROR,
                domain: '',
                account_id: '',
                account_type: '',
                relationship: 'DIRECT',
                is_variable: false,
            },
        ];
    }
    // CRLF must be tried first so it is consumed as one terminator, not two.
    const lines = content.split(/\r\n|\r|\n/);
    const entries = [];
    lines.forEach((line, index) => {
        const parsedEntry = parseAdsTxtLine(line, index + 1);
        if (parsedEntry) {
            entries.push(parsedEntry);
        }
    });
    if (!publisherDomain) {
        return entries;
    }
    const hasOwnerDomain = entries.some((entry) => isAdsTxtVariable(entry) && entry.variable_type === 'OWNERDOMAIN');
    if (hasOwnerDomain) {
        return entries;
    }
    try {
        // psl yields the registrable domain (public suffix + one label).
        const parsed = psl.parse(publisherDomain);
        const rootDomain = typeof parsed === 'object' && 'domain' in parsed ? parsed.domain : null;
        if (rootDomain) {
            entries.push({
                variable_type: 'OWNERDOMAIN',
                value: rootDomain,
                line_number: -1, // -1 marks a generated entry that is not in the file
                raw_line: `OWNERDOMAIN=${rootDomain}`,
                is_variable: true,
                is_valid: true,
            });
        }
    }
    catch (error) {
        // Best effort: without a parsable domain the default is simply not added.
        console.error(`Could not parse domain for default OWNERDOMAIN: ${publisherDomain}`, error);
    }
    return entries;
}
369
/**
 * Return a copy of `record` annotated with a warning.
 *
 * The copy stays valid (`is_valid: true`) and is marked as a non-variable entry.
 * Precedence, lowest to highest: fields of `record`, the warning fields set here,
 * then `additionalProps`.
 *
 * @param record - The record to annotate (not modified)
 * @param validationKey - Validation key describing the warning
 * @param params - Parameters stored as `warning_params`
 * @param severity - Severity of the warning (defaults to WARNING)
 * @param additionalProps - Extra properties merged in last
 * @returns The annotated copy
 */
function createWarningRecord(record, validationKey, params = {}, severity = Severity.WARNING, additionalProps = {}) {
    const warningFields = {
        is_valid: true,
        has_warning: true,
        warning: validationKey, // legacy field, same value as validation_key
        validation_key: validationKey,
        severity,
        warning_params: params,
        is_variable: false,
    };
    return Object.assign({}, record, warningFields, additionalProps);
}
385
/**
 * Annotate a record as already present in the ads.txt of `publisherDomain`.
 *
 * The domain is stored twice: as the `domain` warning parameter and as the
 * `duplicate_domain` property of the returned record.
 *
 * @param record - The record to annotate
 * @param publisherDomain - Domain whose ads.txt already contains the record
 * @param validationKey - Validation key to report
 * @param severity - Severity of the warning (defaults to WARNING)
 * @returns The annotated copy produced by createWarningRecord
 */
function createDuplicateWarningRecord(record, publisherDomain, validationKey, severity = Severity.WARNING) {
    const warningParams = { domain: publisherDomain };
    const duplicateInfo = { duplicate_domain: publisherDomain };
    return createWarningRecord(record, validationKey, warningParams, severity, duplicateInfo);
}
393
/**
 * Create the console-backed logger used by the cross-check functions.
 *
 * `info` and `error` always go to the console. `debug` goes to `console.log` only
 * when NODE_ENV is exactly 'development'; otherwise it is a no-op.
 *
 * @returns An object with `info`, `error` and `debug` functions
 */
function createLogger() {
    const { log, error } = console;
    const discard = () => { };
    const debug = process.env.NODE_ENV === 'development' ? log : discard;
    return { info: log, error, debug };
}
404
/**
 * Cross-check parsed ads.txt entries for a publisher.
 *
 * Record entries are first compared with the publisher's existing ads.txt
 * (`cachedAdsTxtContent`) and then validated against sellers.json. Variable
 * entries are passed through untouched.
 *
 * @param publisherDomain - Publisher domain; when falsy the entries are returned as-is
 * @param parsedEntries - Entries produced by the parser (records and variables)
 * @param cachedAdsTxtContent - Existing ads.txt content used for duplicate detection
 * @param sellersJsonProviderOrGetSellersJson - Provider object or legacy fetch function
 * @returns Variable entries first, followed by the checked records. If anything
 *          throws during the checks, the original `parsedEntries` array is returned.
 */
async function crossCheckAdsTxtRecords(publisherDomain, parsedEntries, cachedAdsTxtContent, sellersJsonProviderOrGetSellersJson) {
    const logger = createLogger();
    logger.info('=== crossCheckAdsTxtRecords called with ===');
    logger.info(`publisherDomain: ${publisherDomain}`);
    logger.info(`parsedEntries: ${parsedEntries.length}`);
    if (!publisherDomain) {
        logger.info('No publisher domain provided, skipping cross-check');
        return parsedEntries;
    }
    try {
        const variables = parsedEntries.filter((entry) => isAdsTxtVariable(entry));
        const records = parsedEntries.filter((entry) => isAdsTxtRecord(entry));
        // Step 1: flag records already present in the existing ads.txt.
        const deduplicated = await checkForDuplicates(publisherDomain, records, cachedAdsTxtContent, logger);
        // Step 2: validate against sellers.json. The full entry list (variables
        // included) is handed over as well.
        const validated = await validateAgainstSellersJsonOptimized(publisherDomain, deduplicated, sellersJsonProviderOrGetSellersJson, logger, parsedEntries);
        return [...variables, ...validated];
    }
    catch (error) {
        // Best effort: a failed cross-check must not lose the parsed entries.
        logger.error('Error during ads.txt cross-check:', error);
        return parsedEntries;
    }
}
435
/**
 * Flag records that already exist in the publisher's current ads.txt.
 *
 * @param publisherDomain - Publisher domain (used in logs and in the duplicate annotation)
 * @param parsedRecords - Record entries to check; variables are not expected here
 * @param cachedAdsTxtContent - Existing ads.txt content; when falsy no check is performed
 * @param logger - Logger with `info` / `error` / `debug`; defaults to the standard
 *                 console logger so the function can be called without one
 * @returns A new array: the input records, with duplicates replaced by annotated copies
 */
async function checkForDuplicates(publisherDomain, parsedRecords, // Note: This expects only record entries, not variables
cachedAdsTxtContent, logger = createLogger()) {
    logger.info(`Starting cross-check with publisher domain: ${publisherDomain}`);
    logSampleRecords(parsedRecords, logger);
    if (!cachedAdsTxtContent) {
        // Nothing to compare against: hand back a shallow copy.
        return [...parsedRecords];
    }
    logger.info(`Cached content length: ${cachedAdsTxtContent.length}`);
    const existingRecords = parseAdsTxtContent(cachedAdsTxtContent);
    logger.info("Sample of records from publisher's ads.txt:");
    existingRecords.slice(0, 3).forEach((record, i) => {
        if (isAdsTxtRecord(record)) {
            logger.info(` ${i + 1}: domain=${record.domain}, account_id=${record.account_id}, type=${record.account_type}, relationship=${record.relationship}, valid=${record.is_valid}`);
        }
        else if (isAdsTxtVariable(record)) {
            logger.info(` ${i + 1}: variable_type=${record.variable_type}, value=${record.value}, valid=${record.is_valid}`);
        }
    });
    // Only record entries take part in the lookup; variables are filtered out.
    const recordEntries = existingRecords.filter(isAdsTxtRecord);
    const existingRecordMap = createExistingRecordsMap(recordEntries);
    logger.info(`Created lookup map with ${existingRecordMap.size} entries`);
    const resultRecords = findDuplicateRecords(parsedRecords, existingRecordMap, publisherDomain, logger);
    logger.info(`After duplicate check: ${resultRecords.length} records, ${resultRecords.filter((r) => r.has_warning).length} with warnings`);
    return resultRecords;
}
470
/**
 * Log the first five entries of `records` through `logger.info`.
 * Entries that are neither records nor variables are skipped silently, but they
 * still count towards the position shown in the log line.
 *
 * @param records - Entries to sample
 * @param logger - Logger providing `info`
 */
function logSampleRecords(records, logger) {
    const SAMPLE_SIZE = 5;
    records.slice(0, SAMPLE_SIZE).forEach((entry, index) => {
        const position = index + 1;
        if (isAdsTxtRecord(entry)) {
            logger.info(`Input record ${position}: domain=${entry.domain}, account_id=${entry.account_id}, type=${entry.account_type}, relationship=${entry.relationship}`);
            return;
        }
        if (isAdsTxtVariable(entry)) {
            logger.info(`Input variable ${position}: type=${entry.variable_type}, value=${entry.value}`);
        }
    });
}
483
/**
 * Index the valid records of an existing ads.txt by lookup key.
 *
 * Keys come from createLookupKey so that the index and the duplicate search
 * always agree on the key format. Invalid records are left out. When several
 * records share a key, the last one is kept.
 *
 * Note: this function only works with record entries, not variables.
 *
 * @param existingRecords - Record entries parsed from the existing ads.txt
 * @returns Map from lookup key to record
 */
function createExistingRecordsMap(existingRecords) {
    const existingRecordMap = new Map();
    for (const record of existingRecords) {
        if (record.is_valid) {
            existingRecordMap.set(createLookupKey(record), record);
        }
    }
    return existingRecordMap;
}
498
/**
 * Build the key used to compare records: `domain|account_id|relationship`.
 *
 *   - domain is lower-cased and trimmed, so it compares case-insensitively
 *   - account_id is used exactly as given (case-sensitive)
 *   - relationship is used as given; the parser normalises it to DIRECT/RESELLER
 *
 * @param record - Record entry to build the key for
 * @returns The lookup key
 */
function createLookupKey(record) {
    const { domain, account_id: accountId, relationship } = record;
    const normalizedDomain = domain.toLowerCase().trim();
    return `${normalizedDomain}|${accountId}|${relationship}`;
}
509
/**
 * Replace every valid record that is already present in `existingRecordMap`
 * with a copy annotated as a duplicate (key IMPLIMENTED, severity INFO).
 * Invalid records and records without a match are returned unchanged.
 *
 * @param records - Records to check
 * @param existingRecordMap - Map of lookup key to existing record
 * @param publisherDomain - Domain whose ads.txt the existing records came from
 * @param logger - Logger providing `info` and `debug`
 * @returns A new array with the same length and order as `records`
 */
function findDuplicateRecords(records, existingRecordMap, publisherDomain, logger) {
    logger.info(`Checking ${records.length} input records for duplicates against ${existingRecordMap.size} existing records`);
    // A handful of keys is enough to debug key-format mismatches.
    const keySample = [...existingRecordMap.keys()].slice(0, 10);
    logger.debug(`Sample of existing map keys: ${JSON.stringify(keySample)}`);
    const annotateIfDuplicate = (record) => {
        if (!record.is_valid) {
            return record;
        }
        const lookupKey = createLookupKey(record);
        if (!existingRecordMap.has(lookupKey)) {
            return record;
        }
        logger.debug(`Found implimented for: ${lookupKey}`);
        return createDuplicateWarningRecord(record, publisherDomain, exports.VALIDATION_KEYS.IMPLIMENTED, Severity.INFO);
    };
    return records.map((record) => annotateIfDuplicate(record));
}
531
/**
 * Validate records against sellers.json using the legacy fetch function.
 *
 * All records are processed concurrently and share one sellers.json cache and
 * one seller-ID-count cache for the duration of the call. Invalid records are
 * passed through. If validating a record throws, that record is returned with a
 * SELLERS_JSON_VALIDATION_ERROR warning instead of failing the whole batch.
 *
 * @param publisherDomain - Publisher domain being checked
 * @param records - Record entries to validate
 * @param getSellersJson - Function used to obtain sellers.json data
 * @param logger - Logger providing `info` and `error`
 * @param allEntries - All parsed entries, variables included (defaults to [])
 * @returns Validated records, in the same order as `records`
 */
async function validateAgainstSellersJson(publisherDomain, records, getSellersJson, logger, allEntries = []) {
    const sellersJsonCache = new Map();
    const domainSellerIdCountsMap = new Map();
    const validateOne = async (record) => {
        if (!record.is_valid) {
            return record;
        }
        try {
            return await validateSingleRecord(record, publisherDomain, sellersJsonCache, domainSellerIdCountsMap, getSellersJson, logger, allEntries);
        }
        catch (error) {
            logger.error(`Error validating against sellers.json for record (domain=${record.domain}, account_id=${record.account_id}):`, error);
            const warningParams = { message: error.message, domain: record.domain };
            const extraProps = { validation_error: error.message };
            return createWarningRecord(record, exports.VALIDATION_KEYS.SELLERS_JSON_VALIDATION_ERROR, warningParams, Severity.WARNING, extraProps);
        }
    };
    const recordsWithSellerValidation = await Promise.all(records.map((record) => validateOne(record)));
    const warningCount = recordsWithSellerValidation.filter((r) => r.has_warning).length;
    logger.info(`After sellers.json validation: ${recordsWithSellerValidation.length} records, ${warningCount} with warnings`);
    return recordsWithSellerValidation;
}
562
/**
 * Validate records against sellers.json, choosing the strategy from the kind of
 * provider supplied.
 *
 * A non-null object exposing `batchGetSellers` is treated as the optimized
 * provider. Anything else - a function, `undefined` or `null` - is handed to the
 * legacy per-record validation.
 *
 * @param publisherDomain - Publisher domain being checked
 * @param records - Record entries to validate
 * @param sellersJsonProviderOrGetSellersJson - Optimized provider object or legacy fetch function
 * @param logger - Logger passed through to the chosen strategy
 * @param allEntries - All parsed entries, variables included (defaults to [])
 * @returns Validated records
 */
async function validateAgainstSellersJsonOptimized(publisherDomain, records, sellersJsonProviderOrGetSellersJson, logger, allEntries = []) {
    const candidate = sellersJsonProviderOrGetSellersJson;
    // `typeof null` is 'object' and `in` throws on null, hence the explicit null test.
    const isOptimizedProvider = candidate !== null &&
        typeof candidate === 'object' &&
        'batchGetSellers' in candidate;
    if (isOptimizedProvider) {
        return await validateWithOptimizedProvider(publisherDomain, records, candidate, logger, allEntries);
    }
    // Legacy function path, kept for backward compatibility.
    return await validateAgainstSellersJson(publisherDomain, records, candidate, logger, allEntries);
}
579
/**
 * Validate records using a provider that can look up sellers in batches.
 *
 * Phase 1 groups the account IDs of all valid records by lower-cased domain.
 * Phase 2 queries the provider one domain after another: `hasSellerJson` first,
 * then `batchGetSellers`. A domain without sellers.json, or one whose lookup
 * throws, ends up with an empty seller map and empty metadata.
 * Phase 3 validates every valid record against the data fetched for its domain;
 * invalid records are passed through unchanged.
 *
 * @param publisherDomain - Publisher domain being checked
 * @param records - Record entries to validate
 * @param provider - Object exposing `hasSellerJson(domain)` and `batchGetSellers(domain, sellerIds)`
 * @param logger - Logger providing `info` and `error`
 * @param allEntries - All parsed entries, variables included (defaults to [])
 * @returns Validated records, in the same order as `records`
 */
async function validateWithOptimizedProvider(publisherDomain, records, provider, logger, allEntries = []) {
    logger.info(`Starting optimized sellers.json validation for ${records.length} records`);
    // Phase 1: seller IDs requested per domain.
    const domainToSellerIds = new Map();
    for (const record of records) {
        if (!record.is_valid) {
            continue;
        }
        const domainKey = record.domain.toLowerCase();
        if (!domainToSellerIds.has(domainKey)) {
            domainToSellerIds.set(domainKey, []);
        }
        domainToSellerIds.get(domainKey).push(record.account_id);
    }
    // Phase 2: fetch seller data, one domain at a time.
    const fetchDomain = async (domainKey, sellerIds) => {
        logger.info(`Fetching ${sellerIds.length} sellers for domain: ${domainKey}`);
        const hasSellerJson = await provider.hasSellerJson(domainKey);
        if (!hasSellerJson) {
            logger.info(`No sellers.json found for domain: ${domainKey}`);
            return { sellers: new Map(), metadata: {} };
        }
        const batchResult = await provider.batchGetSellers(domainKey, sellerIds);
        const sellers = new Map();
        batchResult.results.forEach((result) => {
            if (result.found && result.seller) {
                sellers.set(result.sellerId, result.seller);
            }
        });
        logger.info(`Found ${batchResult.found_count}/${batchResult.requested_count} sellers for domain: ${domainKey}`);
        return { sellers, metadata: batchResult.metadata };
    };
    const domainSellersMap = new Map();
    const domainMetadataMap = new Map();
    for (const [domainKey, sellerIds] of domainToSellerIds) {
        let fetched;
        try {
            fetched = await fetchDomain(domainKey, sellerIds);
        }
        catch (error) {
            logger.error(`Error fetching sellers for domain ${domainKey}:`, error);
            fetched = { sellers: new Map(), metadata: {} };
        }
        domainSellersMap.set(domainKey, fetched.sellers);
        domainMetadataMap.set(domainKey, fetched.metadata);
    }
    // Phase 3: validate each record against what was fetched for its domain.
    const validatedRecords = await Promise.all(records.map(async (record) => {
        if (!record.is_valid) {
            return record;
        }
        const domainKey = record.domain.toLowerCase();
        const sellersMap = domainSellersMap.get(domainKey) || new Map();
        const metadata = domainMetadataMap.get(domainKey) || {};
        return await validateSingleRecordOptimized(record, publisherDomain, sellersMap, metadata, allEntries);
    }));
    const warningCount = validatedRecords.filter((r) => r.has_warning).length;
    logger.info(`After optimized sellers.json validation: ${validatedRecords.length} records, ${warningCount} with warnings`);
    return validatedRecords;
}
646
/**
 * Validate one record against seller data that has already been fetched.
 *
 * sellers.json is considered present when the seller map is non-empty or the
 * metadata object has at least one key; otherwise the record is returned with a
 * NO_SELLERS_JSON warning. The account ID is stringified and trimmed before the
 * lookup. Relationship-specific checks run for DIRECT and RESELLER records only.
 *
 * @param record - Record entry to validate
 * @param publisherDomain - Publisher domain being checked
 * @param sellersMap - Map of seller ID to seller entry for the record's domain
 * @param metadata - sellers.json metadata for the record's domain
 * @param allEntries - All parsed entries, variables included
 * @returns A copy of the record carrying `validation_results` and, when any
 *          warning was generated, the warning fields of the first warning plus
 *          the full list in `all_warnings`
 */
async function validateSingleRecordOptimized(record, publisherDomain, sellersMap, metadata, allEntries) {
    const validationResult = createInitialValidationResult();
    validationResult.hasSellerJson = sellersMap.size > 0 || Object.keys(metadata).length > 0;
    if (!validationResult.hasSellerJson) {
        return createWarningRecord(record, exports.VALIDATION_KEYS.NO_SELLERS_JSON, { domain: record.domain }, Severity.WARNING, { validation_results: validationResult });
    }
    const normalizedAccountId = record.account_id.toString().trim();
    const matchingSeller = sellersMap.get(normalizedAccountId);
    validationResult.sellerData = matchingSeller || null;
    validationResult.directAccountIdInSellersJson = !!matchingSeller;
    // The batch data carries no per-ID occurrence counts, so every fetched seller
    // ID is assumed to appear exactly once.
    const sellerIdCounts = new Map();
    if (metadata.seller_count && metadata.seller_count > 0) {
        for (const sellerId of sellersMap.keys()) {
            sellerIdCounts.set(sellerId, 1);
        }
    }
    switch (record.relationship) {
        case 'DIRECT':
            validateDirectRelationship(validationResult, matchingSeller, publisherDomain, normalizedAccountId, sellerIdCounts, allEntries);
            break;
        case 'RESELLER':
            validateResellerRelationship(validationResult, matchingSeller, publisherDomain, normalizedAccountId, sellerIdCounts, allEntries);
            break;
        default:
            break;
    }
    const warnings = generateWarnings(record, validationResult, publisherDomain);
    if (!(warnings.length > 0)) {
        // No warnings, but the validation details are still attached.
        return {
            ...record,
            validation_results: validationResult,
        };
    }
    const primaryWarning = warnings[0];
    return {
        ...record,
        has_warning: true,
        warning: primaryWarning.key, // legacy field, same value as validation_key
        warning_params: primaryWarning.params,
        validation_key: primaryWarning.key,
        severity: primaryWarning.severity || Severity.WARNING,
        all_warnings: warnings,
        validation_results: validationResult,
    };
}
700
+ // Legacy function removed and replaced with enhanced version at line ~870
701
/**
 * Validate one ads.txt record against the sellers.json of its advertising system.
 *
 * @param record - Parsed ads.txt record; `domain`, `account_id` and `relationship` are read
 * @param publisherDomain - Domain of the publisher whose ads.txt is being validated
 * @param sellersJsonCache - Map of ad-system domain to sellers.json data, shared between calls
 * @param domainSellerIdCountsMap - Map of ad-system domain to its seller_id occurrence counts
 * @param getSellersJson - Async provider called with the ad-system domain on a cache miss
 * @param logger - Logger forwarded to the sellers.json lookup
 * @param allEntries - Every parsed ads.txt entry (variables included); defaults to an empty list
 * @returns A copy of the record with `validation_results` attached, plus the warning
 *          fields when at least one warning was generated
 */
async function validateSingleRecord(record, publisherDomain, sellersJsonCache, domainSellerIdCountsMap, getSellersJson, logger, allEntries = []) {
    // sellers.json is looked up under the lower-cased advertising system domain.
    const adSystemDomain = record.domain.toLowerCase();
    const validationResult = createInitialValidationResult();
    const sellersJsonData = await getSellersJsonData(adSystemDomain, sellersJsonCache, getSellersJson, validationResult, logger);
    // Without a usable sellers list nothing else can be checked.
    const hasUsableSellers = Boolean(sellersJsonData) && Array.isArray(sellersJsonData.sellers);
    if (!hasUsableSellers) {
        return createWarningRecord(record, exports.VALIDATION_KEYS.NO_SELLERS_JSON, { domain: record.domain }, Severity.WARNING, { validation_results: validationResult });
    }
    const { sellers } = sellersJsonData;
    const sellerIdCounts = getSellerIdCounts(adSystemDomain, domainSellerIdCountsMap, sellers);
    const normalizedAccountId = record.account_id.toString().trim();
    const matchingSeller = findMatchingSeller(sellers, normalizedAccountId);
    validationResult.sellerData = matchingSeller || null;
    // Recorded for every relationship: generateWarnings uses this flag for both
    // the DIRECT (Case 12) and the RESELLER (Case 17) "account ID not found" warning.
    validationResult.directAccountIdInSellersJson = !!matchingSeller;
    // Relationship-specific checks; any other relationship value is left untouched.
    switch (record.relationship) {
        case 'DIRECT':
            validateDirectRelationship(validationResult, matchingSeller, publisherDomain, normalizedAccountId, sellerIdCounts, allEntries);
            break;
        case 'RESELLER':
            validateResellerRelationship(validationResult, matchingSeller, publisherDomain, normalizedAccountId, sellerIdCounts, allEntries);
            break;
        default:
            break;
    }
    const warnings = generateWarnings(record, validationResult, publisherDomain);
    if (warnings.length === 0) {
        // Clean record: the validation details are still attached.
        return {
            ...record,
            validation_results: validationResult,
        };
    }
    // The first warning is surfaced through the flat (legacy and new) fields.
    const [primaryWarning] = warnings;
    return {
        ...record,
        has_warning: true,
        warning: primaryWarning.key,
        warning_params: primaryWarning.params,
        validation_key: primaryWarning.key,
        severity: primaryWarning.severity || Severity.WARNING,
        all_warnings: warnings,
        validation_results: validationResult,
    };
}
760
/**
 * Build the starting state for a record's validation result.
 *
 * @returns A fresh object in which the two top-level flags are `false` and every
 *          relationship-specific check is `null` (not evaluated yet)
 */
function createInitialValidationResult() {
    const initialResult = {
        hasSellerJson: false,
        directAccountIdInSellersJson: false,
    };
    // null means "unknown" rather than "failed" for each of these checks.
    const unevaluatedChecks = [
        'directDomainMatchesSellerJsonEntry',
        'directEntryHasPublisherType',
        'directSellerIdIsUnique',
        'resellerAccountIdInSellersJson',
        'resellerDomainMatchesSellerJsonEntry',
        'resellerEntryHasIntermediaryType',
        'resellerSellerIdIsUnique',
    ];
    for (const checkName of unevaluatedChecks) {
        initialResult[checkName] = null;
    }
    return initialResult;
}
776
/**
 * Get sellers.json data for a domain, consulting the cache before fetching.
 *
 * `validationResult.hasSellerJson` is updated on every call, including cache
 * hits. Each record starts with `hasSellerJson: false`, so leaving the flag
 * untouched on a hit would make every record after the first one for the same
 * advertising system look as if its sellers.json were missing.
 *
 * @param adSystemDomain - Advertising system domain used as the cache key
 * @param sellersJsonCache - Map of domain to previously fetched data (falsy results are cached too)
 * @param getSellersJson - Async provider called with the domain on a cache miss
 * @param validationResult - Validation result whose `hasSellerJson` flag is set
 * @param logger - Logger; `info` is called before a fetch
 * @returns The cached or freshly fetched sellers.json data (whatever the provider returned)
 */
async function getSellersJsonData(adSystemDomain, sellersJsonCache, getSellersJson, validationResult, logger) {
    if (sellersJsonCache.has(adSystemDomain)) {
        const cachedData = sellersJsonCache.get(adSystemDomain);
        // Mirror the flag a fresh fetch would have produced for this data.
        validationResult.hasSellerJson = Boolean(cachedData);
        return cachedData;
    }
    logger.info(`Fetching sellers.json for domain: ${adSystemDomain}`);
    const sellersJsonData = await getSellersJson(adSystemDomain);
    validationResult.hasSellerJson = Boolean(sellersJsonData);
    // Cache the result (including a falsy one) so the provider is asked only once per domain.
    sellersJsonCache.set(adSystemDomain, sellersJsonData);
    return sellersJsonData;
}
795
/**
 * Get the seller_id occurrence counts for an advertising system, building and
 * caching them the first time the domain is seen.
 *
 * @param adSystemDomain - Advertising system domain used as the cache key
 * @param domainSellerIdCountsMap - Map of domain to its counts map; updated on a miss
 * @param sellers - Seller entries from that domain's sellers.json
 * @returns Map of trimmed, stringified seller_id to the number of entries using it
 */
function getSellerIdCounts(adSystemDomain, domainSellerIdCountsMap, sellers) {
    if (domainSellerIdCountsMap.has(adSystemDomain)) {
        return domainSellerIdCountsMap.get(adSystemDomain);
    }
    const sellerIdCounts = new Map();
    for (const seller of sellers) {
        // sellers.json is third-party data: skip null/undefined entries and
        // entries without a seller_id instead of throwing on them.
        if (!seller?.seller_id) {
            continue;
        }
        const currentId = seller.seller_id.toString().trim();
        sellerIdCounts.set(currentId, (sellerIdCounts.get(currentId) || 0) + 1);
    }
    domainSellerIdCountsMap.set(adSystemDomain, sellerIdCounts);
    return sellerIdCounts;
}
815
/**
 * Find the sellers.json entry whose seller_id equals the given account ID.
 *
 * @param sellers - Seller entries from sellers.json
 * @param normalizedAccountId - Account ID, already stringified and trimmed by the caller
 * @returns The first entry whose trimmed, stringified seller_id matches, or `undefined`
 */
function findMatchingSeller(sellers, normalizedAccountId) {
    // Optional chaining keeps a null/undefined entry in third-party data from
    // aborting the whole lookup with a TypeError.
    return sellers.find((seller) => Boolean(seller?.seller_id) && seller.seller_id.toString().trim() === normalizedAccountId);
}
821
/**
 * Collect the normalized domain values declared by one kind of ads.txt variable.
 *
 * @param parsedEntries Parsed ads.txt entries (records and variables mixed)
 * @param variableType Variable to extract (OWNERDOMAIN or MANAGERDOMAIN)
 * @returns Lower-cased, trimmed values of the matching variable entries, in input order
 */
function extractDomainsFromVariables(parsedEntries, variableType) {
    // MANAGERDOMAIN may be written as "domain" or "domain,CountryCode".
    const mayCarryCountryCode = variableType === 'MANAGERDOMAIN';
    return parsedEntries
        .filter((entry) => isAdsTxtVariable(entry) && entry.variable_type === variableType)
        .map((entry) => {
            // Keep only the part before the first comma when a country code is attached.
            const rawDomain = mayCarryCountryCode && entry.value.includes(',')
                ? entry.value.split(',')[0]
                : entry.value;
            return rawDomain.toLowerCase().trim();
        });
}
840
/**
 * Run the DIRECT-relationship checks and write their outcome into `validationResult`.
 *
 * @param validationResult - Result object mutated in place
 * @param matchingSeller - sellers.json entry matching the account ID, if one was found
 * @param publisherDomain - Publisher domain, compared only when the ads.txt declares
 *                          neither OWNERDOMAIN nor MANAGERDOMAIN
 * @param normalizedAccountId - Stringified, trimmed account ID of the record
 * @param sellerIdCounts - Map of seller_id to its number of occurrences in sellers.json
 * @param parsedEntries - All parsed ads.txt entries, used to read the domain variables
 */
function validateDirectRelationship(validationResult, matchingSeller, publisherDomain, normalizedAccountId, sellerIdCounts, parsedEntries = []) {
    // A DIRECT record carries no RESELLER findings.
    validationResult.resellerAccountIdInSellersJson = null;
    validationResult.resellerDomainMatchesSellerJsonEntry = null;
    validationResult.resellerEntryHasIntermediaryType = null;
    validationResult.resellerSellerIdIsUnique = null;
    if (!matchingSeller) {
        // Uniqueness cannot be judged without a matching seller.
        validationResult.directSellerIdIsUnique = null;
        return;
    }
    // Case 13: seller domain must match OWNERDOMAIN/MANAGERDOMAIN (or the publisher domain).
    const sellerDomainIsUsable = matchingSeller.is_confidential !== 1 && Boolean(matchingSeller.domain);
    if (!sellerDomainIsUsable) {
        // Confidential seller or no domain published: the check does not apply.
        validationResult.directDomainMatchesSellerJsonEntry = null;
    }
    else {
        const ownerDomains = extractDomainsFromVariables(parsedEntries, 'OWNERDOMAIN');
        const managerDomains = extractDomainsFromVariables(parsedEntries, 'MANAGERDOMAIN');
        const sellerDomainLower = matchingSeller.domain.toLowerCase().trim();
        const noDomainVariables = ownerDomains.length === 0 && managerDomains.length === 0;
        // Without any domain variable, fall back to the publisher domain itself.
        validationResult.directDomainMatchesSellerJsonEntry = noDomainVariables
            ? publisherDomain.toLowerCase() === sellerDomainLower
            : ownerDomains.includes(sellerDomainLower) || managerDomains.includes(sellerDomainLower);
    }
    // Case 14: seller_type must be PUBLISHER (BOTH is accepted as well).
    const sellerType = matchingSeller.seller_type?.toUpperCase() || '';
    validationResult.directEntryHasPublisherType = ['PUBLISHER', 'BOTH'].includes(sellerType);
    // Case 15: seller_id must appear exactly once in sellers.json.
    if (!sellerIdCounts.has(normalizedAccountId)) {
        // Not expected when a matching seller exists; leave the result unknown.
        console.warn(`Seller ID ${normalizedAccountId} not found in counts map`);
        validationResult.directSellerIdIsUnique = null;
        return;
    }
    const occurrences = sellerIdCounts.get(normalizedAccountId);
    validationResult.directSellerIdIsUnique = occurrences === 1;
    console.log(`Seller ID ${normalizedAccountId} appears ${occurrences} times, unique: ${validationResult.directSellerIdIsUnique}`);
}
895
/**
 * Run the RESELLER-relationship checks and write their outcome into `validationResult`.
 *
 * @param validationResult - Result object mutated in place
 * @param matchingSeller - sellers.json entry matching the account ID, if one was found
 * @param publisherDomain - Publisher domain, compared only when the ads.txt declares
 *                          neither OWNERDOMAIN nor MANAGERDOMAIN
 * @param normalizedAccountId - Stringified, trimmed account ID of the record
 * @param sellerIdCounts - Map of seller_id to its number of occurrences in sellers.json
 * @param parsedEntries - All parsed ads.txt entries, used to read the domain variables
 */
function validateResellerRelationship(validationResult, matchingSeller, publisherDomain, normalizedAccountId, sellerIdCounts, parsedEntries = []) {
    // A RESELLER record carries no DIRECT findings.
    validationResult.directEntryHasPublisherType = null;
    validationResult.directDomainMatchesSellerJsonEntry = null;
    validationResult.directSellerIdIsUnique = null;
    // Case 17: is the account ID listed in sellers.json at all?
    const sellerWasFound = !!matchingSeller;
    validationResult.resellerAccountIdInSellersJson = sellerWasFound;
    if (!sellerWasFound) {
        // Remaining checks need a seller entry; mark them as unknown.
        validationResult.resellerEntryHasIntermediaryType = null;
        validationResult.resellerSellerIdIsUnique = null;
        return;
    }
    // Case 18: seller domain must match OWNERDOMAIN/MANAGERDOMAIN (or the publisher domain).
    const sellerDomainIsUsable = matchingSeller.is_confidential !== 1 && Boolean(matchingSeller.domain);
    if (!sellerDomainIsUsable) {
        // Confidential seller or no domain published: the check does not apply.
        validationResult.resellerDomainMatchesSellerJsonEntry = null;
    }
    else {
        const ownerDomains = extractDomainsFromVariables(parsedEntries, 'OWNERDOMAIN');
        const managerDomains = extractDomainsFromVariables(parsedEntries, 'MANAGERDOMAIN');
        const sellerDomainLower = matchingSeller.domain.toLowerCase().trim();
        const noDomainVariables = ownerDomains.length === 0 && managerDomains.length === 0;
        // Without any domain variable, fall back to the publisher domain itself.
        validationResult.resellerDomainMatchesSellerJsonEntry = noDomainVariables
            ? publisherDomain.toLowerCase() === sellerDomainLower
            : ownerDomains.includes(sellerDomainLower) || managerDomains.includes(sellerDomainLower);
    }
    // Case 19: seller_type must be INTERMEDIARY (BOTH is accepted as well).
    const sellerType = matchingSeller.seller_type?.toUpperCase() || '';
    validationResult.resellerEntryHasIntermediaryType = ['INTERMEDIARY', 'BOTH'].includes(sellerType);
    // Case 20: seller_id must appear exactly once in sellers.json.
    if (!sellerIdCounts.has(normalizedAccountId)) {
        // Not expected when a matching seller exists; leave the result unknown.
        console.warn(`Reseller ID ${normalizedAccountId} not found in counts map`);
        validationResult.resellerSellerIdIsUnique = null;
        return;
    }
    const occurrences = sellerIdCounts.get(normalizedAccountId);
    validationResult.resellerSellerIdIsUnique = occurrences === 1;
    console.log(`Reseller ID ${normalizedAccountId} appears ${occurrences} times, unique: ${validationResult.resellerSellerIdIsUnique}`);
}
953
/**
 * Bundle a validation key with its message parameters and severity.
 *
 * @param validationKey - Key identifying the warning
 * @param params - Message parameters; defaults to an empty object
 * @param severity - Severity level; defaults to `Severity.WARNING`
 * @returns Object with `key`, `params` and `severity` properties
 */
function createWarning(validationKey, params = {}, severity = Severity.WARNING) {
    const warning = { key: validationKey };
    warning.params = params;
    warning.severity = severity;
    return warning;
}
963
/**
 * Translate a record's validation result into a list of warnings.
 *
 * @param record - ads.txt record; `domain`, `account_id` and `relationship` are read
 * @param validationResult - Outcome of the sellers.json checks for this record
 * @param publisherDomain - Publisher domain, reported in domain-mismatch warnings
 * @returns Warnings in evaluation order; a missing sellers.json or an unknown
 *          account ID yields exactly one warning and stops further checks
 */
function generateWarnings(record, validationResult, publisherDomain) {
    const warnings = [];
    const addWarning = (keyName, params) => {
        warnings.push(createWarning(exports.VALIDATION_KEYS[keyName], params));
    };
    // Parameter builders return a fresh object per warning.
    const accountParams = () => ({
        domain: record.domain,
        account_id: record.account_id,
    });
    const mismatchParams = () => ({
        domain: record.domain,
        publisher_domain: publisherDomain,
        seller_domain: validationResult.sellerData?.domain || 'unknown',
    });
    const sellerTypeParams = () => ({
        ...accountParams(),
        seller_type: validationResult.sellerData?.seller_type || 'unknown',
    });
    const isDirect = record.relationship === 'DIRECT';
    const isReseller = record.relationship === 'RESELLER';
    // Case 11/16: no sellers.json; nothing else can be said.
    if (!validationResult.hasSellerJson) {
        addWarning('NO_SELLERS_JSON', { domain: record.domain });
        return warnings;
    }
    // Case 12/17: account ID not listed; the remaining checks need a match.
    if (!validationResult.directAccountIdInSellersJson) {
        addWarning(isDirect ? 'DIRECT_ACCOUNT_ID_NOT_IN_SELLERS_JSON' : 'RESELLER_ACCOUNT_ID_NOT_IN_SELLERS_JSON', accountParams());
        return warnings;
    }
    // Case 13: DIRECT domain mismatch (checked against OWNERDOMAIN/MANAGERDOMAIN too).
    if (isDirect && validationResult.directDomainMatchesSellerJsonEntry === false) {
        addWarning('DOMAIN_MISMATCH', mismatchParams());
    }
    // Case 18: RESELLER domain mismatch (checked against OWNERDOMAIN/MANAGERDOMAIN too).
    // For RESELLER entries a mismatch is tolerated when the seller is typed
    // INTERMEDIARY or BOTH.
    const sellerData = validationResult.sellerData;
    if (isReseller &&
        validationResult.resellerDomainMatchesSellerJsonEntry === false &&
        sellerData &&
        sellerData.seller_type &&
        !['INTERMEDIARY', 'BOTH'].includes(sellerData.seller_type.toUpperCase())) {
        addWarning('DOMAIN_MISMATCH', mismatchParams());
    }
    // Case 14: DIRECT entry whose seller is not typed PUBLISHER.
    if (isDirect && validationResult.directEntryHasPublisherType === false) {
        addWarning('DIRECT_NOT_PUBLISHER', sellerTypeParams());
    }
    // Case 15/20: seller ID listed more than once in sellers.json.
    const directIdIsDuplicated = isDirect &&
        validationResult.directAccountIdInSellersJson &&
        validationResult.directSellerIdIsUnique === false;
    const resellerIdIsDuplicated = isReseller &&
        validationResult.resellerAccountIdInSellersJson &&
        validationResult.resellerSellerIdIsUnique === false;
    if (directIdIsDuplicated || resellerIdIsDuplicated) {
        console.log(`Adding SELLER_ID_NOT_UNIQUE warning for ${record.account_id} in ${record.domain}`);
        addWarning('SELLER_ID_NOT_UNIQUE', accountParams());
    }
    // Case 19: RESELLER entry whose seller is not typed INTERMEDIARY.
    if (isReseller &&
        validationResult.directAccountIdInSellersJson &&
        validationResult.resellerEntryHasIntermediaryType === false) {
        addWarning('RESELLER_NOT_INTERMEDIARY', sellerTypeParams());
    }
    return warnings;
}
1046
/**
 * Ads.txt Level 1 Optimization
 * Optimizes ads.txt content by:
 * 1. Removing duplicate variables (same type and value) and duplicate records
 *    (same domain, account ID and relationship); the first occurrence wins
 * 2. Emitting variables and records in a standardized, sorted, grouped format
 * 3. Keeping a comment found on the very first line as the file header
 *    (other comments, blank lines and invalid entries are not carried over)
 *
 * Both LF and CRLF line endings are accepted; the output always uses LF.
 *
 * @param content - The original ads.txt content
 * @param publisherDomain - Optional publisher domain; when given and the file has no
 *                          OWNERDOMAIN, an OWNERDOMAIN for its registrable domain is added
 * @returns Optimized ads.txt content as a string
 */
function optimizeAdsTxt(content, publisherDomain) {
    // Split on LF or CRLF so a preserved header comment never carries a stray '\r'.
    const lines = content.split(/\r?\n/);
    // Keys of the entries already kept, used to drop duplicates.
    const seenRecordKeys = new Set();
    const seenVariableKeys = new Set();
    const parsedEntries = [];
    let headerComment = null;
    let hasOwnerDomain = false;
    lines.forEach((line, index) => {
        try {
            const trimmedLine = line.trim();
            if (trimmedLine.startsWith('#')) {
                // Only a comment on the first line is kept (as the header).
                if (index === 0) {
                    headerComment = line;
                }
                return;
            }
            // Skip blank lines.
            if (!trimmedLine) {
                return;
            }
            const parsedEntry = parseAdsTxtLine(line, index + 1);
            // Skip lines that cannot be parsed or that are invalid.
            if (!parsedEntry || !parsedEntry.is_valid) {
                return;
            }
            if (isAdsTxtVariable(parsedEntry)) {
                if (parsedEntry.variable_type === 'OWNERDOMAIN') {
                    hasOwnerDomain = true;
                }
                // Variables are duplicates when type and (case-insensitive) value match.
                const key = `${parsedEntry.variable_type}|${parsedEntry.value.toLowerCase()}`;
                if (!seenVariableKeys.has(key)) {
                    seenVariableKeys.add(key);
                    parsedEntries.push(parsedEntry);
                }
            }
            else if (isAdsTxtRecord(parsedEntry)) {
                try {
                    // Records are duplicates when domain, account ID and relationship match.
                    const key = `${parsedEntry.domain.toLowerCase()}|${parsedEntry.account_id}|${parsedEntry.relationship}`;
                    if (!seenRecordKeys.has(key)) {
                        seenRecordKeys.add(key);
                        parsedEntries.push(parsedEntry);
                    }
                }
                catch (error) {
                    // Best effort: report the broken record and carry on.
                    const errorMsg = error instanceof Error ? error.message : String(error);
                    console.error(`Error processing record at line ${index + 1}: ${errorMsg}`);
                }
            }
        }
        catch (error) {
            // Best effort: a failure on one line must not stop the remaining lines.
            const errorMsg = error instanceof Error ? error.message : String(error);
            console.error(`Error at line ${index + 1}: ${errorMsg}`);
        }
    });
    // Add a default OWNERDOMAIN when a publisher domain is known and none was declared.
    if (publisherDomain && !hasOwnerDomain) {
        try {
            const parsed = psl.parse(publisherDomain);
            const rootDomain = typeof parsed === 'object' && 'domain' in parsed ? parsed.domain : null;
            if (rootDomain) {
                const defaultOwnerDomain = {
                    variable_type: 'OWNERDOMAIN',
                    value: rootDomain,
                    line_number: -1, // -1 marks a generated (default) entry
                    raw_line: `OWNERDOMAIN=${rootDomain}`,
                    is_variable: true,
                    is_valid: true,
                };
                parsedEntries.push(defaultOwnerDomain);
            }
        }
        catch (error) {
            console.error(`Could not parse domain for default OWNERDOMAIN: ${publisherDomain}`, error);
        }
    }
    // Order the entries: variables first (by variable_type), then records (by domain).
    parsedEntries.sort((a, b) => {
        if (isAdsTxtVariable(a) && isAdsTxtVariable(b)) {
            return a.variable_type.localeCompare(b.variable_type);
        }
        if (isAdsTxtVariable(a) && isAdsTxtRecord(b)) {
            return -1;
        }
        if (isAdsTxtRecord(a) && isAdsTxtVariable(b)) {
            return 1;
        }
        if (isAdsTxtRecord(a) && isAdsTxtRecord(b)) {
            return a.domain.localeCompare(b.domain);
        }
        return 0;
    });
    const optimizedLines = [];
    // Header comment, followed by a blank separator line.
    if (headerComment !== null) {
        optimizedLines.push(headerComment, '');
    }
    // Variables, grouped by type, each group under its own comment header.
    const variableEntries = parsedEntries.filter(isAdsTxtVariable);
    if (variableEntries.length > 0) {
        const groupedVariables = new Map();
        variableEntries.forEach((variable) => {
            const group = groupedVariables.get(variable.variable_type) || [];
            group.push(variable);
            groupedVariables.set(variable.variable_type, group);
        });
        Array.from(groupedVariables.keys())
            .sort()
            .forEach((variableType) => {
            const variables = groupedVariables.get(variableType);
            optimizedLines.push(`# ${variableType} Variables`);
            variables.forEach((variable) => {
                optimizedLines.push(`${variable.variable_type}=${variable.value}`);
            });
            // Blank line after each variable group.
            optimizedLines.push('');
        });
    }
    // Records section; its header is emitted even when there are no records.
    const recordEntries = parsedEntries.filter(isAdsTxtRecord);
    optimizedLines.push('# Advertising System Records');
    if (recordEntries.length > 0) {
        // Group by lower-cased domain so differently cased spellings end up together.
        const groupedRecords = new Map();
        recordEntries.forEach((record) => {
            const domainLower = record.domain.toLowerCase().trim();
            const group = groupedRecords.get(domainLower) || [];
            group.push(record);
            groupedRecords.set(domainLower, group);
        });
        Array.from(groupedRecords.keys())
            .sort()
            .forEach((domain) => {
            const records = groupedRecords.get(domain);
            // Within a domain: DIRECT before RESELLER, then by account ID.
            records.sort((a, b) => {
                if (a.relationship === 'DIRECT' && b.relationship === 'RESELLER') {
                    return -1;
                }
                if (a.relationship === 'RESELLER' && b.relationship === 'DIRECT') {
                    return 1;
                }
                return a.account_id.localeCompare(b.account_id);
            });
            records.forEach((record) => {
                try {
                    let line = `${record.domain}, ${record.account_id}, ${record.relationship}`;
                    if (record.certification_authority_id) {
                        line += `, ${record.certification_authority_id}`;
                    }
                    // Note: filling in a missing certification_authority_id is hard to do in
                    // a synchronous function, so only IDs already present on the record are
                    // emitted here. TAG-ID completion belongs in the controller's
                    // generateAdsTxtContent.
                    optimizedLines.push(line);
                }
                catch (error) {
                    // Best effort: report the record that failed to format and carry on.
                    const errorMsg = error instanceof Error ? error.message : String(error);
                    console.error(`Error formatting record: ${errorMsg}`);
                }
            });
        });
    }
    return optimizedLines.join('\n');
}
1254
/**
 * Check if an email address is valid
 * @param email - The email address to validate; anything that is not a
 *                non-empty string is reported as invalid instead of throwing
 * @returns Boolean indicating if the email is valid
 */
function isValidEmail(email) {
    // Non-string input (numbers, objects, null, undefined) is never a valid address.
    if (typeof email !== 'string' || email.length === 0) {
        return false;
    }
    const atIndex = email.indexOf('@');
    // Cheap structural rejections before running the full pattern.
    const isStructurallyBroken = email.includes('..') ||
        email.includes(' ') ||
        atIndex <= 0 || // no '@' at all, or nothing before it
        atIndex === email.length - 1 || // nothing after the '@'
        !email.includes('.', atIndex); // no dot in the domain part
    if (isStructurallyBroken) {
        return false;
    }
    const emailRegex = /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;
    return emailRegex.test(email);
}