@yglin/tw-env-records 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/lib/authentication.d.ts +8 -0
  2. package/lib/configuration.d.ts +34 -1
  3. package/lib/{jobs → google}/gemini-client-factory.d.ts +5 -8
  4. package/lib/hooks/after-askai-create.d.ts +2 -0
  5. package/lib/hooks/before-askai-create.d.ts +2 -0
  6. package/lib/internal/duplicate-check-batch-endpoints.d.ts +2 -0
  7. package/lib/jobs/audit-batch-process.d.ts +2 -8
  8. package/lib/jobs/audit-batch-state.d.ts +4 -0
  9. package/lib/jobs/audit-batch-submit.d.ts +2 -10
  10. package/lib/jobs/audit-batch.d.ts +3 -3
  11. package/lib/jobs/crawler-batch-process.d.ts +5 -14
  12. package/lib/jobs/crawler-batch-state.d.ts +33 -2
  13. package/lib/jobs/crawler-batch-submit.d.ts +2 -12
  14. package/lib/jobs/crawler-batch.d.ts +5 -6
  15. package/lib/jobs/crawler-prompt.d.ts +6 -0
  16. package/lib/jobs/duplicate-check/find-candidates.d.ts +29 -0
  17. package/lib/jobs/duplicate-check/index.d.ts +6 -0
  18. package/lib/jobs/duplicate-check/log-failed-backup.d.ts +17 -0
  19. package/lib/jobs/duplicate-check/merge-records.d.ts +20 -0
  20. package/lib/jobs/duplicate-check/prompt.d.ts +12 -0
  21. package/lib/jobs/duplicate-check-batch-process.d.ts +27 -0
  22. package/lib/jobs/duplicate-check-batch-state.d.ts +52 -0
  23. package/lib/jobs/duplicate-check-batch-submit.d.ts +26 -0
  24. package/lib/jobs/duplicate-check-batch.d.ts +41 -0
  25. package/lib/jobs/shared/batch-common.d.ts +90 -0
  26. package/lib/jobs/shared/batch-orchestration.d.ts +79 -0
  27. package/lib/jobs/shared/batch-processing.d.ts +37 -0
  28. package/lib/jobs/shared/batch-request.d.ts +54 -0
  29. package/lib/jobs/shared/batch-state.d.ts +72 -0
  30. package/lib/jobs/shared/index.d.ts +9 -0
  31. package/lib/logger.d.ts +2 -0
  32. package/lib/services/batch-jobs/batch-jobs.schema.d.ts +218 -136
  33. package/lib/services/meta/ask-ai-statistics.d.ts +5 -0
  34. package/lib/services/record/record.class.d.ts +49 -0
  35. package/lib/services/record/record.schema.d.ts +134 -4
  36. package/lib/services/users/users.class.d.ts +11 -0
  37. package/lib/services/users/users.d.ts +11 -0
  38. package/lib/services/users/users.schema.d.ts +356 -0
  39. package/lib/services/users/users.shared.d.ts +13 -0
  40. package/lib/services/users/users.shared.js +13 -0
  41. package/package.json +5 -1
  42. package/lib/maids/collate-place-names.d.ts +0 -1
  43. package/lib/maids/fix-place-names.d.ts +0 -7
  44. package/lib/maids/full-database-analyze.d.ts +0 -1
  45. package/lib/maids/geocode.d.ts +0 -4
@@ -0,0 +1,8 @@
1
+ import { AuthenticationService } from '@feathersjs/authentication';
2
+ import type { Application } from './declarations';
3
+ declare module './declarations' {
4
+ interface ServiceTypes {
5
+ authentication: AuthenticationService;
6
+ }
7
+ }
8
+ export declare const authentication: (app: Application) => void;
@@ -114,10 +114,13 @@ export declare const configurationSchema: import("@sinclair/typebox").TIntersect
114
114
  askAi: import("@sinclair/typebox").TObject<{
115
115
  ai_model: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
116
116
  ai_api_key: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
117
- recordsLimit: import("@sinclair/typebox").TNumber;
117
+ recordsLimit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
118
+ maxRequestsPerDay: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
119
+ keepDailyStatsDays: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
118
120
  }>;
119
121
  }>;
120
122
  jobs: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
123
+ internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
121
124
  crawler: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
122
125
  enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
123
126
  schedule: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
@@ -184,6 +187,36 @@ export declare const configurationSchema: import("@sinclair/typebox").TIntersect
184
187
  }>>;
185
188
  internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
186
189
  }>>;
190
+ duplicateCheckBatch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
191
+ enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
192
+ timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
193
+ max_records_per_window: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
194
+ max_candidates_per_record: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
195
+ similarity_threshold: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
196
+ schedules: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
197
+ submit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
198
+ process: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
199
+ }>>;
200
+ batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
201
+ use_file_input: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
202
+ requests_per_batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
203
+ display_name_prefix: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
204
+ }>>;
205
+ processing: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
206
+ max_retry_attempts: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
207
+ retry_interval_ms: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
208
+ confidence_threshold: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
209
+ }>>;
210
+ merge: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
211
+ story_min_length: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
212
+ }>>;
213
+ backup_failure_log: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
214
+ enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
215
+ path: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
216
+ retention_days: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
217
+ }>>;
218
+ internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
219
+ }>>;
187
220
  }>>;
188
221
  }>]>;
189
222
  export type ApplicationConfiguration = Static<typeof configurationSchema>;
@@ -14,13 +14,10 @@ export interface IGeminiClient {
14
14
  }
15
15
  /**
16
16
  * Default configuration applied to all Gemini requests
17
- * Includes Google Search grounding tool
17
+ * NOTE: Google Search grounding is NOT included because the Batch API
18
+ * does not return grounding metadata, making it ineffective for batch jobs.
18
19
  */
19
- export declare const GEMINI_DEFAULT_CONFIG: {
20
- tools: {
21
- googleSearch: {};
22
- }[];
23
- };
20
+ export declare const GEMINI_DEFAULT_CONFIG: {};
24
21
  /**
25
22
  * Default Gemini model
26
23
  */
@@ -44,8 +41,8 @@ declare class GeminiClientFactory {
44
41
  */
45
42
  getModel(): string;
46
43
  /**
47
- * Get default configuration with Google Search grounding
48
- * Use this in all generateContent calls
44
+ * Get default configuration for Gemini API requests
45
+ * NOTE: Does not include Google Search grounding since Batch API doesn't support it
49
46
  */
50
47
  getDefaultConfig(): typeof GEMINI_DEFAULT_CONFIG;
51
48
  /**
@@ -0,0 +1,2 @@
1
+ import type { HookContext } from '@feathersjs/feathers';
2
+ export declare const afterAskAiCreate: (context: HookContext) => Promise<HookContext<import("@feathersjs/feathers").Application<any, any>, any>>;
@@ -0,0 +1,2 @@
1
+ import type { HookContext } from '@feathersjs/feathers';
2
+ export declare const beforeAskAiCreate: (context: HookContext) => Promise<HookContext<import("@feathersjs/feathers").Application<any, any>, any>>;
@@ -0,0 +1,2 @@
1
+ import { Application } from '../declarations';
2
+ export declare const duplicateCheckBatchEndpoints: (app: Application) => void;
@@ -1,15 +1,9 @@
1
1
  import { Application } from '../declarations';
2
- import { type IGeminiClient } from './gemini-client-factory';
3
- export interface ProcessStats {
4
- processed: number;
2
+ import { GenericBatchProcessStats } from './shared/index';
3
+ export interface ProcessStats extends GenericBatchProcessStats {
5
4
  recordsUpdated: number;
6
5
  recordsSkipped: number;
7
- errors: number;
8
6
  }
9
- /**
10
- * Download and parse results from a completed batch job
11
- */
12
- export declare function downloadAndParseResults(client: IGeminiClient, jobName: string, isFileResult: boolean, resultFileName?: string): Promise<any[]>;
13
7
  /**
14
8
  * Process all pending audit batch jobs
15
9
  */
@@ -36,3 +36,7 @@ export declare function updateCleanupTime(): void;
36
36
  * WARNING: Only use in tests!
37
37
  */
38
38
  export declare function resetStateForTesting(): void;
39
+ /**
40
+ * Reconstruct state from database on service startup
41
+ */
42
+ export declare function reconstructFromDatabase(batchJobsService: any): Promise<void>;
@@ -1,20 +1,12 @@
1
1
  import { Application } from '../declarations';
2
- export interface SubmitStats {
3
- submitted: number;
2
+ import { GenericBatchSubmitStats } from './shared/index';
3
+ export interface SubmitStats extends GenericBatchSubmitStats {
4
4
  recordsQueued: number;
5
- jobNames: string[];
6
5
  }
7
6
  /**
8
7
  * Build the audit prompt for a single record
9
8
  */
10
9
  export declare function buildAuditPrompt(record: any): string;
11
- /**
12
- * Create a batch request object for audit
13
- */
14
- /**
15
- * Create a batch request object for audit
16
- */
17
- export declare function createBatchRequest(prompt: string, recordId: number): any;
18
10
  /**
19
11
  * Submit a new audit batch job
20
12
  */
@@ -1,9 +1,8 @@
1
1
  import { Application } from '../declarations';
2
2
  import { type SubmitStats } from './audit-batch-submit';
3
3
  import { type ProcessStats } from './audit-batch-process';
4
- export interface CleanupStats {
5
- removed: number;
6
- }
4
+ import { getBatchAuditState, reconstructFromDatabase } from './audit-batch-state';
5
+ import { type CleanupStats } from './shared/index';
7
6
  export interface CycleStats {
8
7
  submit: SubmitStats;
9
8
  process: ProcessStats;
@@ -34,3 +33,4 @@ export declare function getAuditBatchStatus(app: Application): Promise<{
34
33
  * Run a complete audit batch cycle (submit + process + cleanup)
35
34
  */
36
35
  export declare function runAuditBatchCycle(app: Application): Promise<CycleStats>;
36
+ export { getBatchAuditState, reconstructFromDatabase };
@@ -1,19 +1,10 @@
1
1
  import { Application } from '../declarations';
2
- import { type IGeminiClient } from './gemini-client-factory';
3
- export interface ProcessStats {
4
- processed: number;
5
- recordsExtracted: number;
6
- duplicateCheckJobsCreated: number;
7
- recordsCreated: number;
8
- recordsUpdated: number;
9
- duplicatesChecked: number;
10
- errors: number;
2
+ import { GenericBatchProcessStats } from './shared/index';
3
+ export interface ProcessStats extends GenericBatchProcessStats {
4
+ recordsExtracted?: number;
5
+ recordsCreated?: number;
11
6
  }
12
7
  /**
13
- * Download and parse results from a completed batch job
14
- */
15
- export declare function downloadAndParseResults(client: IGeminiClient, jobName: string, isFileResult: boolean, resultFileName?: string): Promise<any[]>;
16
- /**
17
- * Process all pending batch jobs
8
+ * Process all pending crawl batch jobs
18
9
  */
19
10
  export declare function processPendingBatches(app: Application): Promise<ProcessStats>;
@@ -23,17 +23,48 @@ export type BatchCrawlerState = {
23
23
  runningJobs?: number;
24
24
  succeededJobs?: number;
25
25
  };
26
+ /**
27
+ * Get current crawler batch state
28
+ */
26
29
  export declare function getBatchCrawlerState(): BatchCrawlerState;
30
+ /**
31
+ * Mark submit phase as started
32
+ */
27
33
  export declare function startSubmit(): void;
34
+ /**
35
+ * Mark submit phase as completed
36
+ */
28
37
  export declare function finishSubmit(result: BatchCrawlerStats, durationMs: number): void;
38
+ /**
39
+ * Mark submit phase as failed
40
+ */
29
41
  export declare function failSubmit(error: unknown, durationMs: number): void;
42
+ /**
43
+ * Mark process phase as started
44
+ */
30
45
  export declare function startProcess(): void;
46
+ /**
47
+ * Mark process phase as completed
48
+ */
31
49
  export declare function finishProcess(result: BatchCrawlerStats, durationMs: number): void;
50
+ /**
51
+ * Mark process phase as failed
52
+ */
32
53
  export declare function failProcess(error: unknown, durationMs: number): void;
54
+ /**
55
+ * Update job counts from database
56
+ */
33
57
  export declare function updateJobCounts(pending: number, running: number, succeeded: number): void;
58
+ /**
59
+ * Mark cleanup operation with current timestamp
60
+ */
34
61
  export declare function updateCleanupTime(): void;
35
62
  /**
36
- * Reset state for testing purposes
37
- * WARNING: Only use in tests!
63
+ * Reset state for testing
38
64
  */
39
65
  export declare function resetStateForTesting(): void;
66
+ /**
67
+ * Reconstruct state from database
68
+ * Used on service startup
69
+ */
70
+ export declare function reconstructFromDatabase(batchJobsService: any): Promise<void>;
@@ -1,17 +1,7 @@
1
1
  import { Application } from '../declarations';
2
- export interface SubmitStats {
3
- submitted: number;
4
- jobNames: string[];
2
+ import { GenericBatchSubmitStats } from './shared/index';
3
+ export interface SubmitStats extends GenericBatchSubmitStats {
5
4
  }
6
- /**
7
- * Build the crawl prompt for batch request
8
- * Reuses the same prompt logic as the synchronous crawler
9
- */
10
- export declare function buildCrawlPrompt(schema: any): string;
11
- /**
12
- * Create a batch request object
13
- */
14
- export declare function createBatchRequest(prompt: string, config: any): any;
15
5
  /**
16
6
  * Submit a new crawl batch job
17
7
  */
@@ -1,7 +1,9 @@
1
1
  import { Application } from '../declarations';
2
2
  import { SubmitStats } from './crawler-batch-submit';
3
3
  import { ProcessStats } from './crawler-batch-process';
4
- import { getBatchCrawlerState, BatchCrawlerState } from './crawler-batch-state';
4
+ import { getBatchCrawlerState, BatchCrawlerState, reconstructFromDatabase } from './crawler-batch-state';
5
+ import { CleanupStats as SharedCleanupStats } from './shared/index';
6
+ export type CleanupStats = SharedCleanupStats;
5
7
  export interface CycleStats {
6
8
  submitted: number;
7
9
  processed: number;
@@ -12,9 +14,6 @@ export interface CycleStats {
12
14
  cleanedUp: number;
13
15
  errors: number;
14
16
  }
15
- export interface CleanupStats {
16
- cleanedUp: number;
17
- }
18
17
  export type { SubmitStats } from './crawler-batch-submit';
19
18
  export type { ProcessStats } from './crawler-batch-process';
20
19
  export interface BatchStatus {
@@ -38,7 +37,7 @@ export declare function submitPhase(app: Application): Promise<SubmitStats>;
38
37
  */
39
38
  export declare function processPhase(app: Application): Promise<ProcessStats>;
40
39
  /**
41
- * Cleanup phase - remove old completed jobs
40
+ * Cleanup phase - remove old completed jobs based on retention policy
42
41
  */
43
42
  export declare function cleanupPhase(app: Application): Promise<CleanupStats>;
44
43
  /**
@@ -49,4 +48,4 @@ export declare function getBatchStatus(app: Application): Promise<BatchStatus[]>
49
48
  * Run full batch crawl cycle (submit → process → cleanup)
50
49
  */
51
50
  export declare function runBatchCrawlCycle(app: Application): Promise<CycleStats>;
52
- export { getBatchCrawlerState, BatchCrawlerState };
51
+ export { getBatchCrawlerState, BatchCrawlerState, reconstructFromDatabase };
@@ -0,0 +1,6 @@
1
+ import { Application } from '../declarations';
2
+ /**
3
+ * Build the crawl prompt for batch request
4
+ * Reuses the same prompt logic as the synchronous crawler
5
+ */
6
+ export declare function buildCrawlPrompt(app: Application): Promise<string>;
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Find records needing duplicate check and their potential duplicate candidates
3
+ */
4
+ import { Application } from '../../declarations';
5
+ /**
6
+ * Find records that need duplicate checking
7
+ * @param app - Feathers application
8
+ * @param limit - Maximum number of records to return (default: 50)
9
+ * @returns Array of records with duplicate_check_status = 'pending'
10
+ */
11
+ export declare function findRecordsNeedingDuplicateCheck(app: Application, limit?: number): Promise<any[]>;
12
+ /**
13
+ * Calculate Jaccard similarity between two arrays
14
+ * @param arr1 - First array
15
+ * @param arr2 - Second array
16
+ * @returns Similarity score between 0.0 and 1.0
17
+ */
18
+ export declare function calculateJaccardSimilarity(arr1: string[], arr2: string[]): number;
19
+ /**
20
+ * Find potential duplicate candidates for a given record
21
+ * Uses two-phase filtering: database filter (date + place) + in-memory similarity (tags + people)
22
+ *
23
+ * @param app - Feathers application
24
+ * @param record - The record to find duplicates for
25
+ * @param maxCandidates - Maximum number of candidates to return (default: 20)
26
+ * @param similarityThreshold - Minimum Jaccard similarity threshold (default: 0.5)
27
+ * @returns Array of candidate records that might be duplicates
28
+ */
29
+ export declare function findDuplicateCandidates(app: Application, record: any, maxCandidates?: number, similarityThreshold?: number): Promise<any[]>;
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Duplicate-check module exports
3
+ */
4
+ export { findRecordsNeedingDuplicateCheck, findDuplicateCandidates, calculateJaccardSimilarity } from './find-candidates';
5
+ export { buildDuplicateCheckPrompt } from './prompt';
6
+ export { mergeRecordsFromGeminiResponse, type MergeResult } from './merge-records';
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Log failed MongoDB backup to file for recovery
3
+ */
4
+ /**
5
+ * Log a failed MongoDB backup to file
6
+ * Writes in JSONL format (one JSON object per line) for easy recovery
7
+ *
8
+ * @param record - The record that failed to backup
9
+ * @param error - The error that occurred
10
+ * @param metadata - Additional metadata about the deletion
11
+ */
12
+ export declare function logFailedMongoBackup(record: any, error: Error, metadata: {
13
+ merged_into_id?: number;
14
+ deletion_reason?: string;
15
+ deleted_at: string;
16
+ deleted_by?: string;
17
+ }): void;
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Merge duplicate records and handle deletion with MongoDB backup
3
+ */
4
+ import { Application } from '../../declarations';
5
+ export interface MergeResult {
6
+ mergedRecordId: number;
7
+ deletedRecordIds: number[];
8
+ confidence: number;
9
+ }
10
+ /**
11
+ * Merge duplicate records into a single record
12
+ * Creates a new merged record and deletes the duplicates (with MongoDB backup via hook)
13
+ *
14
+ * @param app - Feathers application
15
+ * @param duplicateIds - Array of record IDs to merge
16
+ * @param mergedRecordData - Data for the merged record
17
+ * @param confidence - AI confidence score
18
+ * @returns MergeResult with merged record ID and deleted IDs
19
+ */
20
+ export declare function mergeRecordsFromGeminiResponse(app: Application, duplicateIds: number[], mergedRecordData: any, confidence: number): Promise<MergeResult>;
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Build prompts for Gemini API to detect and merge duplicate records
3
+ */
4
+ /**
5
+ * Build a duplicate-check prompt for Gemini API
6
+ * Asks the AI to identify which candidates are duplicates and provide a merged record
7
+ *
8
+ * @param newRecord - The record being checked for duplicates
9
+ * @param candidates - Array of potential duplicate records
10
+ * @returns Prompt string for Gemini API
11
+ */
12
+ export declare function buildDuplicateCheckPrompt(newRecord: any, candidates: any[]): string;
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Duplicate-check batch result processor
3
+ * Implements BatchJobResultHandler to process completed batch jobs from Gemini API
4
+ * Handles merging duplicates, managing status, and fallback for failed cases
5
+ */
6
+ import { Application } from '../declarations';
7
+ import { GenericBatchProcessStats } from './shared';
8
+ export interface ValidatedDuplicateCheckResult {
9
+ duplicateIds: number[];
10
+ mergedRecord: any | null;
11
+ confidence?: number;
12
+ }
13
+ export declare function validateDuplicateCheckResponse(recordId: number, parsed: any): ValidatedDuplicateCheckResult;
14
+ export interface ProcessStats extends GenericBatchProcessStats {
15
+ merged?: number;
16
+ manualReviewFlagged?: number;
17
+ skipped?: number;
18
+ checked?: number;
19
+ }
20
+ /**
21
+ * Process pending duplicate-check batch jobs
22
+ * Polls for completed jobs and processes their results
23
+ *
24
+ * @param app - Feathers application
25
+ * @returns ProcessStats with processing results
26
+ */
27
+ export declare function processDuplicateCheckBatches(app: Application): Promise<ProcessStats>;
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Duplicate-check batch state management
3
+ * Optional: Tracks execution state for monitoring and debugging
4
+ * Can be extended to support checkpoint-based recovery
5
+ */
6
+ export interface BatchExecutionState {
7
+ phase: 'idle' | 'submitting' | 'processing' | 'error';
8
+ lastSubmitAt?: Date;
9
+ lastProcessAt?: Date;
10
+ lastSuccessAt?: Date;
11
+ lastErrorAt?: Date;
12
+ lastErrorMessage?: string;
13
+ pendingJobCount?: number;
14
+ completedJobCount?: number;
15
+ }
16
+ /**
17
+ * In-memory state tracker for batch execution
18
+ * For production use, consider storing in database or cache (Redis)
19
+ */
20
+ declare class DuplicateCheckBatchStateManager {
21
+ private state;
22
+ setState(phase: BatchExecutionState['phase']): void;
23
+ recordSubmitStart(): void;
24
+ recordSubmitComplete(): void;
25
+ recordProcessStart(): void;
26
+ recordProcessComplete(): void;
27
+ recordError(error: Error): void;
28
+ updatePendingJobCount(count: number): void;
29
+ updateCompletedJobCount(count: number): void;
30
+ getState(): BatchExecutionState;
31
+ isIdle(): boolean;
32
+ isRunning(): boolean;
33
+ hasError(): boolean;
34
+ getLastError(): string | undefined;
35
+ resetError(): void;
36
+ }
37
+ export declare const duplicateCheckBatchState: DuplicateCheckBatchStateManager;
38
+ /**
39
+ * Get current batch execution state for monitoring
40
+ * Can be exposed via API endpoint for dashboards
41
+ *
42
+ * @returns Current execution state
43
+ */
44
+ export declare function getDuplicateCheckBatchState(): BatchExecutionState;
45
+ /**
46
+ * Check if batch can run (no other instance running)
47
+ * Use for preventing concurrent executions
48
+ *
49
+ * @returns true if batch can run
50
+ */
51
+ export declare function canRunDuplicateCheckBatch(): boolean;
52
+ export {};
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Duplicate-check batch submission handler
3
+ * Implements BatchRequestFactory to submit duplicate-check batch jobs to Gemini API
4
+ * Reuses shared submitBatchJob utility for API communication
5
+ */
6
+ import { Application } from '../declarations';
7
+ import { GenericBatchSubmitStats, BatchJobMetadata } from './shared';
8
+ export interface SubmitStats extends GenericBatchSubmitStats {
9
+ recordsWithoutCandidates?: number;
10
+ }
11
+ export interface DuplicateCheckBatchJobMetadata extends BatchJobMetadata {
12
+ candidatesArray: {
13
+ subjectId: string;
14
+ candidates: string[];
15
+ }[];
16
+ recordIds?: number[];
17
+ }
18
+ /**
19
+ * Submit a new duplicate-check batch job
20
+ * Finds records needing checking, identifies candidates, and submits to Gemini batch API
21
+ * Records without candidates are marked as checked immediately (no API call needed)
22
+ *
23
+ * @param app - Feathers application
24
+ * @returns SubmitStats with submission results
25
+ */
26
+ export declare function submitDuplicateCheckBatch(app: Application): Promise<SubmitStats>;
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Duplicate-check batch orchestrator
3
+ * Main entry point for managing the duplicate-check batch job lifecycle
4
+ * Coordinates submission and processing phases with scheduling integration
5
+ */
6
+ import { Application } from '../declarations';
7
+ export interface DuplicateCheckBatchStats {
8
+ submitted?: number;
9
+ processed?: number;
10
+ succeeded?: number;
11
+ failed?: number;
12
+ merged?: number;
13
+ recordsWithoutCandidates?: number;
14
+ }
15
+ /**
16
+ * Execute full duplicate-check batch cycle
17
+ * Runs both submit and process phases in sequence
18
+ * Designed to be called by scheduler or manually via endpoint
19
+ *
20
+ * @param app - Feathers application
21
+ * @returns Statistics from both phases
22
+ */
23
+ export declare function executeDuplicateCheckBatch(app: Application): Promise<DuplicateCheckBatchStats>;
24
+ /**
25
+ * Process only (skip submission)
26
+ * Use this to catch up on processing completed jobs without submitting new ones
27
+ * Useful for recovery or manual processing runs
28
+ *
29
+ * @param app - Feathers application
30
+ * @returns Process statistics
31
+ */
32
+ export declare function processDuplicateCheckBatchOnly(app: Application): Promise<import("./duplicate-check-batch-process").ProcessStats>;
33
+ /**
34
+ * Submit only (skip processing)
35
+ * Use this to only submit new batch jobs without processing results
36
+ * Useful if you want to control submission and processing timing separately
37
+ *
38
+ * @param app - Feathers application
39
+ * @returns Submit statistics
40
+ */
41
+ export declare function submitDuplicateCheckBatchOnly(app: Application): Promise<import("./duplicate-check-batch-submit").SubmitStats>;
@@ -0,0 +1,90 @@
1
+ import { Application } from '../../declarations';
2
+ /**
3
+ * Generic configuration for batch job creation
4
+ */
5
+ export interface GenericBatchRequestConfig {
6
+ responseMimeType?: string;
7
+ tools?: any[];
8
+ temperature?: number;
9
+ topP?: number;
10
+ topK?: number;
11
+ maxOutputTokens?: number;
12
+ [key: string]: any;
13
+ }
14
+ /**
15
+ * Statistics for batch submission
16
+ */
17
+ export interface GenericBatchSubmitStats {
18
+ submitted: number;
19
+ jobNames: string[];
20
+ recordsQueued?: number;
21
+ [key: string]: any;
22
+ }
23
+ /**
24
+ * Statistics for batch processing
25
+ */
26
+ export interface GenericBatchProcessStats {
27
+ processed: number;
28
+ recordsExtracted?: number;
29
+ duplicateCheckJobsCreated?: number;
30
+ recordsCreated?: number;
31
+ recordsUpdated?: number;
32
+ duplicatesChecked?: number;
33
+ errors?: number;
34
+ [key: string]: any;
35
+ }
36
+ /**
37
+ * Metadata for batch jobs
38
+ */
39
+ export interface BatchJobMetadata {
40
+ displayName: string;
41
+ submittedAt?: string;
42
+ [key: string]: any;
43
+ }
44
+ /**
45
+ * Generic batch job record in database
46
+ */
47
+ export interface GenericBatchJob {
48
+ id?: number;
49
+ job_name: string;
50
+ job_type: string;
51
+ status: string;
52
+ model?: string;
53
+ request_count?: number;
54
+ metadata?: BatchJobMetadata;
55
+ schema_version?: number;
56
+ created_at?: string;
57
+ updated_at?: string;
58
+ completed_at?: string;
59
+ error_message?: string;
60
+ }
61
+ /**
62
+ * Clean JSON response by removing markdown code blocks
63
+ * Handles patterns like ```json ... ```
64
+ */
65
+ export declare function cleanJSONResponse(jsonResponse: string): string;
66
+ /**
67
+ * Extract response text from Gemini API response object
68
+ */
69
+ export declare function extractResponseText(response: any): string;
70
+ /**
71
+ * No-op function - we now keep the JOB_STATE_ prefix to match Gemini API convention
72
+ * Status values are stored with prefix: JOB_STATE_SUCCEEDED, JOB_STATE_RUNNING, etc.
73
+ */
74
+ export declare function stripJobStatePrefix(status: string): string;
75
+ /**
76
+ * Parse JSON response text with error handling
77
+ */
78
+ export declare function parseResponseJSON(text: string): any;
79
+ /**
80
+ * Get cleanup configuration with defaults
81
+ */
82
+ export declare function getCleanupConfig(app: Application): BatchCleanupConfig;
83
+ /**
84
+ * Cleanup configuration for batch jobs
85
+ */
86
+ export interface BatchCleanupConfig {
87
+ successful_retention_days: number;
88
+ failed_retention_days: number;
89
+ enable_auto_cleanup: boolean;
90
+ }