@yglin/tw-env-records 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/authentication.d.ts +8 -0
- package/lib/configuration.d.ts +34 -1
- package/lib/{jobs → google}/gemini-client-factory.d.ts +5 -8
- package/lib/hooks/after-askai-create.d.ts +2 -0
- package/lib/hooks/before-askai-create.d.ts +2 -0
- package/lib/internal/duplicate-check-batch-endpoints.d.ts +2 -0
- package/lib/jobs/audit-batch-process.d.ts +2 -8
- package/lib/jobs/audit-batch-state.d.ts +4 -0
- package/lib/jobs/audit-batch-submit.d.ts +2 -10
- package/lib/jobs/audit-batch.d.ts +3 -3
- package/lib/jobs/crawler-batch-process.d.ts +5 -14
- package/lib/jobs/crawler-batch-state.d.ts +33 -2
- package/lib/jobs/crawler-batch-submit.d.ts +2 -12
- package/lib/jobs/crawler-batch.d.ts +5 -6
- package/lib/jobs/crawler-prompt.d.ts +6 -0
- package/lib/jobs/duplicate-check/find-candidates.d.ts +29 -0
- package/lib/jobs/duplicate-check/index.d.ts +6 -0
- package/lib/jobs/duplicate-check/log-failed-backup.d.ts +17 -0
- package/lib/jobs/duplicate-check/merge-records.d.ts +20 -0
- package/lib/jobs/duplicate-check/prompt.d.ts +12 -0
- package/lib/jobs/duplicate-check-batch-process.d.ts +27 -0
- package/lib/jobs/duplicate-check-batch-state.d.ts +52 -0
- package/lib/jobs/duplicate-check-batch-submit.d.ts +26 -0
- package/lib/jobs/duplicate-check-batch.d.ts +41 -0
- package/lib/jobs/shared/batch-common.d.ts +90 -0
- package/lib/jobs/shared/batch-orchestration.d.ts +79 -0
- package/lib/jobs/shared/batch-processing.d.ts +37 -0
- package/lib/jobs/shared/batch-request.d.ts +54 -0
- package/lib/jobs/shared/batch-state.d.ts +72 -0
- package/lib/jobs/shared/index.d.ts +9 -0
- package/lib/logger.d.ts +2 -0
- package/lib/services/batch-jobs/batch-jobs.schema.d.ts +218 -136
- package/lib/services/meta/ask-ai-statistics.d.ts +5 -0
- package/lib/services/record/record.class.d.ts +49 -0
- package/lib/services/record/record.schema.d.ts +134 -4
- package/lib/services/users/users.class.d.ts +11 -0
- package/lib/services/users/users.d.ts +11 -0
- package/lib/services/users/users.schema.d.ts +356 -0
- package/lib/services/users/users.shared.d.ts +13 -0
- package/lib/services/users/users.shared.js +13 -0
- package/package.json +5 -1
- package/lib/maids/collate-place-names.d.ts +0 -1
- package/lib/maids/fix-place-names.d.ts +0 -7
- package/lib/maids/full-database-analyze.d.ts +0 -1
- package/lib/maids/geocode.d.ts +0 -4
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { AuthenticationService } from '@feathersjs/authentication';
|
|
2
|
+
import type { Application } from './declarations';
|
|
3
|
+
declare module './declarations' {
|
|
4
|
+
interface ServiceTypes {
|
|
5
|
+
authentication: AuthenticationService;
|
|
6
|
+
}
|
|
7
|
+
}
|
|
8
|
+
export declare const authentication: (app: Application) => void;
|
package/lib/configuration.d.ts
CHANGED
|
@@ -114,10 +114,13 @@ export declare const configurationSchema: import("@sinclair/typebox").TIntersect
|
|
|
114
114
|
askAi: import("@sinclair/typebox").TObject<{
|
|
115
115
|
ai_model: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
116
116
|
ai_api_key: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
117
|
-
recordsLimit: import("@sinclair/typebox").TNumber
|
|
117
|
+
recordsLimit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
118
|
+
maxRequestsPerDay: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
119
|
+
keepDailyStatsDays: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
118
120
|
}>;
|
|
119
121
|
}>;
|
|
120
122
|
jobs: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
123
|
+
internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
121
124
|
crawler: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
122
125
|
enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
123
126
|
schedule: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
@@ -184,6 +187,36 @@ export declare const configurationSchema: import("@sinclair/typebox").TIntersect
|
|
|
184
187
|
}>>;
|
|
185
188
|
internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
186
189
|
}>>;
|
|
190
|
+
duplicateCheckBatch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
191
|
+
enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
192
|
+
timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
193
|
+
max_records_per_window: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
194
|
+
max_candidates_per_record: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
195
|
+
similarity_threshold: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
196
|
+
schedules: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
197
|
+
submit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
198
|
+
process: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
199
|
+
}>>;
|
|
200
|
+
batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
201
|
+
use_file_input: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
202
|
+
requests_per_batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
203
|
+
display_name_prefix: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
204
|
+
}>>;
|
|
205
|
+
processing: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
206
|
+
max_retry_attempts: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
207
|
+
retry_interval_ms: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
208
|
+
confidence_threshold: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
209
|
+
}>>;
|
|
210
|
+
merge: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
211
|
+
story_min_length: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
212
|
+
}>>;
|
|
213
|
+
backup_failure_log: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
214
|
+
enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
215
|
+
path: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
216
|
+
retention_days: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
217
|
+
}>>;
|
|
218
|
+
internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
219
|
+
}>>;
|
|
187
220
|
}>>;
|
|
188
221
|
}>]>;
|
|
189
222
|
export type ApplicationConfiguration = Static<typeof configurationSchema>;
|
|
@@ -14,13 +14,10 @@ export interface IGeminiClient {
|
|
|
14
14
|
}
|
|
15
15
|
/**
|
|
16
16
|
* Default configuration applied to all Gemini requests
|
|
17
|
-
*
|
|
17
|
+
* NOTE: Google Search grounding is NOT included because the Batch API
|
|
18
|
+
* does not return grounding metadata, making it ineffective for batch jobs.
|
|
18
19
|
*/
|
|
19
|
-
export declare const GEMINI_DEFAULT_CONFIG: {
|
|
20
|
-
tools: {
|
|
21
|
-
googleSearch: {};
|
|
22
|
-
}[];
|
|
23
|
-
};
|
|
20
|
+
export declare const GEMINI_DEFAULT_CONFIG: {};
|
|
24
21
|
/**
|
|
25
22
|
* Default Gemini model
|
|
26
23
|
*/
|
|
@@ -44,8 +41,8 @@ declare class GeminiClientFactory {
|
|
|
44
41
|
*/
|
|
45
42
|
getModel(): string;
|
|
46
43
|
/**
|
|
47
|
-
* Get default configuration
|
|
48
|
-
*
|
|
44
|
+
* Get default configuration for Gemini API requests
|
|
45
|
+
* NOTE: Does not include Google Search grounding since Batch API doesn't support it
|
|
49
46
|
*/
|
|
50
47
|
getDefaultConfig(): typeof GEMINI_DEFAULT_CONFIG;
|
|
51
48
|
/**
|
|
@@ -1,15 +1,9 @@
|
|
|
1
1
|
import { Application } from '../declarations';
|
|
2
|
-
import {
|
|
3
|
-
export interface ProcessStats {
|
|
4
|
-
processed: number;
|
|
2
|
+
import { GenericBatchProcessStats } from './shared/index';
|
|
3
|
+
export interface ProcessStats extends GenericBatchProcessStats {
|
|
5
4
|
recordsUpdated: number;
|
|
6
5
|
recordsSkipped: number;
|
|
7
|
-
errors: number;
|
|
8
6
|
}
|
|
9
|
-
/**
|
|
10
|
-
* Download and parse results from a completed batch job
|
|
11
|
-
*/
|
|
12
|
-
export declare function downloadAndParseResults(client: IGeminiClient, jobName: string, isFileResult: boolean, resultFileName?: string): Promise<any[]>;
|
|
13
7
|
/**
|
|
14
8
|
* Process all pending audit batch jobs
|
|
15
9
|
*/
|
|
@@ -36,3 +36,7 @@ export declare function updateCleanupTime(): void;
|
|
|
36
36
|
* WARNING: Only use in tests!
|
|
37
37
|
*/
|
|
38
38
|
export declare function resetStateForTesting(): void;
|
|
39
|
+
/**
|
|
40
|
+
* Reconstruct state from database on service startup
|
|
41
|
+
*/
|
|
42
|
+
export declare function reconstructFromDatabase(batchJobsService: any): Promise<void>;
|
|
@@ -1,20 +1,12 @@
|
|
|
1
1
|
import { Application } from '../declarations';
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
import { GenericBatchSubmitStats } from './shared/index';
|
|
3
|
+
export interface SubmitStats extends GenericBatchSubmitStats {
|
|
4
4
|
recordsQueued: number;
|
|
5
|
-
jobNames: string[];
|
|
6
5
|
}
|
|
7
6
|
/**
|
|
8
7
|
* Build the audit prompt for a single record
|
|
9
8
|
*/
|
|
10
9
|
export declare function buildAuditPrompt(record: any): string;
|
|
11
|
-
/**
|
|
12
|
-
* Create a batch request object for audit
|
|
13
|
-
*/
|
|
14
|
-
/**
|
|
15
|
-
* Create a batch request object for audit
|
|
16
|
-
*/
|
|
17
|
-
export declare function createBatchRequest(prompt: string, recordId: number): any;
|
|
18
10
|
/**
|
|
19
11
|
* Submit a new audit batch job
|
|
20
12
|
*/
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
import { Application } from '../declarations';
|
|
2
2
|
import { type SubmitStats } from './audit-batch-submit';
|
|
3
3
|
import { type ProcessStats } from './audit-batch-process';
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
}
|
|
4
|
+
import { getBatchAuditState, reconstructFromDatabase } from './audit-batch-state';
|
|
5
|
+
import { type CleanupStats } from './shared/index';
|
|
7
6
|
export interface CycleStats {
|
|
8
7
|
submit: SubmitStats;
|
|
9
8
|
process: ProcessStats;
|
|
@@ -34,3 +33,4 @@ export declare function getAuditBatchStatus(app: Application): Promise<{
|
|
|
34
33
|
* Run a complete audit batch cycle (submit + process + cleanup)
|
|
35
34
|
*/
|
|
36
35
|
export declare function runAuditBatchCycle(app: Application): Promise<CycleStats>;
|
|
36
|
+
export { getBatchAuditState, reconstructFromDatabase };
|
|
@@ -1,19 +1,10 @@
|
|
|
1
1
|
import { Application } from '../declarations';
|
|
2
|
-
import {
|
|
3
|
-
export interface ProcessStats {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
duplicateCheckJobsCreated: number;
|
|
7
|
-
recordsCreated: number;
|
|
8
|
-
recordsUpdated: number;
|
|
9
|
-
duplicatesChecked: number;
|
|
10
|
-
errors: number;
|
|
2
|
+
import { GenericBatchProcessStats } from './shared/index';
|
|
3
|
+
export interface ProcessStats extends GenericBatchProcessStats {
|
|
4
|
+
recordsExtracted?: number;
|
|
5
|
+
recordsCreated?: number;
|
|
11
6
|
}
|
|
12
7
|
/**
|
|
13
|
-
*
|
|
14
|
-
*/
|
|
15
|
-
export declare function downloadAndParseResults(client: IGeminiClient, jobName: string, isFileResult: boolean, resultFileName?: string): Promise<any[]>;
|
|
16
|
-
/**
|
|
17
|
-
* Process all pending batch jobs
|
|
8
|
+
* Process all pending crawl batch jobs
|
|
18
9
|
*/
|
|
19
10
|
export declare function processPendingBatches(app: Application): Promise<ProcessStats>;
|
|
@@ -23,17 +23,48 @@ export type BatchCrawlerState = {
|
|
|
23
23
|
runningJobs?: number;
|
|
24
24
|
succeededJobs?: number;
|
|
25
25
|
};
|
|
26
|
+
/**
|
|
27
|
+
* Get current crawler batch state
|
|
28
|
+
*/
|
|
26
29
|
export declare function getBatchCrawlerState(): BatchCrawlerState;
|
|
30
|
+
/**
|
|
31
|
+
* Mark submit phase as started
|
|
32
|
+
*/
|
|
27
33
|
export declare function startSubmit(): void;
|
|
34
|
+
/**
|
|
35
|
+
* Mark submit phase as completed
|
|
36
|
+
*/
|
|
28
37
|
export declare function finishSubmit(result: BatchCrawlerStats, durationMs: number): void;
|
|
38
|
+
/**
|
|
39
|
+
* Mark submit phase as failed
|
|
40
|
+
*/
|
|
29
41
|
export declare function failSubmit(error: unknown, durationMs: number): void;
|
|
42
|
+
/**
|
|
43
|
+
* Mark process phase as started
|
|
44
|
+
*/
|
|
30
45
|
export declare function startProcess(): void;
|
|
46
|
+
/**
|
|
47
|
+
* Mark process phase as completed
|
|
48
|
+
*/
|
|
31
49
|
export declare function finishProcess(result: BatchCrawlerStats, durationMs: number): void;
|
|
50
|
+
/**
|
|
51
|
+
* Mark process phase as failed
|
|
52
|
+
*/
|
|
32
53
|
export declare function failProcess(error: unknown, durationMs: number): void;
|
|
54
|
+
/**
|
|
55
|
+
* Update job counts from database
|
|
56
|
+
*/
|
|
33
57
|
export declare function updateJobCounts(pending: number, running: number, succeeded: number): void;
|
|
58
|
+
/**
|
|
59
|
+
* Mark cleanup operation with current timestamp
|
|
60
|
+
*/
|
|
34
61
|
export declare function updateCleanupTime(): void;
|
|
35
62
|
/**
|
|
36
|
-
* Reset state for testing
|
|
37
|
-
* WARNING: Only use in tests!
|
|
63
|
+
* Reset state for testing
|
|
38
64
|
*/
|
|
39
65
|
export declare function resetStateForTesting(): void;
|
|
66
|
+
/**
|
|
67
|
+
* Reconstruct state from database
|
|
68
|
+
* Used on service startup
|
|
69
|
+
*/
|
|
70
|
+
export declare function reconstructFromDatabase(batchJobsService: any): Promise<void>;
|
|
@@ -1,17 +1,7 @@
|
|
|
1
1
|
import { Application } from '../declarations';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
jobNames: string[];
|
|
2
|
+
import { GenericBatchSubmitStats } from './shared/index';
|
|
3
|
+
export interface SubmitStats extends GenericBatchSubmitStats {
|
|
5
4
|
}
|
|
6
|
-
/**
|
|
7
|
-
* Build the crawl prompt for batch request
|
|
8
|
-
* Reuses the same prompt logic as the synchronous crawler
|
|
9
|
-
*/
|
|
10
|
-
export declare function buildCrawlPrompt(schema: any): string;
|
|
11
|
-
/**
|
|
12
|
-
* Create a batch request object
|
|
13
|
-
*/
|
|
14
|
-
export declare function createBatchRequest(prompt: string, config: any): any;
|
|
15
5
|
/**
|
|
16
6
|
* Submit a new crawl batch job
|
|
17
7
|
*/
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import { Application } from '../declarations';
|
|
2
2
|
import { SubmitStats } from './crawler-batch-submit';
|
|
3
3
|
import { ProcessStats } from './crawler-batch-process';
|
|
4
|
-
import { getBatchCrawlerState, BatchCrawlerState } from './crawler-batch-state';
|
|
4
|
+
import { getBatchCrawlerState, BatchCrawlerState, reconstructFromDatabase } from './crawler-batch-state';
|
|
5
|
+
import { CleanupStats as SharedCleanupStats } from './shared/index';
|
|
6
|
+
export type CleanupStats = SharedCleanupStats;
|
|
5
7
|
export interface CycleStats {
|
|
6
8
|
submitted: number;
|
|
7
9
|
processed: number;
|
|
@@ -12,9 +14,6 @@ export interface CycleStats {
|
|
|
12
14
|
cleanedUp: number;
|
|
13
15
|
errors: number;
|
|
14
16
|
}
|
|
15
|
-
export interface CleanupStats {
|
|
16
|
-
cleanedUp: number;
|
|
17
|
-
}
|
|
18
17
|
export type { SubmitStats } from './crawler-batch-submit';
|
|
19
18
|
export type { ProcessStats } from './crawler-batch-process';
|
|
20
19
|
export interface BatchStatus {
|
|
@@ -38,7 +37,7 @@ export declare function submitPhase(app: Application): Promise<SubmitStats>;
|
|
|
38
37
|
*/
|
|
39
38
|
export declare function processPhase(app: Application): Promise<ProcessStats>;
|
|
40
39
|
/**
|
|
41
|
-
* Cleanup phase - remove old completed jobs
|
|
40
|
+
* Cleanup phase - remove old completed jobs based on retention policy
|
|
42
41
|
*/
|
|
43
42
|
export declare function cleanupPhase(app: Application): Promise<CleanupStats>;
|
|
44
43
|
/**
|
|
@@ -49,4 +48,4 @@ export declare function getBatchStatus(app: Application): Promise<BatchStatus[]>
|
|
|
49
48
|
* Run full batch crawl cycle (submit → process → cleanup)
|
|
50
49
|
*/
|
|
51
50
|
export declare function runBatchCrawlCycle(app: Application): Promise<CycleStats>;
|
|
52
|
-
export { getBatchCrawlerState, BatchCrawlerState };
|
|
51
|
+
export { getBatchCrawlerState, BatchCrawlerState, reconstructFromDatabase };
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Find records needing duplicate check and their potential duplicate candidates
|
|
3
|
+
*/
|
|
4
|
+
import { Application } from '../../declarations';
|
|
5
|
+
/**
|
|
6
|
+
* Find records that need duplicate checking
|
|
7
|
+
* @param app - Feathers application
|
|
8
|
+
* @param limit - Maximum number of records to return (default: 50)
|
|
9
|
+
* @returns Array of records with duplicate_check_status = 'pending'
|
|
10
|
+
*/
|
|
11
|
+
export declare function findRecordsNeedingDuplicateCheck(app: Application, limit?: number): Promise<any[]>;
|
|
12
|
+
/**
|
|
13
|
+
* Calculate Jaccard similarity between two arrays
|
|
14
|
+
* @param arr1 - First array
|
|
15
|
+
* @param arr2 - Second array
|
|
16
|
+
* @returns Similarity score between 0.0 and 1.0
|
|
17
|
+
*/
|
|
18
|
+
export declare function calculateJaccardSimilarity(arr1: string[], arr2: string[]): number;
|
|
19
|
+
/**
|
|
20
|
+
* Find potential duplicate candidates for a given record
|
|
21
|
+
* Uses two-phase filtering: database filter (date + place) + in-memory similarity (tags + people)
|
|
22
|
+
*
|
|
23
|
+
* @param app - Feathers application
|
|
24
|
+
* @param record - The record to find duplicates for
|
|
25
|
+
* @param maxCandidates - Maximum number of candidates to return (default: 20)
|
|
26
|
+
* @param similarityThreshold - Minimum Jaccard similarity threshold (default: 0.5)
|
|
27
|
+
* @returns Array of candidate records that might be duplicates
|
|
28
|
+
*/
|
|
29
|
+
export declare function findDuplicateCandidates(app: Application, record: any, maxCandidates?: number, similarityThreshold?: number): Promise<any[]>;
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Duplicate-check module exports
|
|
3
|
+
*/
|
|
4
|
+
export { findRecordsNeedingDuplicateCheck, findDuplicateCandidates, calculateJaccardSimilarity } from './find-candidates';
|
|
5
|
+
export { buildDuplicateCheckPrompt } from './prompt';
|
|
6
|
+
export { mergeRecordsFromGeminiResponse, type MergeResult } from './merge-records';
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Log failed MongoDB backup to file for recovery
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Log a failed MongoDB backup to file
|
|
6
|
+
* Writes in JSONL format (one JSON object per line) for easy recovery
|
|
7
|
+
*
|
|
8
|
+
* @param record - The record that failed to backup
|
|
9
|
+
* @param error - The error that occurred
|
|
10
|
+
* @param metadata - Additional metadata about the deletion
|
|
11
|
+
*/
|
|
12
|
+
export declare function logFailedMongoBackup(record: any, error: Error, metadata: {
|
|
13
|
+
merged_into_id?: number;
|
|
14
|
+
deletion_reason?: string;
|
|
15
|
+
deleted_at: string;
|
|
16
|
+
deleted_by?: string;
|
|
17
|
+
}): void;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Merge duplicate records and handle deletion with MongoDB backup
|
|
3
|
+
*/
|
|
4
|
+
import { Application } from '../../declarations';
|
|
5
|
+
export interface MergeResult {
|
|
6
|
+
mergedRecordId: number;
|
|
7
|
+
deletedRecordIds: number[];
|
|
8
|
+
confidence: number;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Merge duplicate records into a single record
|
|
12
|
+
* Creates a new merged record and deletes the duplicates (with MongoDB backup via hook)
|
|
13
|
+
*
|
|
14
|
+
* @param app - Feathers application
|
|
15
|
+
* @param duplicateIds - Array of record IDs to merge
|
|
16
|
+
* @param mergedRecordData - Data for the merged record
|
|
17
|
+
* @param confidence - AI confidence score
|
|
18
|
+
* @returns MergeResult with merged record ID and deleted IDs
|
|
19
|
+
*/
|
|
20
|
+
export declare function mergeRecordsFromGeminiResponse(app: Application, duplicateIds: number[], mergedRecordData: any, confidence: number): Promise<MergeResult>;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Build prompts for Gemini API to detect and merge duplicate records
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Build a duplicate-check prompt for Gemini API
|
|
6
|
+
* Asks the AI to identify which candidates are duplicates and provide a merged record
|
|
7
|
+
*
|
|
8
|
+
* @param newRecord - The record being checked for duplicates
|
|
9
|
+
* @param candidates - Array of potential duplicate records
|
|
10
|
+
* @returns Prompt string for Gemini API
|
|
11
|
+
*/
|
|
12
|
+
export declare function buildDuplicateCheckPrompt(newRecord: any, candidates: any[]): string;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Duplicate-check batch result processor
|
|
3
|
+
* Implements BatchJobResultHandler to process completed batch jobs from Gemini API
|
|
4
|
+
* Handles merging duplicates, managing status, and fallback for failed cases
|
|
5
|
+
*/
|
|
6
|
+
import { Application } from '../declarations';
|
|
7
|
+
import { GenericBatchProcessStats } from './shared';
|
|
8
|
+
export interface ValidatedDuplicateCheckResult {
|
|
9
|
+
duplicateIds: number[];
|
|
10
|
+
mergedRecord: any | null;
|
|
11
|
+
confidence?: number;
|
|
12
|
+
}
|
|
13
|
+
export declare function validateDuplicateCheckResponse(recordId: number, parsed: any): ValidatedDuplicateCheckResult;
|
|
14
|
+
export interface ProcessStats extends GenericBatchProcessStats {
|
|
15
|
+
merged?: number;
|
|
16
|
+
manualReviewFlagged?: number;
|
|
17
|
+
skipped?: number;
|
|
18
|
+
checked?: number;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Process pending duplicate-check batch jobs
|
|
22
|
+
* Polls for completed jobs and processes their results
|
|
23
|
+
*
|
|
24
|
+
* @param app - Feathers application
|
|
25
|
+
* @returns ProcessStats with processing results
|
|
26
|
+
*/
|
|
27
|
+
export declare function processDuplicateCheckBatches(app: Application): Promise<ProcessStats>;
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Duplicate-check batch state management
|
|
3
|
+
* Optional: Tracks execution state for monitoring and debugging
|
|
4
|
+
* Can be extended to support checkpoint-based recovery
|
|
5
|
+
*/
|
|
6
|
+
export interface BatchExecutionState {
|
|
7
|
+
phase: 'idle' | 'submitting' | 'processing' | 'error';
|
|
8
|
+
lastSubmitAt?: Date;
|
|
9
|
+
lastProcessAt?: Date;
|
|
10
|
+
lastSuccessAt?: Date;
|
|
11
|
+
lastErrorAt?: Date;
|
|
12
|
+
lastErrorMessage?: string;
|
|
13
|
+
pendingJobCount?: number;
|
|
14
|
+
completedJobCount?: number;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* In-memory state tracker for batch execution
|
|
18
|
+
* For production use, consider storing in database or cache (Redis)
|
|
19
|
+
*/
|
|
20
|
+
declare class DuplicateCheckBatchStateManager {
|
|
21
|
+
private state;
|
|
22
|
+
setState(phase: BatchExecutionState['phase']): void;
|
|
23
|
+
recordSubmitStart(): void;
|
|
24
|
+
recordSubmitComplete(): void;
|
|
25
|
+
recordProcessStart(): void;
|
|
26
|
+
recordProcessComplete(): void;
|
|
27
|
+
recordError(error: Error): void;
|
|
28
|
+
updatePendingJobCount(count: number): void;
|
|
29
|
+
updateCompletedJobCount(count: number): void;
|
|
30
|
+
getState(): BatchExecutionState;
|
|
31
|
+
isIdle(): boolean;
|
|
32
|
+
isRunning(): boolean;
|
|
33
|
+
hasError(): boolean;
|
|
34
|
+
getLastError(): string | undefined;
|
|
35
|
+
resetError(): void;
|
|
36
|
+
}
|
|
37
|
+
export declare const duplicateCheckBatchState: DuplicateCheckBatchStateManager;
|
|
38
|
+
/**
|
|
39
|
+
* Get current batch execution state for monitoring
|
|
40
|
+
* Can be exposed via API endpoint for dashboards
|
|
41
|
+
*
|
|
42
|
+
* @returns Current execution state
|
|
43
|
+
*/
|
|
44
|
+
export declare function getDuplicateCheckBatchState(): BatchExecutionState;
|
|
45
|
+
/**
|
|
46
|
+
* Check if batch can run (no other instance running)
|
|
47
|
+
* Use for preventing concurrent executions
|
|
48
|
+
*
|
|
49
|
+
* @returns true if batch can run
|
|
50
|
+
*/
|
|
51
|
+
export declare function canRunDuplicateCheckBatch(): boolean;
|
|
52
|
+
export {};
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Duplicate-check batch submission handler
|
|
3
|
+
* Implements BatchRequestFactory to submit duplicate-check batch jobs to Gemini API
|
|
4
|
+
* Reuses shared submitBatchJob utility for API communication
|
|
5
|
+
*/
|
|
6
|
+
import { Application } from '../declarations';
|
|
7
|
+
import { GenericBatchSubmitStats, BatchJobMetadata } from './shared';
|
|
8
|
+
export interface SubmitStats extends GenericBatchSubmitStats {
|
|
9
|
+
recordsWithoutCandidates?: number;
|
|
10
|
+
}
|
|
11
|
+
export interface DuplicateCheckBatchJobMetadata extends BatchJobMetadata {
|
|
12
|
+
candidatesArray: {
|
|
13
|
+
subjectId: string;
|
|
14
|
+
candidates: string[];
|
|
15
|
+
}[];
|
|
16
|
+
recordIds?: number[];
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Submit a new duplicate-check batch job
|
|
20
|
+
* Finds records needing checking, identifies candidates, and submits to Gemini batch API
|
|
21
|
+
* Records without candidates are marked as checked immediately (no API call needed)
|
|
22
|
+
*
|
|
23
|
+
* @param app - Feathers application
|
|
24
|
+
* @returns SubmitStats with submission results
|
|
25
|
+
*/
|
|
26
|
+
export declare function submitDuplicateCheckBatch(app: Application): Promise<SubmitStats>;
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Duplicate-check batch orchestrator
|
|
3
|
+
* Main entry point for managing the duplicate-check batch job lifecycle
|
|
4
|
+
* Coordinates submission and processing phases with scheduling integration
|
|
5
|
+
*/
|
|
6
|
+
import { Application } from '../declarations';
|
|
7
|
+
export interface DuplicateCheckBatchStats {
|
|
8
|
+
submitted?: number;
|
|
9
|
+
processed?: number;
|
|
10
|
+
succeeded?: number;
|
|
11
|
+
failed?: number;
|
|
12
|
+
merged?: number;
|
|
13
|
+
recordsWithoutCandidates?: number;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Execute full duplicate-check batch cycle
|
|
17
|
+
* Runs both submit and process phases in sequence
|
|
18
|
+
* Designed to be called by scheduler or manually via endpoint
|
|
19
|
+
*
|
|
20
|
+
* @param app - Feathers application
|
|
21
|
+
* @returns Statistics from both phases
|
|
22
|
+
*/
|
|
23
|
+
export declare function executeDuplicateCheckBatch(app: Application): Promise<DuplicateCheckBatchStats>;
|
|
24
|
+
/**
|
|
25
|
+
* Process only (skip submission)
|
|
26
|
+
* Use this to catch up on processing completed jobs without submitting new ones
|
|
27
|
+
* Useful for recovery or manual processing runs
|
|
28
|
+
*
|
|
29
|
+
* @param app - Feathers application
|
|
30
|
+
* @returns Process statistics
|
|
31
|
+
*/
|
|
32
|
+
export declare function processDuplicateCheckBatchOnly(app: Application): Promise<import("./duplicate-check-batch-process").ProcessStats>;
|
|
33
|
+
/**
|
|
34
|
+
* Submit only (skip processing)
|
|
35
|
+
* Use this to only submit new batch jobs without processing results
|
|
36
|
+
* Useful if you want to control submission and processing timing separately
|
|
37
|
+
*
|
|
38
|
+
* @param app - Feathers application
|
|
39
|
+
* @returns Submit statistics
|
|
40
|
+
*/
|
|
41
|
+
export declare function submitDuplicateCheckBatchOnly(app: Application): Promise<import("./duplicate-check-batch-submit").SubmitStats>;
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import { Application } from '../../declarations';
|
|
2
|
+
/**
|
|
3
|
+
* Generic configuration for batch job creation
|
|
4
|
+
*/
|
|
5
|
+
export interface GenericBatchRequestConfig {
|
|
6
|
+
responseMimeType?: string;
|
|
7
|
+
tools?: any[];
|
|
8
|
+
temperature?: number;
|
|
9
|
+
topP?: number;
|
|
10
|
+
topK?: number;
|
|
11
|
+
maxOutputTokens?: number;
|
|
12
|
+
[key: string]: any;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Statistics for batch submission
|
|
16
|
+
*/
|
|
17
|
+
export interface GenericBatchSubmitStats {
|
|
18
|
+
submitted: number;
|
|
19
|
+
jobNames: string[];
|
|
20
|
+
recordsQueued?: number;
|
|
21
|
+
[key: string]: any;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Statistics for batch processing
|
|
25
|
+
*/
|
|
26
|
+
export interface GenericBatchProcessStats {
|
|
27
|
+
processed: number;
|
|
28
|
+
recordsExtracted?: number;
|
|
29
|
+
duplicateCheckJobsCreated?: number;
|
|
30
|
+
recordsCreated?: number;
|
|
31
|
+
recordsUpdated?: number;
|
|
32
|
+
duplicatesChecked?: number;
|
|
33
|
+
errors?: number;
|
|
34
|
+
[key: string]: any;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Metadata for batch jobs
|
|
38
|
+
*/
|
|
39
|
+
export interface BatchJobMetadata {
|
|
40
|
+
displayName: string;
|
|
41
|
+
submittedAt?: string;
|
|
42
|
+
[key: string]: any;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Generic batch job record in database
|
|
46
|
+
*/
|
|
47
|
+
export interface GenericBatchJob {
|
|
48
|
+
id?: number;
|
|
49
|
+
job_name: string;
|
|
50
|
+
job_type: string;
|
|
51
|
+
status: string;
|
|
52
|
+
model?: string;
|
|
53
|
+
request_count?: number;
|
|
54
|
+
metadata?: BatchJobMetadata;
|
|
55
|
+
schema_version?: number;
|
|
56
|
+
created_at?: string;
|
|
57
|
+
updated_at?: string;
|
|
58
|
+
completed_at?: string;
|
|
59
|
+
error_message?: string;
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Clean JSON response by removing markdown code blocks
|
|
63
|
+
* Handles patterns like ```json ... ```
|
|
64
|
+
*/
|
|
65
|
+
export declare function cleanJSONResponse(jsonResponse: string): string;
|
|
66
|
+
/**
|
|
67
|
+
* Extract response text from Gemini API response object
|
|
68
|
+
*/
|
|
69
|
+
export declare function extractResponseText(response: any): string;
|
|
70
|
+
/**
|
|
71
|
+
* No-op function - we now keep the JOB_STATE_ prefix to match Gemini API convention
|
|
72
|
+
* Status values are stored with prefix: JOB_STATE_SUCCEEDED, JOB_STATE_RUNNING, etc.
|
|
73
|
+
*/
|
|
74
|
+
export declare function stripJobStatePrefix(status: string): string;
|
|
75
|
+
/**
|
|
76
|
+
* Parse JSON response text with error handling
|
|
77
|
+
*/
|
|
78
|
+
export declare function parseResponseJSON(text: string): any;
|
|
79
|
+
/**
|
|
80
|
+
* Get cleanup configuration with defaults
|
|
81
|
+
*/
|
|
82
|
+
export declare function getCleanupConfig(app: Application): BatchCleanupConfig;
|
|
83
|
+
/**
|
|
84
|
+
* Cleanup configuration for batch jobs
|
|
85
|
+
*/
|
|
86
|
+
export interface BatchCleanupConfig {
|
|
87
|
+
successful_retention_days: number;
|
|
88
|
+
failed_retention_days: number;
|
|
89
|
+
enable_auto_cleanup: boolean;
|
|
90
|
+
}
|