@yglin/tw-env-records 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/lib/authentication.d.ts +8 -0
  2. package/lib/client.d.ts +1 -1
  3. package/lib/configuration.d.ts +104 -23
  4. package/lib/google/gemini-client-factory.d.ts +72 -0
  5. package/lib/hooks/after-askai-create.d.ts +2 -0
  6. package/lib/hooks/before-askai-create.d.ts +2 -0
  7. package/lib/internal/analyzer-endpoints.d.ts +2 -0
  8. package/lib/internal/audit-batch-endpoints.d.ts +2 -0
  9. package/lib/internal/crawler-batch-endpoints.d.ts +2 -0
  10. package/lib/internal/crawler-endpoints.d.ts +1 -1
  11. package/lib/internal/duplicate-check-batch-endpoints.d.ts +2 -0
  12. package/lib/jobs/audit-batch-process.d.ts +10 -0
  13. package/lib/jobs/audit-batch-state.d.ts +42 -0
  14. package/lib/jobs/audit-batch-submit.d.ts +13 -0
  15. package/lib/jobs/audit-batch.d.ts +36 -0
  16. package/lib/jobs/crawler-batch-process.d.ts +10 -0
  17. package/lib/jobs/crawler-batch-state.d.ts +70 -0
  18. package/lib/jobs/crawler-batch-submit.d.ts +8 -0
  19. package/lib/jobs/crawler-batch.d.ts +51 -0
  20. package/lib/jobs/crawler-prompt.d.ts +6 -0
  21. package/lib/jobs/crawler-state.d.ts +6 -1
  22. package/lib/jobs/crawler.d.ts +1 -1
  23. package/lib/jobs/duplicate-check/find-candidates.d.ts +29 -0
  24. package/lib/jobs/duplicate-check/index.d.ts +6 -0
  25. package/lib/jobs/duplicate-check/log-failed-backup.d.ts +17 -0
  26. package/lib/jobs/duplicate-check/merge-records.d.ts +20 -0
  27. package/lib/jobs/duplicate-check/prompt.d.ts +12 -0
  28. package/lib/jobs/duplicate-check-batch-process.d.ts +27 -0
  29. package/lib/jobs/duplicate-check-batch-state.d.ts +52 -0
  30. package/lib/jobs/duplicate-check-batch-submit.d.ts +26 -0
  31. package/lib/jobs/duplicate-check-batch.d.ts +41 -0
  32. package/lib/jobs/shared/batch-common.d.ts +90 -0
  33. package/lib/jobs/shared/batch-orchestration.d.ts +79 -0
  34. package/lib/jobs/shared/batch-processing.d.ts +37 -0
  35. package/lib/jobs/shared/batch-request.d.ts +54 -0
  36. package/lib/jobs/shared/batch-state.d.ts +72 -0
  37. package/lib/jobs/shared/index.d.ts +9 -0
  38. package/lib/logger.d.ts +2 -0
  39. package/lib/services/batch-jobs/batch-jobs.class.d.ts +15 -0
  40. package/lib/services/batch-jobs/batch-jobs.d.ts +11 -0
  41. package/lib/services/batch-jobs/batch-jobs.schema.d.ts +808 -0
  42. package/lib/services/batch-jobs/batch-jobs.shared.d.ts +2 -0
  43. package/lib/services/batch-jobs/batch-jobs.shared.js +6 -0
  44. package/lib/services/meta/ask-ai-statistics.d.ts +5 -0
  45. package/lib/services/meta/database-statistics.d.ts +7 -0
  46. package/lib/services/meta/meta.shared.d.ts +2 -1
  47. package/lib/services/meta/update-database-statistics.d.ts +2 -0
  48. package/lib/services/record/record.class.d.ts +51 -1
  49. package/lib/services/record/record.schema.d.ts +194 -64
  50. package/lib/services/users/users.class.d.ts +11 -0
  51. package/lib/services/users/users.d.ts +11 -0
  52. package/lib/services/users/users.schema.d.ts +356 -0
  53. package/lib/services/users/users.shared.d.ts +13 -0
  54. package/lib/services/users/users.shared.js +13 -0
  55. package/package.json +13 -12
  56. package/lib/maids/collate-place-names.d.ts +0 -1
  57. package/lib/maids/fix-place-names.d.ts +0 -7
  58. package/lib/maids/full-database-analyze.d.ts +0 -1
  59. package/lib/maids/geocode.d.ts +0 -4
package/lib/authentication.d.ts ADDED
@@ -0,0 +1,8 @@
1
+ import { AuthenticationService } from '@feathersjs/authentication';
2
+ import type { Application } from './declarations';
3
+ declare module './declarations' {
4
+ interface ServiceTypes {
5
+ authentication: AuthenticationService;
6
+ }
7
+ }
8
+ export declare const authentication: (app: Application) => void;
package/lib/client.d.ts CHANGED
@@ -7,7 +7,7 @@ export type { AskAi, AskAiData, AskAiQuery, AskAiPatch, AskAiRequest, AskAiRespo
7
7
  import './services/tag/tag.shared';
8
8
  export type { Tag, TagData, TagQuery, TagPatch } from './services/tag/tag.shared';
9
9
  import './services/meta/meta.shared';
10
- export type { Meta, MetaData, MetaQuery, MetaPatch, ServerInfo, YearlyStatistics, PerCountyStatistics, TagsTopN, PeopleTopN, TagsSimilarityConfigs } from './services/meta/meta.shared';
10
+ export type { Meta, MetaData, MetaQuery, MetaPatch, ServerInfo, YearlyStatistics, PerCountyStatistics, TagsTopN, PeopleTopN, TagsSimilarityConfigs, DatabaseStatistics } from './services/meta/meta.shared';
11
11
  import './services/person/person.shared';
12
12
  export type { Person, PersonData, PersonQuery, PersonPatch } from './services/person/person.shared';
13
13
  import './services/place-names/place-names.shared';
package/lib/configuration.d.ts CHANGED
@@ -106,36 +106,117 @@ export declare const configurationSchema: import("@sinclair/typebox").TIntersect
106
106
  apiKey: import("@sinclair/typebox").TString<string>;
107
107
  }>;
108
108
  gemini: import("@sinclair/typebox").TObject<{
109
- api01: import("@sinclair/typebox").TObject<{
110
- model: import("@sinclair/typebox").TString<string>;
111
- apiKey01: import("@sinclair/typebox").TString<string>;
112
- }>;
113
- api02: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
114
- model: import("@sinclair/typebox").TString<string>;
115
- apiKey02: import("@sinclair/typebox").TString<string>;
116
- }>>;
109
+ apiKey: import("@sinclair/typebox").TString<string>;
110
+ model: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
117
111
  }>;
118
112
  }>;
119
113
  services: import("@sinclair/typebox").TObject<{
120
114
  askAi: import("@sinclair/typebox").TObject<{
121
- recordsLimit: import("@sinclair/typebox").TNumber;
115
+ ai_model: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
116
+ ai_api_key: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
117
+ recordsLimit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
118
+ maxRequestsPerDay: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
119
+ keepDailyStatsDays: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
122
120
  }>;
123
121
  }>;
124
- crawler: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
125
- enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
126
- schedule: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
127
- timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
122
+ jobs: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
128
123
  internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
129
- }>>;
130
- analyzer: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
131
- enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
132
- schedule: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
133
- timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
134
- }>>;
135
- audit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
136
- enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
137
- schedule: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
138
- timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
124
+ crawler: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
125
+ enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
126
+ schedule: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
127
+ timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
128
+ internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
129
+ }>>;
130
+ crawlerBatch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
131
+ enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
132
+ timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
133
+ schedules: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
134
+ submit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
135
+ process: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
136
+ cleanup: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
137
+ }>>;
138
+ batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
139
+ use_file_input: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
140
+ max_requests_per_batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
141
+ requests_per_batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
142
+ display_name_prefix: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
143
+ }>>;
144
+ processing: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
145
+ max_retry_attempts: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
146
+ retry_interval_ms: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
147
+ }>>;
148
+ cleanup: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
149
+ cleanup_after_days: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
150
+ keep_failed_jobs_days: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
151
+ }>>;
152
+ internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
153
+ }>>;
154
+ analyzer: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
155
+ enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
156
+ schedule: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
157
+ timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
158
+ internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
159
+ }>>;
160
+ audit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
161
+ enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
162
+ schedule: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
163
+ timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
164
+ internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
165
+ }>>;
166
+ auditBatch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
167
+ enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
168
+ timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
169
+ schedules: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
170
+ submit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
171
+ process: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
172
+ cleanup: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
173
+ }>>;
174
+ batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
175
+ use_file_input: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
176
+ max_requests_per_batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
177
+ requests_per_batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
178
+ display_name_prefix: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
179
+ }>>;
180
+ processing: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
181
+ max_retry_attempts: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
182
+ retry_interval_ms: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
183
+ }>>;
184
+ cleanup: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
185
+ cleanup_after_days: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
186
+ keep_failed_jobs_days: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
187
+ }>>;
188
+ internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
189
+ }>>;
190
+ duplicateCheckBatch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
191
+ enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
192
+ timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
193
+ max_records_per_window: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
194
+ max_candidates_per_record: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
195
+ similarity_threshold: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
196
+ schedules: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
197
+ submit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
198
+ process: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
199
+ }>>;
200
+ batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
201
+ use_file_input: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
202
+ requests_per_batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
203
+ display_name_prefix: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
204
+ }>>;
205
+ processing: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
206
+ max_retry_attempts: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
207
+ retry_interval_ms: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
208
+ confidence_threshold: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
209
+ }>>;
210
+ merge: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
211
+ story_min_length: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
212
+ }>>;
213
+ backup_failure_log: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
214
+ enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
215
+ path: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
216
+ retention_days: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
217
+ }>>;
218
+ internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
219
+ }>>;
139
220
  }>>;
140
221
  }>]>;
141
222
  export type ApplicationConfiguration = Static<typeof configurationSchema>;
package/lib/google/gemini-client-factory.d.ts ADDED
@@ -0,0 +1,72 @@
1
+ import { Application } from '../declarations';
2
+ export interface IGeminiClient {
3
+ models: {
4
+ generateContent: (params: any) => Promise<any>;
5
+ };
6
+ batches: {
7
+ create: (params: any) => Promise<any>;
8
+ get: (params: any) => Promise<any>;
9
+ list: (params?: any) => Promise<any>;
10
+ };
11
+ files: {
12
+ download: (params: any) => Promise<void>;
13
+ };
14
+ }
15
+ /**
16
+ * Default configuration applied to all Gemini requests
17
+ * NOTE: Google Search grounding is NOT included because the Batch API
18
+ * does not return grounding metadata, making it ineffective for batch jobs.
19
+ */
20
+ export declare const GEMINI_DEFAULT_CONFIG: {};
21
+ /**
22
+ * Default Gemini model
23
+ */
24
+ export declare const DEFAULT_GEMINI_MODEL = "gemini-2.5-flash";
25
+ declare class GeminiClientFactory {
26
+ private instance;
27
+ private mockInstance;
28
+ private model;
29
+ /**
30
+ * Initialize the Gemini client from application configuration
31
+ * Call this once at application startup
32
+ */
33
+ initialize(app: Application): void;
34
+ /**
35
+ * Get the singleton Gemini client instance
36
+ * Throws error if not initialized
37
+ */
38
+ getClient(): IGeminiClient;
39
+ /**
40
+ * Get the configured model name
41
+ */
42
+ getModel(): string;
43
+ /**
44
+ * Get default configuration for Gemini API requests
45
+ * NOTE: Does not include Google Search grounding since Batch API doesn't support it
46
+ */
47
+ getDefaultConfig(): typeof GEMINI_DEFAULT_CONFIG;
48
+ /**
49
+ * Set a mock client for testing
50
+ * WARNING: Only use in tests!
51
+ */
52
+ setMockClient(mockClient: IGeminiClient | null): void;
53
+ /**
54
+ * Set mock model for testing
55
+ * WARNING: Only use in tests!
56
+ */
57
+ setMockModel(model: string): void;
58
+ /**
59
+ * Reset the factory (for testing)
60
+ * WARNING: Only use in tests!
61
+ */
62
+ resetForTesting(): void;
63
+ /**
64
+ * Check if client is initialized
65
+ */
66
+ isInitialized(): boolean;
67
+ }
68
+ export declare const geminiClientFactory: GeminiClientFactory;
69
+ export declare function getGeminiClient(): IGeminiClient;
70
+ export declare function getGeminiModel(): string;
71
+ export declare function getGeminiConfig(): typeof GEMINI_DEFAULT_CONFIG;
72
+ export {};
package/lib/hooks/after-askai-create.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ import type { HookContext } from '@feathersjs/feathers';
2
+ export declare const afterAskAiCreate: (context: HookContext) => Promise<HookContext<import("@feathersjs/feathers").Application<any, any>, any>>;
package/lib/hooks/before-askai-create.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ import type { HookContext } from '@feathersjs/feathers';
2
+ export declare const beforeAskAiCreate: (context: HookContext) => Promise<HookContext<import("@feathersjs/feathers").Application<any, any>, any>>;
package/lib/internal/analyzer-endpoints.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ import { Application } from '../declarations';
2
+ export declare const analyzerEndpoints: (app: Application) => void;
package/lib/internal/audit-batch-endpoints.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ import { Application } from '../declarations';
2
+ export declare const auditBatchEndpoints: (app: Application) => void;
package/lib/internal/crawler-batch-endpoints.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ import { Application } from '../declarations';
2
+ export declare const crawlerBatchEndpoints: (app: Application) => void;
package/lib/internal/crawler-endpoints.d.ts CHANGED
@@ -1,2 +1,2 @@
1
- import { Application } from "../declarations";
1
+ import { Application } from '../declarations';
2
2
  export declare const crawlerEndpoints: (app: Application) => void;
package/lib/internal/duplicate-check-batch-endpoints.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ import { Application } from '../declarations';
2
+ export declare const duplicateCheckBatchEndpoints: (app: Application) => void;
package/lib/jobs/audit-batch-process.d.ts ADDED
@@ -0,0 +1,10 @@
1
+ import { Application } from '../declarations';
2
+ import { GenericBatchProcessStats } from './shared/index';
3
+ export interface ProcessStats extends GenericBatchProcessStats {
4
+ recordsUpdated: number;
5
+ recordsSkipped: number;
6
+ }
7
+ /**
8
+ * Process all pending audit batch jobs
9
+ */
10
+ export declare function processPendingBatches(app: Application): Promise<ProcessStats>;
package/lib/jobs/audit-batch-state.d.ts ADDED
@@ -0,0 +1,42 @@
1
+ export type BatchAuditStats = {
2
+ submitted?: number;
3
+ processed?: number;
4
+ recordsQueued?: number;
5
+ recordsUpdated?: number;
6
+ recordsSkipped?: number;
7
+ cleanedUp?: number;
8
+ errors?: number;
9
+ };
10
+ export type BatchAuditState = {
11
+ submitRunning: boolean;
12
+ processRunning: boolean;
13
+ lastSubmitAt?: string;
14
+ lastProcessAt?: string;
15
+ lastCleanupAt?: string;
16
+ lastSubmitDurationMs?: number;
17
+ lastProcessDurationMs?: number;
18
+ lastSubmitResult?: BatchAuditStats;
19
+ lastProcessResult?: BatchAuditStats;
20
+ lastError?: string;
21
+ pendingJobs?: number;
22
+ runningJobs?: number;
23
+ succeededJobs?: number;
24
+ };
25
+ export declare function getBatchAuditState(): BatchAuditState;
26
+ export declare function startSubmit(): void;
27
+ export declare function finishSubmit(result: BatchAuditStats, durationMs: number): void;
28
+ export declare function failSubmit(error: unknown, durationMs: number): void;
29
+ export declare function startProcess(): void;
30
+ export declare function finishProcess(result: BatchAuditStats, durationMs: number): void;
31
+ export declare function failProcess(error: unknown, durationMs: number): void;
32
+ export declare function updateJobCounts(pending: number, running: number, succeeded: number): void;
33
+ export declare function updateCleanupTime(): void;
34
+ /**
35
+ * Reset state for testing purposes
36
+ * WARNING: Only use in tests!
37
+ */
38
+ export declare function resetStateForTesting(): void;
39
+ /**
40
+ * Reconstruct state from database on service startup
41
+ */
42
+ export declare function reconstructFromDatabase(batchJobsService: any): Promise<void>;
package/lib/jobs/audit-batch-submit.d.ts ADDED
@@ -0,0 +1,13 @@
1
+ import { Application } from '../declarations';
2
+ import { GenericBatchSubmitStats } from './shared/index';
3
+ export interface SubmitStats extends GenericBatchSubmitStats {
4
+ recordsQueued: number;
5
+ }
6
+ /**
7
+ * Build the audit prompt for a single record
8
+ */
9
+ export declare function buildAuditPrompt(record: any): string;
10
+ /**
11
+ * Submit a new audit batch job
12
+ */
13
+ export declare function submitAuditBatch(app: Application): Promise<SubmitStats>;
package/lib/jobs/audit-batch.d.ts ADDED
@@ -0,0 +1,36 @@
1
+ import { Application } from '../declarations';
2
+ import { type SubmitStats } from './audit-batch-submit';
3
+ import { type ProcessStats } from './audit-batch-process';
4
+ import { getBatchAuditState, reconstructFromDatabase } from './audit-batch-state';
5
+ import { type CleanupStats } from './shared/index';
6
+ export interface CycleStats {
7
+ submit: SubmitStats;
8
+ process: ProcessStats;
9
+ cleanup: CleanupStats;
10
+ }
11
+ /**
12
+ * Submit phase: Create and submit new audit batch jobs
13
+ */
14
+ export declare function submitPhase(app: Application): Promise<SubmitStats>;
15
+ /**
16
+ * Process phase: Poll and process completed audit batch jobs
17
+ */
18
+ export declare function processPhase(app: Application): Promise<ProcessStats>;
19
+ /**
20
+ * Cleanup phase: Remove old completed/failed batch jobs
21
+ */
22
+ export declare function cleanupPhase(app: Application): Promise<CleanupStats>;
23
+ /**
24
+ * Get current status of audit batch jobs
25
+ */
26
+ export declare function getAuditBatchStatus(app: Application): Promise<{
27
+ pending: number;
28
+ running: number;
29
+ succeeded: number;
30
+ failed: number;
31
+ }>;
32
+ /**
33
+ * Run a complete audit batch cycle (submit + process + cleanup)
34
+ */
35
+ export declare function runAuditBatchCycle(app: Application): Promise<CycleStats>;
36
+ export { getBatchAuditState, reconstructFromDatabase };
package/lib/jobs/crawler-batch-process.d.ts ADDED
@@ -0,0 +1,10 @@
1
+ import { Application } from '../declarations';
2
+ import { GenericBatchProcessStats } from './shared/index';
3
+ export interface ProcessStats extends GenericBatchProcessStats {
4
+ recordsExtracted?: number;
5
+ recordsCreated?: number;
6
+ }
7
+ /**
8
+ * Process all pending crawl batch jobs
9
+ */
10
+ export declare function processPendingBatches(app: Application): Promise<ProcessStats>;
package/lib/jobs/crawler-batch-state.d.ts ADDED
@@ -0,0 +1,70 @@
1
+ export type BatchCrawlerStats = {
2
+ submitted?: number;
3
+ processed?: number;
4
+ recordsExtracted?: number;
5
+ recordsCreated?: number;
6
+ recordsUpdated?: number;
7
+ duplicatesChecked?: number;
8
+ cleanedUp?: number;
9
+ errors?: number;
10
+ };
11
+ export type BatchCrawlerState = {
12
+ submitRunning: boolean;
13
+ processRunning: boolean;
14
+ lastSubmitAt?: string;
15
+ lastProcessAt?: string;
16
+ lastCleanupAt?: string;
17
+ lastSubmitDurationMs?: number;
18
+ lastProcessDurationMs?: number;
19
+ lastSubmitResult?: BatchCrawlerStats;
20
+ lastProcessResult?: BatchCrawlerStats;
21
+ lastError?: string;
22
+ pendingJobs?: number;
23
+ runningJobs?: number;
24
+ succeededJobs?: number;
25
+ };
26
+ /**
27
+ * Get current crawler batch state
28
+ */
29
+ export declare function getBatchCrawlerState(): BatchCrawlerState;
30
+ /**
31
+ * Mark submit phase as started
32
+ */
33
+ export declare function startSubmit(): void;
34
+ /**
35
+ * Mark submit phase as completed
36
+ */
37
+ export declare function finishSubmit(result: BatchCrawlerStats, durationMs: number): void;
38
+ /**
39
+ * Mark submit phase as failed
40
+ */
41
+ export declare function failSubmit(error: unknown, durationMs: number): void;
42
+ /**
43
+ * Mark process phase as started
44
+ */
45
+ export declare function startProcess(): void;
46
+ /**
47
+ * Mark process phase as completed
48
+ */
49
+ export declare function finishProcess(result: BatchCrawlerStats, durationMs: number): void;
50
+ /**
51
+ * Mark process phase as failed
52
+ */
53
+ export declare function failProcess(error: unknown, durationMs: number): void;
54
+ /**
55
+ * Update job counts from database
56
+ */
57
+ export declare function updateJobCounts(pending: number, running: number, succeeded: number): void;
58
+ /**
59
+ * Mark cleanup operation with current timestamp
60
+ */
61
+ export declare function updateCleanupTime(): void;
62
+ /**
63
+ * Reset state for testing
64
+ */
65
+ export declare function resetStateForTesting(): void;
66
+ /**
67
+ * Reconstruct state from database
68
+ * Used on service startup
69
+ */
70
+ export declare function reconstructFromDatabase(batchJobsService: any): Promise<void>;
package/lib/jobs/crawler-batch-submit.d.ts ADDED
@@ -0,0 +1,8 @@
1
+ import { Application } from '../declarations';
2
+ import { GenericBatchSubmitStats } from './shared/index';
3
+ export interface SubmitStats extends GenericBatchSubmitStats {
4
+ }
5
+ /**
6
+ * Submit a new crawl batch job
7
+ */
8
+ export declare function submitCrawlBatch(app: Application): Promise<SubmitStats>;
package/lib/jobs/crawler-batch.d.ts ADDED
@@ -0,0 +1,51 @@
1
+ import { Application } from '../declarations';
2
+ import { SubmitStats } from './crawler-batch-submit';
3
+ import { ProcessStats } from './crawler-batch-process';
4
+ import { getBatchCrawlerState, BatchCrawlerState, reconstructFromDatabase } from './crawler-batch-state';
5
+ import { CleanupStats as SharedCleanupStats } from './shared/index';
6
+ export type CleanupStats = SharedCleanupStats;
7
+ export interface CycleStats {
8
+ submitted: number;
9
+ processed: number;
10
+ recordsExtracted: number;
11
+ recordsCreated: number;
12
+ recordsUpdated: number;
13
+ duplicatesChecked: number;
14
+ cleanedUp: number;
15
+ errors: number;
16
+ }
17
+ export type { SubmitStats } from './crawler-batch-submit';
18
+ export type { ProcessStats } from './crawler-batch-process';
19
+ export interface BatchStatus {
20
+ id: number;
21
+ job_name: string;
22
+ job_type: string;
23
+ status: string;
24
+ model: string;
25
+ request_count: number;
26
+ created_at: string;
27
+ updated_at: string;
28
+ completed_at?: string;
29
+ error_message?: string;
30
+ }
31
+ /**
32
+ * Submit phase - create and submit new batch jobs
33
+ */
34
+ export declare function submitPhase(app: Application): Promise<SubmitStats>;
35
+ /**
36
+ * Process phase - poll and process completed batch jobs
37
+ */
38
+ export declare function processPhase(app: Application): Promise<ProcessStats>;
39
+ /**
40
+ * Cleanup phase - remove old completed jobs based on retention policy
41
+ */
42
+ export declare function cleanupPhase(app: Application): Promise<CleanupStats>;
43
+ /**
44
+ * Get status of all batch jobs
45
+ */
46
+ export declare function getBatchStatus(app: Application): Promise<BatchStatus[]>;
47
+ /**
48
+ * Run full batch crawl cycle (submit → process → cleanup)
49
+ */
50
+ export declare function runBatchCrawlCycle(app: Application): Promise<CycleStats>;
51
+ export { getBatchCrawlerState, BatchCrawlerState, reconstructFromDatabase };
package/lib/jobs/crawler-prompt.d.ts ADDED
@@ -0,0 +1,6 @@
1
+ import { Application } from '../declarations';
2
+ /**
3
+ * Build the crawl prompt for batch request
4
+ * Reuses the same prompt logic as the synchronous crawler
5
+ */
6
+ export declare function buildCrawlPrompt(app: Application): Promise<string>;
package/lib/jobs/crawler-state.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { CrawlStats } from "./crawler";
1
+ import { CrawlStats } from './crawler';
2
2
  export type CrawlerState = {
3
3
  running: boolean;
4
4
  lastRunAt?: string;
@@ -10,3 +10,8 @@ export declare function getCrawlerState(): CrawlerState;
10
10
  export declare function startRun(): void;
11
11
  export declare function finishRun(result: CrawlStats, durationMs: number): void;
12
12
  export declare function failRun(error: unknown, durationMs: number): void;
13
+ /**
14
+ * Reset state for testing purposes
15
+ * WARNING: Only use in tests!
16
+ */
17
+ export declare function resetStateForTesting(): void;
package/lib/jobs/crawler.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { Application } from "../declarations";
1
+ import { Application } from '../declarations';
2
2
  export type CrawlStats = {
3
3
  created: number;
4
4
  updated: number;
package/lib/jobs/duplicate-check/find-candidates.d.ts ADDED
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Find records needing duplicate check and their potential duplicate candidates
3
+ */
4
+ import { Application } from '../../declarations';
5
+ /**
6
+ * Find records that need duplicate checking
7
+ * @param app - Feathers application
8
+ * @param limit - Maximum number of records to return (default: 50)
9
+ * @returns Array of records with duplicate_check_status = 'pending'
10
+ */
11
+ export declare function findRecordsNeedingDuplicateCheck(app: Application, limit?: number): Promise<any[]>;
12
+ /**
13
+ * Calculate Jaccard similarity between two arrays
14
+ * @param arr1 - First array
15
+ * @param arr2 - Second array
16
+ * @returns Similarity score between 0.0 and 1.0
17
+ */
18
+ export declare function calculateJaccardSimilarity(arr1: string[], arr2: string[]): number;
19
+ /**
20
+ * Find potential duplicate candidates for a given record
21
+ * Uses two-phase filtering: database filter (date + place) + in-memory similarity (tags + people)
22
+ *
23
+ * @param app - Feathers application
24
+ * @param record - The record to find duplicates for
25
+ * @param maxCandidates - Maximum number of candidates to return (default: 20)
26
+ * @param similarityThreshold - Minimum Jaccard similarity threshold (default: 0.5)
27
+ * @returns Array of candidate records that might be duplicates
28
+ */
29
+ export declare function findDuplicateCandidates(app: Application, record: any, maxCandidates?: number, similarityThreshold?: number): Promise<any[]>;
package/lib/jobs/duplicate-check/index.d.ts ADDED
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Duplicate-check module exports
3
+ */
4
+ export { findRecordsNeedingDuplicateCheck, findDuplicateCandidates, calculateJaccardSimilarity } from './find-candidates';
5
+ export { buildDuplicateCheckPrompt } from './prompt';
6
+ export { mergeRecordsFromGeminiResponse, type MergeResult } from './merge-records';
package/lib/jobs/duplicate-check/log-failed-backup.d.ts ADDED
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Log failed MongoDB backup to file for recovery
3
+ */
4
+ /**
5
+ * Log a failed MongoDB backup to file
6
+ * Writes in JSONL format (one JSON object per line) for easy recovery
7
+ *
8
+ * @param record - The record that failed to backup
9
+ * @param error - The error that occurred
10
+ * @param metadata - Additional metadata about the deletion
11
+ */
12
+ export declare function logFailedMongoBackup(record: any, error: Error, metadata: {
13
+ merged_into_id?: number;
14
+ deletion_reason?: string;
15
+ deleted_at: string;
16
+ deleted_by?: string;
17
+ }): void;
package/lib/jobs/duplicate-check/merge-records.d.ts ADDED
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Merge duplicate records and handle deletion with MongoDB backup
3
+ */
4
+ import { Application } from '../../declarations';
5
+ export interface MergeResult {
6
+ mergedRecordId: number;
7
+ deletedRecordIds: number[];
8
+ confidence: number;
9
+ }
10
+ /**
11
+ * Merge duplicate records into a single record
12
+ * Creates a new merged record and deletes the duplicates (with MongoDB backup via hook)
13
+ *
14
+ * @param app - Feathers application
15
+ * @param duplicateIds - Array of record IDs to merge
16
+ * @param mergedRecordData - Data for the merged record
17
+ * @param confidence - AI confidence score
18
+ * @returns MergeResult with merged record ID and deleted IDs
19
+ */
20
+ export declare function mergeRecordsFromGeminiResponse(app: Application, duplicateIds: number[], mergedRecordData: any, confidence: number): Promise<MergeResult>;
package/lib/jobs/duplicate-check/prompt.d.ts ADDED
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Build prompts for Gemini API to detect and merge duplicate records
3
+ */
4
+ /**
5
+ * Build a duplicate-check prompt for Gemini API
6
+ * Asks the AI to identify which candidates are duplicates and provide a merged record
7
+ *
8
+ * @param newRecord - The record being checked for duplicates
9
+ * @param candidates - Array of potential duplicate records
10
+ * @returns Prompt string for Gemini API
11
+ */
12
+ export declare function buildDuplicateCheckPrompt(newRecord: any, candidates: any[]): string;