@yglin/tw-env-records 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/authentication.d.ts +8 -0
- package/lib/client.d.ts +1 -1
- package/lib/configuration.d.ts +104 -23
- package/lib/google/gemini-client-factory.d.ts +72 -0
- package/lib/hooks/after-askai-create.d.ts +2 -0
- package/lib/hooks/before-askai-create.d.ts +2 -0
- package/lib/internal/analyzer-endpoints.d.ts +2 -0
- package/lib/internal/audit-batch-endpoints.d.ts +2 -0
- package/lib/internal/crawler-batch-endpoints.d.ts +2 -0
- package/lib/internal/crawler-endpoints.d.ts +1 -1
- package/lib/internal/duplicate-check-batch-endpoints.d.ts +2 -0
- package/lib/jobs/audit-batch-process.d.ts +10 -0
- package/lib/jobs/audit-batch-state.d.ts +42 -0
- package/lib/jobs/audit-batch-submit.d.ts +13 -0
- package/lib/jobs/audit-batch.d.ts +36 -0
- package/lib/jobs/crawler-batch-process.d.ts +10 -0
- package/lib/jobs/crawler-batch-state.d.ts +70 -0
- package/lib/jobs/crawler-batch-submit.d.ts +8 -0
- package/lib/jobs/crawler-batch.d.ts +51 -0
- package/lib/jobs/crawler-prompt.d.ts +6 -0
- package/lib/jobs/crawler-state.d.ts +6 -1
- package/lib/jobs/crawler.d.ts +1 -1
- package/lib/jobs/duplicate-check/find-candidates.d.ts +29 -0
- package/lib/jobs/duplicate-check/index.d.ts +6 -0
- package/lib/jobs/duplicate-check/log-failed-backup.d.ts +17 -0
- package/lib/jobs/duplicate-check/merge-records.d.ts +20 -0
- package/lib/jobs/duplicate-check/prompt.d.ts +12 -0
- package/lib/jobs/duplicate-check-batch-process.d.ts +27 -0
- package/lib/jobs/duplicate-check-batch-state.d.ts +52 -0
- package/lib/jobs/duplicate-check-batch-submit.d.ts +26 -0
- package/lib/jobs/duplicate-check-batch.d.ts +41 -0
- package/lib/jobs/shared/batch-common.d.ts +90 -0
- package/lib/jobs/shared/batch-orchestration.d.ts +79 -0
- package/lib/jobs/shared/batch-processing.d.ts +37 -0
- package/lib/jobs/shared/batch-request.d.ts +54 -0
- package/lib/jobs/shared/batch-state.d.ts +72 -0
- package/lib/jobs/shared/index.d.ts +9 -0
- package/lib/logger.d.ts +2 -0
- package/lib/services/batch-jobs/batch-jobs.class.d.ts +15 -0
- package/lib/services/batch-jobs/batch-jobs.d.ts +11 -0
- package/lib/services/batch-jobs/batch-jobs.schema.d.ts +808 -0
- package/lib/services/batch-jobs/batch-jobs.shared.d.ts +2 -0
- package/lib/services/batch-jobs/batch-jobs.shared.js +6 -0
- package/lib/services/meta/ask-ai-statistics.d.ts +5 -0
- package/lib/services/meta/database-statistics.d.ts +7 -0
- package/lib/services/meta/meta.shared.d.ts +2 -1
- package/lib/services/meta/update-database-statistics.d.ts +2 -0
- package/lib/services/record/record.class.d.ts +51 -1
- package/lib/services/record/record.schema.d.ts +194 -64
- package/lib/services/users/users.class.d.ts +11 -0
- package/lib/services/users/users.d.ts +11 -0
- package/lib/services/users/users.schema.d.ts +356 -0
- package/lib/services/users/users.shared.d.ts +13 -0
- package/lib/services/users/users.shared.js +13 -0
- package/package.json +13 -12
- package/lib/maids/collate-place-names.d.ts +0 -1
- package/lib/maids/fix-place-names.d.ts +0 -7
- package/lib/maids/full-database-analyze.d.ts +0 -1
- package/lib/maids/geocode.d.ts +0 -4
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { AuthenticationService } from '@feathersjs/authentication';
|
|
2
|
+
import type { Application } from './declarations';
|
|
3
|
+
declare module './declarations' {
|
|
4
|
+
interface ServiceTypes {
|
|
5
|
+
authentication: AuthenticationService;
|
|
6
|
+
}
|
|
7
|
+
}
|
|
8
|
+
export declare const authentication: (app: Application) => void;
|
package/lib/client.d.ts
CHANGED
|
@@ -7,7 +7,7 @@ export type { AskAi, AskAiData, AskAiQuery, AskAiPatch, AskAiRequest, AskAiRespo
|
|
|
7
7
|
import './services/tag/tag.shared';
|
|
8
8
|
export type { Tag, TagData, TagQuery, TagPatch } from './services/tag/tag.shared';
|
|
9
9
|
import './services/meta/meta.shared';
|
|
10
|
-
export type { Meta, MetaData, MetaQuery, MetaPatch, ServerInfo, YearlyStatistics, PerCountyStatistics, TagsTopN, PeopleTopN, TagsSimilarityConfigs } from './services/meta/meta.shared';
|
|
10
|
+
export type { Meta, MetaData, MetaQuery, MetaPatch, ServerInfo, YearlyStatistics, PerCountyStatistics, TagsTopN, PeopleTopN, TagsSimilarityConfigs, DatabaseStatistics } from './services/meta/meta.shared';
|
|
11
11
|
import './services/person/person.shared';
|
|
12
12
|
export type { Person, PersonData, PersonQuery, PersonPatch } from './services/person/person.shared';
|
|
13
13
|
import './services/place-names/place-names.shared';
|
package/lib/configuration.d.ts
CHANGED
|
@@ -106,36 +106,117 @@ export declare const configurationSchema: import("@sinclair/typebox").TIntersect
|
|
|
106
106
|
apiKey: import("@sinclair/typebox").TString<string>;
|
|
107
107
|
}>;
|
|
108
108
|
gemini: import("@sinclair/typebox").TObject<{
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
apiKey01: import("@sinclair/typebox").TString<string>;
|
|
112
|
-
}>;
|
|
113
|
-
api02: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
114
|
-
model: import("@sinclair/typebox").TString<string>;
|
|
115
|
-
apiKey02: import("@sinclair/typebox").TString<string>;
|
|
116
|
-
}>>;
|
|
109
|
+
apiKey: import("@sinclair/typebox").TString<string>;
|
|
110
|
+
model: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
117
111
|
}>;
|
|
118
112
|
}>;
|
|
119
113
|
services: import("@sinclair/typebox").TObject<{
|
|
120
114
|
askAi: import("@sinclair/typebox").TObject<{
|
|
121
|
-
|
|
115
|
+
ai_model: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
116
|
+
ai_api_key: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
117
|
+
recordsLimit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
118
|
+
maxRequestsPerDay: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
119
|
+
keepDailyStatsDays: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
122
120
|
}>;
|
|
123
121
|
}>;
|
|
124
|
-
|
|
125
|
-
enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
126
|
-
schedule: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
127
|
-
timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
122
|
+
jobs: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
128
123
|
internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
124
|
+
crawler: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
125
|
+
enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
126
|
+
schedule: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
127
|
+
timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
128
|
+
internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
129
|
+
}>>;
|
|
130
|
+
crawlerBatch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
131
|
+
enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
132
|
+
timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
133
|
+
schedules: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
134
|
+
submit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
135
|
+
process: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
136
|
+
cleanup: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
137
|
+
}>>;
|
|
138
|
+
batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
139
|
+
use_file_input: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
140
|
+
max_requests_per_batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
141
|
+
requests_per_batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
142
|
+
display_name_prefix: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
143
|
+
}>>;
|
|
144
|
+
processing: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
145
|
+
max_retry_attempts: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
146
|
+
retry_interval_ms: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
147
|
+
}>>;
|
|
148
|
+
cleanup: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
149
|
+
cleanup_after_days: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
150
|
+
keep_failed_jobs_days: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
151
|
+
}>>;
|
|
152
|
+
internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
153
|
+
}>>;
|
|
154
|
+
analyzer: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
155
|
+
enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
156
|
+
schedule: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
157
|
+
timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
158
|
+
internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
159
|
+
}>>;
|
|
160
|
+
audit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
161
|
+
enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
162
|
+
schedule: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
163
|
+
timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
164
|
+
internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
165
|
+
}>>;
|
|
166
|
+
auditBatch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
167
|
+
enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
168
|
+
timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
169
|
+
schedules: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
170
|
+
submit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
171
|
+
process: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
172
|
+
cleanup: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
173
|
+
}>>;
|
|
174
|
+
batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
175
|
+
use_file_input: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
176
|
+
max_requests_per_batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
177
|
+
requests_per_batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
178
|
+
display_name_prefix: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
179
|
+
}>>;
|
|
180
|
+
processing: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
181
|
+
max_retry_attempts: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
182
|
+
retry_interval_ms: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
183
|
+
}>>;
|
|
184
|
+
cleanup: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
185
|
+
cleanup_after_days: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
186
|
+
keep_failed_jobs_days: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
187
|
+
}>>;
|
|
188
|
+
internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
189
|
+
}>>;
|
|
190
|
+
duplicateCheckBatch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
191
|
+
enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
192
|
+
timezone: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
193
|
+
max_records_per_window: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
194
|
+
max_candidates_per_record: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
195
|
+
similarity_threshold: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
196
|
+
schedules: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
197
|
+
submit: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
198
|
+
process: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
199
|
+
}>>;
|
|
200
|
+
batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
201
|
+
use_file_input: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
202
|
+
requests_per_batch: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
203
|
+
display_name_prefix: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
204
|
+
}>>;
|
|
205
|
+
processing: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
206
|
+
max_retry_attempts: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
207
|
+
retry_interval_ms: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
208
|
+
confidence_threshold: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
209
|
+
}>>;
|
|
210
|
+
merge: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
211
|
+
story_min_length: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
212
|
+
}>>;
|
|
213
|
+
backup_failure_log: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TObject<{
|
|
214
|
+
enabled: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TBoolean>;
|
|
215
|
+
path: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
216
|
+
retention_days: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
217
|
+
}>>;
|
|
218
|
+
internalSecret: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TString<string>>;
|
|
219
|
+
}>>;
|
|
139
220
|
}>>;
|
|
140
221
|
}>]>;
|
|
141
222
|
export type ApplicationConfiguration = Static<typeof configurationSchema>;
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { Application } from '../declarations';
|
|
2
|
+
export interface IGeminiClient {
|
|
3
|
+
models: {
|
|
4
|
+
generateContent: (params: any) => Promise<any>;
|
|
5
|
+
};
|
|
6
|
+
batches: {
|
|
7
|
+
create: (params: any) => Promise<any>;
|
|
8
|
+
get: (params: any) => Promise<any>;
|
|
9
|
+
list: (params?: any) => Promise<any>;
|
|
10
|
+
};
|
|
11
|
+
files: {
|
|
12
|
+
download: (params: any) => Promise<void>;
|
|
13
|
+
};
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Default configuration applied to all Gemini requests
|
|
17
|
+
* NOTE: Google Search grounding is NOT included because the Batch API
|
|
18
|
+
* does not return grounding metadata, making it ineffective for batch jobs.
|
|
19
|
+
*/
|
|
20
|
+
export declare const GEMINI_DEFAULT_CONFIG: {};
|
|
21
|
+
/**
|
|
22
|
+
* Default Gemini model
|
|
23
|
+
*/
|
|
24
|
+
export declare const DEFAULT_GEMINI_MODEL = "gemini-2.5-flash";
|
|
25
|
+
declare class GeminiClientFactory {
|
|
26
|
+
private instance;
|
|
27
|
+
private mockInstance;
|
|
28
|
+
private model;
|
|
29
|
+
/**
|
|
30
|
+
* Initialize the Gemini client from application configuration
|
|
31
|
+
* Call this once at application startup
|
|
32
|
+
*/
|
|
33
|
+
initialize(app: Application): void;
|
|
34
|
+
/**
|
|
35
|
+
* Get the singleton Gemini client instance
|
|
36
|
+
* Throws error if not initialized
|
|
37
|
+
*/
|
|
38
|
+
getClient(): IGeminiClient;
|
|
39
|
+
/**
|
|
40
|
+
* Get the configured model name
|
|
41
|
+
*/
|
|
42
|
+
getModel(): string;
|
|
43
|
+
/**
|
|
44
|
+
* Get default configuration for Gemini API requests
|
|
45
|
+
* NOTE: Does not include Google Search grounding since Batch API doesn't support it
|
|
46
|
+
*/
|
|
47
|
+
getDefaultConfig(): typeof GEMINI_DEFAULT_CONFIG;
|
|
48
|
+
/**
|
|
49
|
+
* Set a mock client for testing
|
|
50
|
+
* WARNING: Only use in tests!
|
|
51
|
+
*/
|
|
52
|
+
setMockClient(mockClient: IGeminiClient | null): void;
|
|
53
|
+
/**
|
|
54
|
+
* Set mock model for testing
|
|
55
|
+
* WARNING: Only use in tests!
|
|
56
|
+
*/
|
|
57
|
+
setMockModel(model: string): void;
|
|
58
|
+
/**
|
|
59
|
+
* Reset the factory (for testing)
|
|
60
|
+
* WARNING: Only use in tests!
|
|
61
|
+
*/
|
|
62
|
+
resetForTesting(): void;
|
|
63
|
+
/**
|
|
64
|
+
* Check if client is initialized
|
|
65
|
+
*/
|
|
66
|
+
isInitialized(): boolean;
|
|
67
|
+
}
|
|
68
|
+
export declare const geminiClientFactory: GeminiClientFactory;
|
|
69
|
+
export declare function getGeminiClient(): IGeminiClient;
|
|
70
|
+
export declare function getGeminiModel(): string;
|
|
71
|
+
export declare function getGeminiConfig(): typeof GEMINI_DEFAULT_CONFIG;
|
|
72
|
+
export {};
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { Application } from
|
|
1
|
+
import { Application } from '../declarations';
|
|
2
2
|
export declare const crawlerEndpoints: (app: Application) => void;
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { Application } from '../declarations';
|
|
2
|
+
import { GenericBatchProcessStats } from './shared/index';
|
|
3
|
+
export interface ProcessStats extends GenericBatchProcessStats {
|
|
4
|
+
recordsUpdated: number;
|
|
5
|
+
recordsSkipped: number;
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* Process all pending audit batch jobs
|
|
9
|
+
*/
|
|
10
|
+
export declare function processPendingBatches(app: Application): Promise<ProcessStats>;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
export type BatchAuditStats = {
|
|
2
|
+
submitted?: number;
|
|
3
|
+
processed?: number;
|
|
4
|
+
recordsQueued?: number;
|
|
5
|
+
recordsUpdated?: number;
|
|
6
|
+
recordsSkipped?: number;
|
|
7
|
+
cleanedUp?: number;
|
|
8
|
+
errors?: number;
|
|
9
|
+
};
|
|
10
|
+
export type BatchAuditState = {
|
|
11
|
+
submitRunning: boolean;
|
|
12
|
+
processRunning: boolean;
|
|
13
|
+
lastSubmitAt?: string;
|
|
14
|
+
lastProcessAt?: string;
|
|
15
|
+
lastCleanupAt?: string;
|
|
16
|
+
lastSubmitDurationMs?: number;
|
|
17
|
+
lastProcessDurationMs?: number;
|
|
18
|
+
lastSubmitResult?: BatchAuditStats;
|
|
19
|
+
lastProcessResult?: BatchAuditStats;
|
|
20
|
+
lastError?: string;
|
|
21
|
+
pendingJobs?: number;
|
|
22
|
+
runningJobs?: number;
|
|
23
|
+
succeededJobs?: number;
|
|
24
|
+
};
|
|
25
|
+
export declare function getBatchAuditState(): BatchAuditState;
|
|
26
|
+
export declare function startSubmit(): void;
|
|
27
|
+
export declare function finishSubmit(result: BatchAuditStats, durationMs: number): void;
|
|
28
|
+
export declare function failSubmit(error: unknown, durationMs: number): void;
|
|
29
|
+
export declare function startProcess(): void;
|
|
30
|
+
export declare function finishProcess(result: BatchAuditStats, durationMs: number): void;
|
|
31
|
+
export declare function failProcess(error: unknown, durationMs: number): void;
|
|
32
|
+
export declare function updateJobCounts(pending: number, running: number, succeeded: number): void;
|
|
33
|
+
export declare function updateCleanupTime(): void;
|
|
34
|
+
/**
|
|
35
|
+
* Reset state for testing purposes
|
|
36
|
+
* WARNING: Only use in tests!
|
|
37
|
+
*/
|
|
38
|
+
export declare function resetStateForTesting(): void;
|
|
39
|
+
/**
|
|
40
|
+
* Reconstruct state from database on service startup
|
|
41
|
+
*/
|
|
42
|
+
export declare function reconstructFromDatabase(batchJobsService: any): Promise<void>;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { Application } from '../declarations';
|
|
2
|
+
import { GenericBatchSubmitStats } from './shared/index';
|
|
3
|
+
export interface SubmitStats extends GenericBatchSubmitStats {
|
|
4
|
+
recordsQueued: number;
|
|
5
|
+
}
|
|
6
|
+
/**
|
|
7
|
+
* Build the audit prompt for a single record
|
|
8
|
+
*/
|
|
9
|
+
export declare function buildAuditPrompt(record: any): string;
|
|
10
|
+
/**
|
|
11
|
+
* Submit a new audit batch job
|
|
12
|
+
*/
|
|
13
|
+
export declare function submitAuditBatch(app: Application): Promise<SubmitStats>;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { Application } from '../declarations';
|
|
2
|
+
import { type SubmitStats } from './audit-batch-submit';
|
|
3
|
+
import { type ProcessStats } from './audit-batch-process';
|
|
4
|
+
import { getBatchAuditState, reconstructFromDatabase } from './audit-batch-state';
|
|
5
|
+
import { type CleanupStats } from './shared/index';
|
|
6
|
+
export interface CycleStats {
|
|
7
|
+
submit: SubmitStats;
|
|
8
|
+
process: ProcessStats;
|
|
9
|
+
cleanup: CleanupStats;
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Submit phase: Create and submit new audit batch jobs
|
|
13
|
+
*/
|
|
14
|
+
export declare function submitPhase(app: Application): Promise<SubmitStats>;
|
|
15
|
+
/**
|
|
16
|
+
* Process phase: Poll and process completed audit batch jobs
|
|
17
|
+
*/
|
|
18
|
+
export declare function processPhase(app: Application): Promise<ProcessStats>;
|
|
19
|
+
/**
|
|
20
|
+
* Cleanup phase: Remove old completed/failed batch jobs
|
|
21
|
+
*/
|
|
22
|
+
export declare function cleanupPhase(app: Application): Promise<CleanupStats>;
|
|
23
|
+
/**
|
|
24
|
+
* Get current status of audit batch jobs
|
|
25
|
+
*/
|
|
26
|
+
export declare function getAuditBatchStatus(app: Application): Promise<{
|
|
27
|
+
pending: number;
|
|
28
|
+
running: number;
|
|
29
|
+
succeeded: number;
|
|
30
|
+
failed: number;
|
|
31
|
+
}>;
|
|
32
|
+
/**
|
|
33
|
+
* Run a complete audit batch cycle (submit + process + cleanup)
|
|
34
|
+
*/
|
|
35
|
+
export declare function runAuditBatchCycle(app: Application): Promise<CycleStats>;
|
|
36
|
+
export { getBatchAuditState, reconstructFromDatabase };
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { Application } from '../declarations';
|
|
2
|
+
import { GenericBatchProcessStats } from './shared/index';
|
|
3
|
+
export interface ProcessStats extends GenericBatchProcessStats {
|
|
4
|
+
recordsExtracted?: number;
|
|
5
|
+
recordsCreated?: number;
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* Process all pending crawl batch jobs
|
|
9
|
+
*/
|
|
10
|
+
export declare function processPendingBatches(app: Application): Promise<ProcessStats>;
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
export type BatchCrawlerStats = {
|
|
2
|
+
submitted?: number;
|
|
3
|
+
processed?: number;
|
|
4
|
+
recordsExtracted?: number;
|
|
5
|
+
recordsCreated?: number;
|
|
6
|
+
recordsUpdated?: number;
|
|
7
|
+
duplicatesChecked?: number;
|
|
8
|
+
cleanedUp?: number;
|
|
9
|
+
errors?: number;
|
|
10
|
+
};
|
|
11
|
+
export type BatchCrawlerState = {
|
|
12
|
+
submitRunning: boolean;
|
|
13
|
+
processRunning: boolean;
|
|
14
|
+
lastSubmitAt?: string;
|
|
15
|
+
lastProcessAt?: string;
|
|
16
|
+
lastCleanupAt?: string;
|
|
17
|
+
lastSubmitDurationMs?: number;
|
|
18
|
+
lastProcessDurationMs?: number;
|
|
19
|
+
lastSubmitResult?: BatchCrawlerStats;
|
|
20
|
+
lastProcessResult?: BatchCrawlerStats;
|
|
21
|
+
lastError?: string;
|
|
22
|
+
pendingJobs?: number;
|
|
23
|
+
runningJobs?: number;
|
|
24
|
+
succeededJobs?: number;
|
|
25
|
+
};
|
|
26
|
+
/**
|
|
27
|
+
* Get current crawler batch state
|
|
28
|
+
*/
|
|
29
|
+
export declare function getBatchCrawlerState(): BatchCrawlerState;
|
|
30
|
+
/**
|
|
31
|
+
* Mark submit phase as started
|
|
32
|
+
*/
|
|
33
|
+
export declare function startSubmit(): void;
|
|
34
|
+
/**
|
|
35
|
+
* Mark submit phase as completed
|
|
36
|
+
*/
|
|
37
|
+
export declare function finishSubmit(result: BatchCrawlerStats, durationMs: number): void;
|
|
38
|
+
/**
|
|
39
|
+
* Mark submit phase as failed
|
|
40
|
+
*/
|
|
41
|
+
export declare function failSubmit(error: unknown, durationMs: number): void;
|
|
42
|
+
/**
|
|
43
|
+
* Mark process phase as started
|
|
44
|
+
*/
|
|
45
|
+
export declare function startProcess(): void;
|
|
46
|
+
/**
|
|
47
|
+
* Mark process phase as completed
|
|
48
|
+
*/
|
|
49
|
+
export declare function finishProcess(result: BatchCrawlerStats, durationMs: number): void;
|
|
50
|
+
/**
|
|
51
|
+
* Mark process phase as failed
|
|
52
|
+
*/
|
|
53
|
+
export declare function failProcess(error: unknown, durationMs: number): void;
|
|
54
|
+
/**
|
|
55
|
+
* Update job counts from database
|
|
56
|
+
*/
|
|
57
|
+
export declare function updateJobCounts(pending: number, running: number, succeeded: number): void;
|
|
58
|
+
/**
|
|
59
|
+
* Mark cleanup operation with current timestamp
|
|
60
|
+
*/
|
|
61
|
+
export declare function updateCleanupTime(): void;
|
|
62
|
+
/**
|
|
63
|
+
* Reset state for testing
|
|
64
|
+
*/
|
|
65
|
+
export declare function resetStateForTesting(): void;
|
|
66
|
+
/**
|
|
67
|
+
* Reconstruct state from database
|
|
68
|
+
* Used on service startup
|
|
69
|
+
*/
|
|
70
|
+
export declare function reconstructFromDatabase(batchJobsService: any): Promise<void>;
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { Application } from '../declarations';
|
|
2
|
+
import { GenericBatchSubmitStats } from './shared/index';
|
|
3
|
+
export interface SubmitStats extends GenericBatchSubmitStats {
|
|
4
|
+
}
|
|
5
|
+
/**
|
|
6
|
+
* Submit a new crawl batch job
|
|
7
|
+
*/
|
|
8
|
+
export declare function submitCrawlBatch(app: Application): Promise<SubmitStats>;
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { Application } from '../declarations';
|
|
2
|
+
import { SubmitStats } from './crawler-batch-submit';
|
|
3
|
+
import { ProcessStats } from './crawler-batch-process';
|
|
4
|
+
import { getBatchCrawlerState, BatchCrawlerState, reconstructFromDatabase } from './crawler-batch-state';
|
|
5
|
+
import { CleanupStats as SharedCleanupStats } from './shared/index';
|
|
6
|
+
export type CleanupStats = SharedCleanupStats;
|
|
7
|
+
export interface CycleStats {
|
|
8
|
+
submitted: number;
|
|
9
|
+
processed: number;
|
|
10
|
+
recordsExtracted: number;
|
|
11
|
+
recordsCreated: number;
|
|
12
|
+
recordsUpdated: number;
|
|
13
|
+
duplicatesChecked: number;
|
|
14
|
+
cleanedUp: number;
|
|
15
|
+
errors: number;
|
|
16
|
+
}
|
|
17
|
+
export type { SubmitStats } from './crawler-batch-submit';
|
|
18
|
+
export type { ProcessStats } from './crawler-batch-process';
|
|
19
|
+
export interface BatchStatus {
|
|
20
|
+
id: number;
|
|
21
|
+
job_name: string;
|
|
22
|
+
job_type: string;
|
|
23
|
+
status: string;
|
|
24
|
+
model: string;
|
|
25
|
+
request_count: number;
|
|
26
|
+
created_at: string;
|
|
27
|
+
updated_at: string;
|
|
28
|
+
completed_at?: string;
|
|
29
|
+
error_message?: string;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Submit phase - create and submit new batch jobs
|
|
33
|
+
*/
|
|
34
|
+
export declare function submitPhase(app: Application): Promise<SubmitStats>;
|
|
35
|
+
/**
|
|
36
|
+
* Process phase - poll and process completed batch jobs
|
|
37
|
+
*/
|
|
38
|
+
export declare function processPhase(app: Application): Promise<ProcessStats>;
|
|
39
|
+
/**
|
|
40
|
+
* Cleanup phase - remove old completed jobs based on retention policy
|
|
41
|
+
*/
|
|
42
|
+
export declare function cleanupPhase(app: Application): Promise<CleanupStats>;
|
|
43
|
+
/**
|
|
44
|
+
* Get status of all batch jobs
|
|
45
|
+
*/
|
|
46
|
+
export declare function getBatchStatus(app: Application): Promise<BatchStatus[]>;
|
|
47
|
+
/**
|
|
48
|
+
* Run full batch crawl cycle (submit → process → cleanup)
|
|
49
|
+
*/
|
|
50
|
+
export declare function runBatchCrawlCycle(app: Application): Promise<CycleStats>;
|
|
51
|
+
export { getBatchCrawlerState, BatchCrawlerState, reconstructFromDatabase };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { CrawlStats } from
|
|
1
|
+
import { CrawlStats } from './crawler';
|
|
2
2
|
export type CrawlerState = {
|
|
3
3
|
running: boolean;
|
|
4
4
|
lastRunAt?: string;
|
|
@@ -10,3 +10,8 @@ export declare function getCrawlerState(): CrawlerState;
|
|
|
10
10
|
export declare function startRun(): void;
|
|
11
11
|
export declare function finishRun(result: CrawlStats, durationMs: number): void;
|
|
12
12
|
export declare function failRun(error: unknown, durationMs: number): void;
|
|
13
|
+
/**
|
|
14
|
+
* Reset state for testing purposes
|
|
15
|
+
* WARNING: Only use in tests!
|
|
16
|
+
*/
|
|
17
|
+
export declare function resetStateForTesting(): void;
|
package/lib/jobs/crawler.d.ts
CHANGED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Find records needing duplicate check and their potential duplicate candidates
|
|
3
|
+
*/
|
|
4
|
+
import { Application } from '../../declarations';
|
|
5
|
+
/**
|
|
6
|
+
* Find records that need duplicate checking
|
|
7
|
+
* @param app - Feathers application
|
|
8
|
+
* @param limit - Maximum number of records to return (default: 50)
|
|
9
|
+
* @returns Array of records with duplicate_check_status = 'pending'
|
|
10
|
+
*/
|
|
11
|
+
export declare function findRecordsNeedingDuplicateCheck(app: Application, limit?: number): Promise<any[]>;
|
|
12
|
+
/**
|
|
13
|
+
* Calculate Jaccard similarity between two arrays
|
|
14
|
+
* @param arr1 - First array
|
|
15
|
+
* @param arr2 - Second array
|
|
16
|
+
* @returns Similarity score between 0.0 and 1.0
|
|
17
|
+
*/
|
|
18
|
+
export declare function calculateJaccardSimilarity(arr1: string[], arr2: string[]): number;
|
|
19
|
+
/**
|
|
20
|
+
* Find potential duplicate candidates for a given record
|
|
21
|
+
* Uses two-phase filtering: database filter (date + place) + in-memory similarity (tags + people)
|
|
22
|
+
*
|
|
23
|
+
* @param app - Feathers application
|
|
24
|
+
* @param record - The record to find duplicates for
|
|
25
|
+
* @param maxCandidates - Maximum number of candidates to return (default: 20)
|
|
26
|
+
* @param similarityThreshold - Minimum Jaccard similarity threshold (default: 0.5)
|
|
27
|
+
* @returns Array of candidate records that might be duplicates
|
|
28
|
+
*/
|
|
29
|
+
export declare function findDuplicateCandidates(app: Application, record: any, maxCandidates?: number, similarityThreshold?: number): Promise<any[]>;
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Duplicate-check module exports
|
|
3
|
+
*/
|
|
4
|
+
export { findRecordsNeedingDuplicateCheck, findDuplicateCandidates, calculateJaccardSimilarity } from './find-candidates';
|
|
5
|
+
export { buildDuplicateCheckPrompt } from './prompt';
|
|
6
|
+
export { mergeRecordsFromGeminiResponse, type MergeResult } from './merge-records';
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Log failed MongoDB backup to file for recovery
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Log a failed MongoDB backup to file
|
|
6
|
+
* Writes in JSONL format (one JSON object per line) for easy recovery
|
|
7
|
+
*
|
|
8
|
+
* @param record - The record that failed to backup
|
|
9
|
+
* @param error - The error that occurred
|
|
10
|
+
* @param metadata - Additional metadata about the deletion
|
|
11
|
+
*/
|
|
12
|
+
export declare function logFailedMongoBackup(record: any, error: Error, metadata: {
|
|
13
|
+
merged_into_id?: number;
|
|
14
|
+
deletion_reason?: string;
|
|
15
|
+
deleted_at: string;
|
|
16
|
+
deleted_by?: string;
|
|
17
|
+
}): void;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Merge duplicate records and handle deletion with MongoDB backup
|
|
3
|
+
*/
|
|
4
|
+
import { Application } from '../../declarations';
|
|
5
|
+
export interface MergeResult {
|
|
6
|
+
mergedRecordId: number;
|
|
7
|
+
deletedRecordIds: number[];
|
|
8
|
+
confidence: number;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Merge duplicate records into a single record
|
|
12
|
+
* Creates a new merged record and deletes the duplicates (with MongoDB backup via hook)
|
|
13
|
+
*
|
|
14
|
+
* @param app - Feathers application
|
|
15
|
+
* @param duplicateIds - Array of record IDs to merge
|
|
16
|
+
* @param mergedRecordData - Data for the merged record
|
|
17
|
+
* @param confidence - AI confidence score
|
|
18
|
+
* @returns MergeResult with merged record ID and deleted IDs
|
|
19
|
+
*/
|
|
20
|
+
export declare function mergeRecordsFromGeminiResponse(app: Application, duplicateIds: number[], mergedRecordData: any, confidence: number): Promise<MergeResult>;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Build prompts for Gemini API to detect and merge duplicate records
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Build a duplicate-check prompt for Gemini API
|
|
6
|
+
* Asks the AI to identify which candidates are duplicates and provide a merged record
|
|
7
|
+
*
|
|
8
|
+
* @param newRecord - The record being checked for duplicates
|
|
9
|
+
* @param candidates - Array of potential duplicate records
|
|
10
|
+
* @returns Prompt string for Gemini API
|
|
11
|
+
*/
|
|
12
|
+
export declare function buildDuplicateCheckPrompt(newRecord: any, candidates: any[]): string;
|