harvester_sdk 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/types.js ADDED
@@ -0,0 +1,140 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getAllowedEntitiesForPlatform = exports.zodDataSchema = exports.zodGeoSelectionSchema = exports.zodGeoSchema = exports.zodSourceSchema = exports.zodSourceGroupSchema = exports.zodRegionSchema = exports.generalStatusList = exports.zodSourceStatusList = exports.platformEntityMap = exports.entityTypesList = exports.platformsList = void 0;
4
+ const zod_1 = require("zod");
5
+ exports.platformsList = [
6
+ 'telegram',
7
+ 'facebook',
8
+ 'instagram',
9
+ 'tiktok',
10
+ 'website',
11
+ ];
12
+ exports.entityTypesList = ['profile', 'group', 'page', 'channel', 'hashtag', 'website'];
13
+ // Platform-specific entity mappings
14
+ exports.platformEntityMap = {
15
+ telegram: ['channel', 'group'],
16
+ facebook: ['profile', 'page', 'group'],
17
+ instagram: ['profile', 'hashtag'],
18
+ tiktok: ['profile', 'hashtag'],
19
+ website: ['website'],
20
+ };
21
+ exports.zodSourceStatusList = [
22
+ 'active',
23
+ 'pending',
24
+ 'inactive',
25
+ 'requested', // sources that are under review
26
+ ];
27
+ exports.generalStatusList = ['active', 'inactive'];
28
+ exports.zodRegionSchema = zod_1.z.object({
29
+ _id: zod_1.z.string().optional(),
30
+ name: zod_1.z.string(),
31
+ slug: zod_1.z.string(),
32
+ legend: zod_1.z.string().optional(),
33
+ created_at: zod_1.z.number().optional(),
34
+ updated_at: zod_1.z.number().optional(), // last update date
35
+ });
36
+ exports.zodSourceGroupSchema = zod_1.z.object({
37
+ _id: zod_1.z.string().optional(),
38
+ name: zod_1.z.string(),
39
+ description: zod_1.z.string().optional(),
40
+ platform: zod_1.z.enum(exports.platformsList),
41
+ region_id: zod_1.z.string().optional(),
42
+ max_active_sources: zod_1.z.number().optional(),
43
+ created_at: zod_1.z.number().optional(),
44
+ updated_at: zod_1.z.number().optional(), // last update date
45
+ });
46
+ exports.zodSourceSchema = zod_1.z
47
+ .object({
48
+ _id: zod_1.z.string().optional(),
49
+ title: zod_1.z.string().optional(),
50
+ platform: zod_1.z.enum(exports.platformsList),
51
+ entity: zod_1.z.enum(exports.entityTypesList),
52
+ public_id: zod_1.z.string().or(zod_1.z.number()).optional(),
53
+ name: zod_1.z.string(),
54
+ url: zod_1.z.string().url().optional(),
55
+ description: zod_1.z.string().optional(),
56
+ language: zod_1.z.string().optional(),
57
+ tags: zod_1.z.array(zod_1.z.string()).optional(),
58
+ status: zod_1.z.enum(exports.zodSourceStatusList),
59
+ is_public: zod_1.z.boolean().default(true),
60
+ metadata: zod_1.z.record(zod_1.z.string(), zod_1.z.any()).optional(),
61
+ created_at: zod_1.z.number().optional(),
62
+ updated_at: zod_1.z.number().optional(),
63
+ region_id: zod_1.z.string().optional(),
64
+ group_id: zod_1.z.string().optional(),
65
+ requested_by: zod_1.z
66
+ .object({
67
+ user_id: zod_1.z.string().optional(),
68
+ username: zod_1.z.string().optional(),
69
+ full_name: zod_1.z.string().optional(),
70
+ email: zod_1.z.string().optional(),
71
+ requested_at: zod_1.z.number().optional(),
72
+ })
73
+ .optional(),
74
+ notes: zod_1.z.string().optional(), // internal notes about the source
75
+ })
76
+ .refine((data) => {
77
+ const allowedEntities = exports.platformEntityMap[data.platform];
78
+ return allowedEntities.includes(data.entity);
79
+ }, (data) => ({
80
+ message: `Entity "${data.entity}" is not valid for platform "${data.platform}". Allowed entities: ${exports.platformEntityMap[data.platform].join(', ')}`,
81
+ path: ['entity'],
82
+ }));
83
+ exports.zodGeoSchema = zod_1.z.object({
84
+ _id: zod_1.z.string().optional(),
85
+ geo_text: zod_1.z.string().optional(),
86
+ timestamp: zod_1.z.number().optional(),
87
+ count: zod_1.z.number().optional(),
88
+ region: zod_1.z.string().optional(),
89
+ subscribers: zod_1.z.array(zod_1.z.string()).optional(),
90
+ is_used: zod_1.z.boolean().optional().default(false), // true if this geo is used in any source
91
+ });
92
+ exports.zodGeoSelectionSchema = zod_1.z.object({
93
+ _id: zod_1.z.string().optional(),
94
+ title: zod_1.z.string().optional(),
95
+ description: zod_1.z.string().optional(),
96
+ type: zod_1.z.string().optional(),
97
+ geos: zod_1.z.array(zod_1.z.string()).optional(),
98
+ region: zod_1.z.string().optional(),
99
+ created_at: zod_1.z.number().optional(),
100
+ updated_at: zod_1.z.number().optional(), // last update date
101
+ });
102
+ exports.zodDataSchema = zod_1.z.object({
103
+ // Filterable fields
104
+ text_geo: zod_1.z.array(zod_1.z.string()),
105
+ timestamp: zod_1.z.number(),
106
+ platform: zod_1.z.enum(exports.platformsList),
107
+ source_region: zod_1.z.string().optional(),
108
+ _id: zod_1.z.string().optional(),
109
+ source_id: zod_1.z.string().or(zod_1.z.number()),
110
+ source_public_id: zod_1.z.string().or(zod_1.z.number()),
111
+ source_name: zod_1.z.string(),
112
+ platform_id: zod_1.z.string().or(zod_1.z.number()),
113
+ original_text_id: zod_1.z.string().or(zod_1.z.number()),
114
+ original_text: zod_1.z.string().optional(),
115
+ translated_text: zod_1.z.string().optional(),
116
+ language: zod_1.z.string().optional(),
117
+ is_reply: zod_1.z.boolean().optional(),
118
+ reply_to_message_id: zod_1.z.string().or(zod_1.z.number()).optional(),
119
+ metadata: zod_1.z.record(zod_1.z.string(), zod_1.z.any()).optional(),
120
+ created_at: zod_1.z.number(),
121
+ updated_at: zod_1.z.number(),
122
+ media: zod_1.z
123
+ .array(zod_1.z.object({
124
+ type: zod_1.z.enum(['image', 'video', 'audio', 'link']),
125
+ url: zod_1.z.string().url(),
126
+ caption: zod_1.z.string().optional(),
127
+ }))
128
+ .optional(),
129
+ author: zod_1.z.string().optional(),
130
+ replies: zod_1.z.any().optional(),
131
+ entities: zod_1.z.any().optional(),
132
+ author_username: zod_1.z.string().optional(),
133
+ author_id: zod_1.z.string().optional(),
134
+ group_id: zod_1.z.string().optional(), // e.g., 'default_pipeline'
135
+ });
136
+ // Helper function to get allowed entities for a platform
137
+ const getAllowedEntitiesForPlatform = (platform) => {
138
+ return exports.platformEntityMap[platform];
139
+ };
140
+ exports.getAllowedEntitiesForPlatform = getAllowedEntitiesForPlatform;
package/index.ts ADDED
@@ -0,0 +1,306 @@
1
+ // Export types for consumers
2
+ export * from './types';
3
+
4
+ import { Schema } from 'mongoose';
5
+ import {
6
+ generalStatusList,
7
+ platformsList,
8
+ entityTypesList,
9
+ zodSourceStatusList
10
+ } from './types';
11
+
12
+ export const MongoDataSchema = new Schema( // Mongoose schema for one collected text item; most fields mirror zodDataSchema in ./types
13
+   {
14
+     source_id: { type: Schema.Types.Mixed, required: true }, // reference to Source _id
15
+     source_public_id: { type: Schema.Types.Mixed }, // e.g., 'telegram:1234567890' (message_id); optional here, required in zodDataSchema
16
+     source_name: { type: String, required: true }, // e.g., 'Telegram Channel Name'
17
+     platform_id: { type: Schema.Types.Mixed }, // channel_id reference to Source source_id; optional here, required in zodDataSchema
18
+     platform: {
19
+       type: String,
20
+       enum: platformsList,
21
+       required: true,
22
+     }, // e.g., 'telegram', 'facebook'
23
+     original_text_id: { type: Schema.Types.Mixed }, // message_id; optional here, required in zodDataSchema
24
+     original_text: { type: String }, // original text content
25
+     translated_text: { type: String }, // translated text if available
26
+     timestamp: { type: Number, required: true }, // date in milliseconds
27
+     language: { type: String },
28
+     text_geo: { type: [String], default: [] }, // e.g., ["sinjil", "ramallah", "west bank"]
29
+     source_geo: { type: String }, // e.g., 'hebron'; NOTE(review): zodDataSchema declares no source_geo field
30
+     is_reply: { type: Boolean }, // true if this text is a reply to another text
31
+     reply_to_message_id: { type: Schema.Types.Mixed },
32
+     metadata: { type: Object }, // platform-specific fields
33
+     created_at: { type: Number, default: Date.now }, // defaults to Date.now (ms) when the document is created
34
+     updated_at: { type: Number, default: Date.now }, // default applies at creation only; nothing in this schema refreshes it on later saves
35
+     media: {
36
+       type: [
37
+         {
38
+           type: { type: String, enum: ['image', 'video', 'audio', 'link'] }, // a field literally named "type"; the nested { type: String } form declares it
39
+           url: { type: String },
40
+           caption: { type: String },
41
+         },
42
+       ],
43
+     }, // media attachments
44
+     author: { type: String }, // e.g., author name or ID
45
+     replies: { type: Schema.Types.Mixed }, // array of reply texts or IDs
46
+     entities: { type: Schema.Types.Mixed }, // array of entities
47
+     author_username: { type: String }, // e.g., author username
48
+     author_id: { type: String }, // e.g., author ID
49
+     pipeline_name: { type: String }, // e.g., 'default_pipeline'; NOTE(review): zodDataSchema has no pipeline_name but has group_id with this same example - confirm which name is intended
50
+     source_region: { type: String }, // e.g., "New York", "California"
51
+   },
52
+   {
53
+     versionKey: false, // do not add Mongoose's __v version field
54
+     toJSON: { virtuals: true }, // include virtuals when serializing with toJSON()
55
+     toObject: { virtuals: true }, // include virtuals when converting with toObject()
56
+   }
57
+ );
58
+
59
+ export const MongoInstructionsSchema = new Schema( // Mongoose schema holding a prompt string together with the user_id it belongs to
60
+   {
61
+     user_id: { type: String, required: true }, // the only required field
62
+     prompt: { type: String }, // presumably the user's stored instructions text - TODO confirm against callers
63
+     created_at: { type: Number, default: Date.now }, // defaults to Date.now (ms) when the document is created
64
+     updated_at: { type: Number, default: Date.now }, // default applies at creation only; nothing in this schema refreshes it on later saves
65
+   },
66
+   {
67
+     versionKey: false, // do not add Mongoose's __v version field
68
+     toJSON: { virtuals: true },
69
+     toObject: { virtuals: true },
70
+   }
71
+ );
72
+
73
+ export const MongoRegionSchema = new Schema( // Mongoose schema for a region; same fields as zodRegionSchema in ./types
74
+   {
75
+     name: { type: String, required: true }, // e.g., "New York"
76
+     slug: { type: String, required: true }, // e.g., "new-york"; no unique constraint is declared here
77
+     legend: { type: String }, // e.g., legend for maps
78
+     created_at: { type: Number, default: Date.now }, // creation date (ms from Date.now)
79
+     updated_at: { type: Number, default: Date.now }, // last update date; the default applies at creation only, nothing here refreshes it
80
+   },
81
+   {
82
+     versionKey: false, // do not add Mongoose's __v version field
83
+     toJSON: { virtuals: true },
84
+     toObject: { virtuals: true },
85
+   }
86
+ );
87
+
88
+ export const MongoSourceGroupSchema = new Schema( // Mongoose schema for a source group; same fields as zodSourceGroupSchema in ./types
89
+   {
90
+     name: { type: String, required: true }, // e.g., "Group Name"
91
+     description: { type: String }, // e.g., "Description of the group"
92
+     platform: {
93
+       type: String,
94
+       enum: platformsList, // restricted to the values of platformsList
95
+       required: true,
96
+     },
97
+     region_id: { type: String }, // e.g., region ID
98
+     max_active_sources: { type: Number }, // e.g., maximum number of active sources allowed; not enforced by this schema
99
+     created_at: { type: Number, default: Date.now }, // creation date (ms from Date.now)
100
+     updated_at: { type: Number, default: Date.now }, // last update date; the default applies at creation only, nothing here refreshes it
101
+   },
102
+   {
103
+     versionKey: false, // do not add Mongoose's __v version field
104
+     toJSON: { virtuals: true },
105
+     toObject: { virtuals: true },
106
+   }
107
+ );
108
+
109
+ export const MongoSourceSchema = new Schema( // Mongoose schema for a source; fields mirror zodSourceSchema in ./types
110
+   {
111
+     title: { type: String }, // e.g., 'Telegram Channel Name'
112
+     platform: {
113
+       type: String,
114
+       enum: platformsList,
115
+       required: true,
116
+     },
117
+     entity: {
118
+       type: String,
119
+       enum: entityTypesList,
120
+       required: true,
121
+     }, // e.g., 'channel', 'group', 'page', 'profile', 'hashtag', 'website'; the platform/entity pairing checked by zodSourceSchema.refine is NOT validated here
122
+     public_id: { type: Schema.Types.Mixed }, // e.g., '@telegram_channel_id'
123
+     name: { type: String, required: true }, // e.g., 'Telegram Channel Name'
124
+     url: { type: String }, // e.g., 'https://t.me/telegram_channel_name'
125
+     description: { type: String }, // e.g., 'A channel about news and updates'
126
+     language: { type: String },
127
+     tags: { type: [String], default: [] }, // array of tags
128
+     status: {
129
+       type: String,
130
+       enum: zodSourceStatusList,
131
+       default: 'pending', // zodSourceSchema requires status explicitly; here it falls back to 'pending'
132
+     }, // INDEX - NOTE(review): no index is declared in this schema; confirm it is created elsewhere
133
+     is_public: { type: Boolean, default: true }, // true if the source is public
134
+     metadata: { type: Object, default: {} }, // flexible per platform
135
+     created_at: { type: Number, default: Date.now }, // defaults to Date.now (ms) when the document is created
136
+     updated_at: { type: Number, default: Date.now }, // default applies at creation only; nothing in this schema refreshes it on later saves
137
+     region_id: { type: String }, // e.g., region ID
138
+     group_id: { type: String }, // e.g., source group ID
139
+     requested_by: {
140
+       type: {
141
+         user_id: { type: String },
142
+         username: { type: String },
143
+         full_name: { type: String },
144
+         email: { type: String },
145
+         requested_at: { type: Number },
146
+       },
147
+       default: undefined, // explicitly no default value for requested_by
148
+     }, // user who requested this source
149
+     notes: { type: String }, // internal notes about the source
150
+   },
151
+   {
152
+     versionKey: false, // do not add Mongoose's __v version field
153
+     toJSON: { virtuals: true },
154
+     toObject: { virtuals: true },
155
+   }
156
+ );
157
+
158
+ export const MongoQuerySchema = new Schema( // Mongoose schema for a stored search query and its filters
159
+   {
160
+     title: { type: String }, // e.g., 'Search Query'
161
+     query: { type: String }, // free-text search query; it may contain geo information that is meant to be extracted with an LLM
162
+     sources: { type: [String] }, // array of source _id strings
163
+     geos: { type: [String], required: true }, // array of geo strings; NOTE(review): Mongoose array paths default to [], so `required` likely never fails - confirm
164
+     geos_ids: { type: [String], required: true }, // array of geo _id strings; same `required` caveat as geos
165
+     time_range: {
166
+       type: Schema.Types.Mixed, // can be relative or absolute time range; the shape is not validated by this schema
167
+       required: true,
168
+     },
169
+     user_id: { type: String, required: true }, // user identifier
170
+     user_instructions: { type: String }, // optional user instructions for the query
171
+     user_time_zone: { type: String }, // user's timezone
172
+     timestamp: { type: Number, default: Date.now }, // creation date (ms from Date.now); this schema has no created_at/updated_at pair
173
+   },
174
+   {
175
+     versionKey: false, // do not add Mongoose's __v version field
176
+     toJSON: { virtuals: true },
177
+     toObject: { virtuals: true },
178
+   }
179
+ );
180
+
181
+ export const MongoUserSettingsSchema = new Schema( // Mongoose schema for per-user settings: default filters, job access and status
182
+   {
183
+     sources: { type: [String] }, // array of source _id strings
184
+     geos: { type: [String], default: [] }, // array of geo strings
185
+     time_range: {
186
+       type: Schema.Types.Mixed, // shape is not validated by this schema
187
+     },
188
+     user_id: { type: String, required: true }, // the only required field
189
+     thread_id: { type: String },
190
+     instructions: { type: String }, // optional user instructions for the query
191
+     has_jobs_access: { type: Boolean, default: false }, // true if the user has access to jobs
192
+     active_jobs_limit: { type: Number, default: 5 }, // maximum number of active jobs allowed; not enforced by this schema
193
+     created_at: { type: Date, default: Date.now }, // creation date; NOTE(review): stored as Date here, while most other schemas in this file use Number
194
+     updated_at: { type: Date, default: Date.now }, // last update date; Date type as above, and the default applies at creation only
195
+     status: {
196
+       type: String,
197
+       enum: generalStatusList,
198
+       default: 'active',
199
+     }, // status of the user
200
+   },
201
+   {
202
+     versionKey: false, // do not add Mongoose's __v version field
203
+     toJSON: { virtuals: true },
204
+     toObject: { virtuals: true },
205
+   }
206
+ );
207
+
208
+ export const MongoConversationSchema = new Schema( // Mongoose schema for a conversation record owned by a user
209
+   {
210
+     user_id: { type: String, required: true }, // user identifier
211
+     title: { type: String }, // optional title for the conversation
212
+     thread_id: { type: String }, // thread ID for the conversation
213
+     status: {
214
+       type: String,
215
+       enum: generalStatusList,
216
+       default: 'active',
217
+     }, // status of the conversation
218
+     created_at: { type: Date, default: Date.now }, // creation date; NOTE(review): stored as Date here, while most other schemas in this file use Number
219
+     updated_at: { type: Date, default: Date.now }, // last update date; Date type as above, and the default applies at creation only
220
+     is_job: { type: Boolean, default: false }, // true if this is a job conversation
221
+   },
222
+   {
223
+     versionKey: false, // do not add Mongoose's __v version field
224
+     toJSON: { virtuals: true },
225
+     toObject: { virtuals: true },
226
+   }
227
+ );
228
+
229
+ export const MongoJobSchema = new Schema( // Mongoose schema for a scheduled automation job owned by a user
230
+   {
231
+     user_id: { type: String, required: true }, // user identifier
232
+     job_name: { type: String, required: true }, // name of the automation job
233
+     job_description: { type: String }, // description of the job
234
+     status: {
235
+       type: String,
236
+       enum: generalStatusList,
237
+       default: 'active',
238
+     }, // status of the job
239
+     created_at: { type: Number, default: Date.now }, // creation date (ms from Date.now)
240
+     updated_at: { type: Number, default: Date.now }, // last update date; the default applies at creation only, nothing here refreshes it
241
+     schedule: { type: String }, // cron schedule for the job
242
+     schedule_text: { type: String }, // cron schedule in free-text format
243
+     active_until: { type: Number }, // optional date until which the job is active; optional because the path is not `required` (was `Number || undefined`, which always evaluates to Number)
244
+     query: { type: Schema.Types.Mixed, required: true }, // query associated with the job; the shape is not validated by this schema
245
+     conversation_id: { type: String }, // optional conversation ID for the job
246
+     thread_id: { type: String }, // thread ID for the job
247
+     time_zone: { type: String }, // optional timezone for the job
248
+   },
249
+   {
250
+     versionKey: false, // do not add Mongoose's __v version field
251
+     toJSON: { virtuals: true },
252
+     toObject: { virtuals: true },
253
+   }
254
+ );
255
+
256
+ export const MongoGeoSchema = new Schema( // Mongoose schema for a geo entry; same fields as zodGeoSchema in ./types
257
+   {
258
+     geo_text: { type: String }, // e.g., "New York, USA"
259
+     timestamp: { type: Number }, // e.g., 1751210833000; no default, must be supplied by the writer
260
+     count: { type: Number }, // e.g., 42
261
+     region: { type: String }, // e.g., "New York", "California"
262
+     subscribers: { type: [String], default: [] }, // array of user IDs who subscribed to this geo
263
+     is_used: { type: Boolean, default: false }, // true if this geo is used in any source
264
+   },
265
+   {
266
+     versionKey: false, // do not add Mongoose's __v version field
267
+     toJSON: { virtuals: true },
268
+     toObject: { virtuals: true },
269
+   }
270
+ );
271
+
272
+ export const MongoGeoSelectionSchema = new Schema( // Mongoose schema for a named selection of geos; same fields as zodGeoSelectionSchema in ./types
273
+   {
274
+     title: { type: String }, // e.g., "New York, USA"
275
+     description: { type: String }, // e.g., "A city in the USA"
276
+     type: { type: String }, // e.g., "city", "region", "country"; a field literally named "type", declared via the nested { type: String } form
277
+     geos: { type: [String], default: [] }, // e.g., ["New York", "Los Angeles"]
278
+     region: { type: String }, // e.g., "New York", "California"
279
+     created_at: { type: Number }, // creation date; NOTE(review): no Date.now default here, unlike the other schemas in this file
280
+     updated_at: { type: Number }, // last update date; no default here either, so it stays unset unless the writer supplies it
281
+   },
282
+   {
283
+     versionKey: false, // do not add Mongoose's __v version field
284
+     toJSON: { virtuals: true },
285
+     toObject: { virtuals: true },
286
+   }
287
+ );
288
+
289
+ export const MongoApiKeySchema = new Schema( // Mongoose schema linking an API key to the user it belongs to
290
+   {
291
+     user_id: { type: String, required: true }, // user identifier
292
+     api_key: { type: String, required: true }, // API key; plain String with no unique constraint declared. NOTE(review): whether the value is hashed before storage is not visible here
293
+     created_at: { type: Number, default: Date.now }, // creation date (ms from Date.now)
294
+     updated_at: { type: Number, default: Date.now }, // last update date; the default applies at creation only, nothing here refreshes it
295
+     status: {
296
+       type: String,
297
+       enum: generalStatusList,
298
+       default: 'active',
299
+     }, // status of the API key
300
+   },
301
+   {
302
+     versionKey: false, // do not add Mongoose's __v version field
303
+     toJSON: { virtuals: true },
304
+     toObject: { virtuals: true },
305
+   }
306
+ );
package/package.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "name": "harvester_sdk",
3
+ "version": "1.0.1",
4
+ "description": "SDK for interacting with the Harvester API",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "repository": {
8
+ "url": "git+https://github.com/badgerdefense/harvester.sdk.git",
9
+ "type": "git"
10
+ },
11
+ "scripts": {
12
+ "build": "tsc",
13
+ "test": "jest",
14
+ "prepublish": "npm run build"
15
+ },
16
+ "keywords": [
17
+ "asfur",
18
+ "sdk",
19
+ "api"
20
+ ],
21
+ "author": "deerland",
22
+ "license": "MIT",
23
+ "devDependencies": {
24
+ "@types/jest": "^29.5.0",
25
+ "jest": "^29.5.0",
26
+ "typescript": "^4.9.5"
27
+ },
28
+ "dependencies": {
29
+ "axios": "^1.9.0",
30
+ "mongoose": "^8.16.4",
31
+ "zod": "^3.25.76"
32
+ }
33
+ }
package/tsconfig.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "es2018",
4
+ "module": "commonjs",
5
+ "declaration": true,
6
+ "outDir": "./dist",
7
+ "strict": true,
8
+ "esModuleInterop": true,
9
+ "skipLibCheck": true,
10
+ "forceConsistentCasingInFileNames": true
11
+ },
12
+ "include": ["index.ts", "types.ts"],
13
+ "exclude": ["node_modules", "dist"]
14
+ }
package/types.ts ADDED
@@ -0,0 +1,177 @@
1
+ import { object, pipeline, z } from 'zod';
2
+
3
+ export const platformsList = [
4
+ 'telegram',
5
+ 'facebook',
6
+ 'instagram',
7
+ 'tiktok',
8
+ 'website',
9
+ ] as const;
10
+
11
+ export const entityTypesList = ['profile', 'group', 'page', 'channel', 'hashtag', 'website'] as const;
12
+
13
+ // Platform-specific entity mappings: for each platform, the entity types that zodSourceSchema's refine step accepts
14
+ export const platformEntityMap = {
15
+   telegram: ['channel', 'group'],
16
+   facebook: ['profile', 'page', 'group'],
17
+   instagram: ['profile', 'hashtag'],
18
+   tiktok: ['profile', 'hashtag'],
19
+   website: ['website'],
20
+ } as const; // `as const` keeps the literal tuples so PlatformEntityType can index them; keys must stay in sync with platformsList
21
+
22
+ export const zodSourceStatusList = [
23
+ 'active', // active and approved sources
24
+ 'pending', // sources that are pending approval
25
+ 'inactive', // sources that are inactive
26
+ 'requested', // sources that are under review
27
+ ] as const;
28
+
29
+ export const generalStatusList = ['active', 'inactive'] as const;
30
+
31
+ export const zodRegionSchema = z.object({
32
+ _id: z.string().optional(),
33
+ name: z.string(), // e.g., "New York"
34
+ slug: z.string(), // e.g., "new-york"
35
+ legend: z.string().optional(), // e.g., legend for maps
36
+ created_at: z.number().optional(), // creation date
37
+ updated_at: z.number().optional(), // last update date
38
+ });
39
+
40
+ export const zodSourceGroupSchema = z.object({ // validation schema for a source group; SourceGroupType is inferred from it
41
+   _id: z.string().optional(),
42
+   name: z.string(), // e.g., "Group Name"
43
+   description: z.string().optional(), // e.g., "Description of the group"
44
+   platform: z.enum(platformsList), // one of platformsList, e.g., 'telegram'
45
+   region_id: z.string().optional(), // e.g., region ID
46
+   max_active_sources: z.number().optional(), // e.g., maximum number of active sources allowed
47
+   created_at: z.number().optional(), // creation date
48
+   updated_at: z.number().optional(), // last update date
49
+ });
50
+
51
+ export const zodSourceSchema = z // validation schema for a source: an object schema plus a platform/entity cross-check
52
+   .object({
53
+     _id: z.string().optional(),
54
+     title: z.string().optional(), // e.g., 'Telegram Channel Name'
55
+     platform: z.enum(platformsList), // e.g., 'telegram', 'facebook'
56
+     entity: z.enum(entityTypesList), // e.g., 'channel', 'group', 'page', 'profile', 'hashtag', 'website'
57
+     public_id: z.string().or(z.number()).optional(), // e.g., '@telegram_channel_id'
58
+     name: z.string(), // e.g., 'Telegram Channel Name'
59
+     url: z.string().url().optional(), // e.g., 'https://t.me/telegram_channel_name'
60
+     description: z.string().optional(), // e.g., 'A channel about news and updates'
61
+     language: z.string().optional(),
62
+     tags: z.array(z.string()).optional(),
63
+     status: z.enum(zodSourceStatusList), // INDEX - required here (no default is applied by this schema)
64
+     is_public: z.boolean().default(true), // true if the source is public
65
+     metadata: z.record(z.string(), z.any()).optional(), // flexible per platform
66
+     created_at: z.number().optional(),
67
+     updated_at: z.number().optional(),
68
+     region_id: z.string().optional(), // e.g., region ID
69
+     group_id: z.string().optional(), // e.g., source group ID
70
+     requested_by: z
71
+       .object({
72
+         user_id: z.string().optional(),
73
+         username: z.string().optional(),
74
+         full_name: z.string().optional(),
75
+         email: z.string().optional(), // plain string; no .email() format check is applied
76
+         requested_at: z.number().optional(),
77
+       })
78
+       .optional(), // user who requested this source
79
+     notes: z.string().optional(), // internal notes about the source
80
+   })
81
+   .refine( // cross-field rule; after this call the export is a refined schema rather than a plain z.object
82
+     (data) => { // predicate: passes only when the entity is listed for the platform in platformEntityMap
83
+       const allowedEntities = platformEntityMap[data.platform] as readonly string[]; // widened so .includes() accepts any entity literal
84
+       return allowedEntities.includes(data.entity);
85
+     },
86
+     (data) => ({ // builds the issue reported when the predicate returns false
87
+       message: `Entity "${data.entity}" is not valid for platform "${data.platform}". Allowed entities: ${platformEntityMap[data.platform].join(', ')}`,
88
+       path: ['entity'], // attach the issue to the entity field
89
+     })
90
+   );
91
+
92
+ export const zodGeoSchema = z.object({ // validation schema for a geo entry; GeoType is inferred from it
93
+   _id: z.string().optional(),
94
+   geo_text: z.string().optional(), // e.g., "New York, USA"
95
+   timestamp: z.number().optional(), // e.g., 1751210833000
96
+   count: z.number().optional(), // e.g., 42
97
+   region: z.string().optional(), // e.g., "New York", "California"
98
+   subscribers: z.array(z.string()).optional(), // array of user IDs who subscribed to this geo
99
+   is_used: z.boolean().optional().default(false), // true if this geo is used in any source; missing input parses to false (the .optional() before .default() looks redundant)
100
+ });
101
+
102
+ export const zodGeoSelectionSchema = z.object({
103
+ _id: z.string().optional(),
104
+ title: z.string().optional(), // e.g., "New York, USA"
105
+ description: z.string().optional(), // e.g., "A city in the USA"
106
+ type: z.string().optional(), // e.g., "city", "region", "country"
107
+ geos: z.array(z.string()).optional(), // e.g., ["New York", "Los Angeles"]
108
+ region: z.string().optional(), // e.g., "New York", "California"
109
+ created_at: z.number().optional(), // creation date
110
+ updated_at: z.number().optional(), // last update date
111
+ });
112
+
113
+ export const zodDataSchema = z.object({ // validation schema for one collected text item; DataType is inferred from it
114
+   // Filterable fields
115
+   text_geo: z.array(z.string()), // INDEX - e.g., ["sinjil", "ramallah", "west bank"]
116
+   timestamp: z.number(), // INDEX - date in milliseconds - e.g., 1751210833000
117
+   platform: z.enum(platformsList), // e.g., 'telegram', 'facebook'
118
+   source_region: z.string().optional(), // e.g., 'hebron'
119
+   _id: z.string().optional(),
120
+   source_id: z.string().or(z.number()), // INDEX - reference to Source _id (e.g., '60c72b2f9b1e8d3f4c8b4567')
121
+   source_public_id: z.string().or(z.number()), // e.g., 'telegram:1234567890' (message_id)
122
+   source_name: z.string(), // e.g., 'Telegram Channel Name'
123
+   platform_id: z.string().or(z.number()), // channel_id reference to Source source_id
124
+   original_text_id: z.string().or(z.number()), // message_id
125
+   original_text: z.string().optional(), // original text content
126
+   translated_text: z.string().optional(), // translated text if available - most of the time it will be translated to English
127
+   language: z.string().optional(),
128
+   is_reply: z.boolean().optional(), // true if this text is a reply to another text
129
+   reply_to_message_id: z.string().or(z.number()).optional(),
130
+   metadata: z.record(z.string(), z.any()).optional(), // platform-specific fields
131
+   created_at: z.number(), // required here (this schema applies no default)
132
+   updated_at: z.number(), // required here (this schema applies no default)
133
+   media: z
134
+     .array(
135
+       z.object({
136
+         type: z.enum(['image', 'video', 'audio', 'link']),
137
+         url: z.string().url(), // must parse as a URL
138
+         caption: z.string().optional(),
139
+       })
140
+     )
141
+     .optional(), // media attachments
142
+   author: z.string().optional(), // e.g., author name or ID
143
+   replies: z.any().optional(), // array of reply texts or IDs (the shape is not validated)
144
+   entities: z.any().optional(), // entities attached to the text; any value is accepted, the shape is not validated
145
+   author_username: z.string().optional(), // e.g., author username
146
+   author_id: z.string().optional(), // e.g., author ID
147
+   group_id: z.string().optional(), // e.g., 'default_pipeline'; NOTE(review): MongoDataSchema in index.ts stores this example under pipeline_name - confirm which field name is intended
148
+ });
149
+
150
+ /**
151
+  * Source status values (the members of StatusType / zodSourceStatusList):
152
+  * 'active' - active and approved sources,
153
+  * 'pending' - sources that are pending approval,
154
+  * 'inactive' - sources that are inactive,
155
+  * 'requested' - sources that are under review
156
+  */
157
+ export type RegionType = z.infer<typeof zodRegionSchema>;
158
+ export type SourceGroupType = z.infer<typeof zodSourceGroupSchema>;
159
+ export type SourceType = z.infer<typeof zodSourceSchema>;
160
+ export type DataType = z.infer<typeof zodDataSchema>;
161
+ export type StatusType = (typeof zodSourceStatusList)[number];
162
+ export type TimeRangeTypeLiteral = 'relative' | 'absolute';
163
+ export type AddSourceToReviewType = Pick<
164
+ SourceType,
165
+ 'platform' | 'url' | 'description'
166
+ > & Partial<Pick<SourceType, 'public_id'>>;
167
+
168
+ export type GeoType = z.infer<typeof zodGeoSchema>;
169
+ export type GeoSelectionType = z.infer<typeof zodGeoSelectionSchema>;
170
+
171
+ // Helper type to get allowed entities for a specific platform
172
+ export type PlatformEntityType<T extends typeof platformsList[number]> = typeof platformEntityMap[T][number];
173
+
174
+ // Look up the entity types that platformEntityMap lists for the given platform
175
+ export const getAllowedEntitiesForPlatform = (
176
+   platform: typeof platformsList[number]
177
+ ) => platformEntityMap[platform];