harvester_sdk 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/dist/index.d.ts +627 -0
- package/dist/index.js +269 -0
- package/dist/types.d.ts +375 -0
- package/dist/types.js +140 -0
- package/index.ts +306 -0
- package/package.json +33 -0
- package/tsconfig.json +14 -0
- package/types.ts +177 -0
package/dist/types.js
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.getAllowedEntitiesForPlatform = exports.zodDataSchema = exports.zodGeoSelectionSchema = exports.zodGeoSchema = exports.zodSourceSchema = exports.zodSourceGroupSchema = exports.zodRegionSchema = exports.generalStatusList = exports.zodSourceStatusList = exports.platformEntityMap = exports.entityTypesList = exports.platformsList = void 0;
|
|
4
|
+
const zod_1 = require("zod");
|
|
5
|
+
exports.platformsList = [
|
|
6
|
+
'telegram',
|
|
7
|
+
'facebook',
|
|
8
|
+
'instagram',
|
|
9
|
+
'tiktok',
|
|
10
|
+
'website',
|
|
11
|
+
];
|
|
12
|
+
exports.entityTypesList = ['profile', 'group', 'page', 'channel', 'hashtag', 'website'];
|
|
13
|
+
// Platform-specific entity mappings
|
|
14
|
+
exports.platformEntityMap = {
|
|
15
|
+
telegram: ['channel', 'group'],
|
|
16
|
+
facebook: ['profile', 'page', 'group'],
|
|
17
|
+
instagram: ['profile', 'hashtag'],
|
|
18
|
+
tiktok: ['profile', 'hashtag'],
|
|
19
|
+
website: ['website'],
|
|
20
|
+
};
|
|
21
|
+
exports.zodSourceStatusList = [
|
|
22
|
+
'active',
|
|
23
|
+
'pending',
|
|
24
|
+
'inactive',
|
|
25
|
+
'requested', // sources that are under review
|
|
26
|
+
];
|
|
27
|
+
exports.generalStatusList = ['active', 'inactive'];
|
|
28
|
+
exports.zodRegionSchema = zod_1.z.object({
|
|
29
|
+
_id: zod_1.z.string().optional(),
|
|
30
|
+
name: zod_1.z.string(),
|
|
31
|
+
slug: zod_1.z.string(),
|
|
32
|
+
legend: zod_1.z.string().optional(),
|
|
33
|
+
created_at: zod_1.z.number().optional(),
|
|
34
|
+
updated_at: zod_1.z.number().optional(), // last update date
|
|
35
|
+
});
|
|
36
|
+
exports.zodSourceGroupSchema = zod_1.z.object({
|
|
37
|
+
_id: zod_1.z.string().optional(),
|
|
38
|
+
name: zod_1.z.string(),
|
|
39
|
+
description: zod_1.z.string().optional(),
|
|
40
|
+
platform: zod_1.z.enum(exports.platformsList),
|
|
41
|
+
region_id: zod_1.z.string().optional(),
|
|
42
|
+
max_active_sources: zod_1.z.number().optional(),
|
|
43
|
+
created_at: zod_1.z.number().optional(),
|
|
44
|
+
updated_at: zod_1.z.number().optional(), // last update date
|
|
45
|
+
});
|
|
46
|
+
exports.zodSourceSchema = zod_1.z
|
|
47
|
+
.object({
|
|
48
|
+
_id: zod_1.z.string().optional(),
|
|
49
|
+
title: zod_1.z.string().optional(),
|
|
50
|
+
platform: zod_1.z.enum(exports.platformsList),
|
|
51
|
+
entity: zod_1.z.enum(exports.entityTypesList),
|
|
52
|
+
public_id: zod_1.z.string().or(zod_1.z.number()).optional(),
|
|
53
|
+
name: zod_1.z.string(),
|
|
54
|
+
url: zod_1.z.string().url().optional(),
|
|
55
|
+
description: zod_1.z.string().optional(),
|
|
56
|
+
language: zod_1.z.string().optional(),
|
|
57
|
+
tags: zod_1.z.array(zod_1.z.string()).optional(),
|
|
58
|
+
status: zod_1.z.enum(exports.zodSourceStatusList),
|
|
59
|
+
is_public: zod_1.z.boolean().default(true),
|
|
60
|
+
metadata: zod_1.z.record(zod_1.z.string(), zod_1.z.any()).optional(),
|
|
61
|
+
created_at: zod_1.z.number().optional(),
|
|
62
|
+
updated_at: zod_1.z.number().optional(),
|
|
63
|
+
region_id: zod_1.z.string().optional(),
|
|
64
|
+
group_id: zod_1.z.string().optional(),
|
|
65
|
+
requested_by: zod_1.z
|
|
66
|
+
.object({
|
|
67
|
+
user_id: zod_1.z.string().optional(),
|
|
68
|
+
username: zod_1.z.string().optional(),
|
|
69
|
+
full_name: zod_1.z.string().optional(),
|
|
70
|
+
email: zod_1.z.string().optional(),
|
|
71
|
+
requested_at: zod_1.z.number().optional(),
|
|
72
|
+
})
|
|
73
|
+
.optional(),
|
|
74
|
+
notes: zod_1.z.string().optional(), // internal notes about the source
|
|
75
|
+
})
|
|
76
|
+
.refine((data) => {
|
|
77
|
+
const allowedEntities = exports.platformEntityMap[data.platform];
|
|
78
|
+
return allowedEntities.includes(data.entity);
|
|
79
|
+
}, (data) => ({
|
|
80
|
+
message: `Entity "${data.entity}" is not valid for platform "${data.platform}". Allowed entities: ${exports.platformEntityMap[data.platform].join(', ')}`,
|
|
81
|
+
path: ['entity'],
|
|
82
|
+
}));
|
|
83
|
+
exports.zodGeoSchema = zod_1.z.object({
|
|
84
|
+
_id: zod_1.z.string().optional(),
|
|
85
|
+
geo_text: zod_1.z.string().optional(),
|
|
86
|
+
timestamp: zod_1.z.number().optional(),
|
|
87
|
+
count: zod_1.z.number().optional(),
|
|
88
|
+
region: zod_1.z.string().optional(),
|
|
89
|
+
subscribers: zod_1.z.array(zod_1.z.string()).optional(),
|
|
90
|
+
is_used: zod_1.z.boolean().optional().default(false), // true if this geo is used in any source
|
|
91
|
+
});
|
|
92
|
+
exports.zodGeoSelectionSchema = zod_1.z.object({
|
|
93
|
+
_id: zod_1.z.string().optional(),
|
|
94
|
+
title: zod_1.z.string().optional(),
|
|
95
|
+
description: zod_1.z.string().optional(),
|
|
96
|
+
type: zod_1.z.string().optional(),
|
|
97
|
+
geos: zod_1.z.array(zod_1.z.string()).optional(),
|
|
98
|
+
region: zod_1.z.string().optional(),
|
|
99
|
+
created_at: zod_1.z.number().optional(),
|
|
100
|
+
updated_at: zod_1.z.number().optional(), // last update date
|
|
101
|
+
});
|
|
102
|
+
exports.zodDataSchema = zod_1.z.object({
|
|
103
|
+
// Filterable fields
|
|
104
|
+
text_geo: zod_1.z.array(zod_1.z.string()),
|
|
105
|
+
timestamp: zod_1.z.number(),
|
|
106
|
+
platform: zod_1.z.enum(exports.platformsList),
|
|
107
|
+
source_region: zod_1.z.string().optional(),
|
|
108
|
+
_id: zod_1.z.string().optional(),
|
|
109
|
+
source_id: zod_1.z.string().or(zod_1.z.number()),
|
|
110
|
+
source_public_id: zod_1.z.string().or(zod_1.z.number()),
|
|
111
|
+
source_name: zod_1.z.string(),
|
|
112
|
+
platform_id: zod_1.z.string().or(zod_1.z.number()),
|
|
113
|
+
original_text_id: zod_1.z.string().or(zod_1.z.number()),
|
|
114
|
+
original_text: zod_1.z.string().optional(),
|
|
115
|
+
translated_text: zod_1.z.string().optional(),
|
|
116
|
+
language: zod_1.z.string().optional(),
|
|
117
|
+
is_reply: zod_1.z.boolean().optional(),
|
|
118
|
+
reply_to_message_id: zod_1.z.string().or(zod_1.z.number()).optional(),
|
|
119
|
+
metadata: zod_1.z.record(zod_1.z.string(), zod_1.z.any()).optional(),
|
|
120
|
+
created_at: zod_1.z.number(),
|
|
121
|
+
updated_at: zod_1.z.number(),
|
|
122
|
+
media: zod_1.z
|
|
123
|
+
.array(zod_1.z.object({
|
|
124
|
+
type: zod_1.z.enum(['image', 'video', 'audio', 'link']),
|
|
125
|
+
url: zod_1.z.string().url(),
|
|
126
|
+
caption: zod_1.z.string().optional(),
|
|
127
|
+
}))
|
|
128
|
+
.optional(),
|
|
129
|
+
author: zod_1.z.string().optional(),
|
|
130
|
+
replies: zod_1.z.any().optional(),
|
|
131
|
+
entities: zod_1.z.any().optional(),
|
|
132
|
+
author_username: zod_1.z.string().optional(),
|
|
133
|
+
author_id: zod_1.z.string().optional(),
|
|
134
|
+
group_id: zod_1.z.string().optional(), // e.g., 'default_pipeline'
|
|
135
|
+
});
|
|
136
|
+
// Helper: return the `exports.platformEntityMap` entry for `platform`
// (`undefined` when the key is not in the map).
const getAllowedEntitiesForPlatform = (platform) => exports.platformEntityMap[platform];
exports.getAllowedEntitiesForPlatform = getAllowedEntitiesForPlatform;
|
package/index.ts
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
// Export types for consumers
|
|
2
|
+
export * from './types';
|
|
3
|
+
|
|
4
|
+
import { Schema } from 'mongoose';
|
|
5
|
+
import {
|
|
6
|
+
generalStatusList,
|
|
7
|
+
platformsList,
|
|
8
|
+
entityTypesList,
|
|
9
|
+
zodSourceStatusList
|
|
10
|
+
} from './types';
|
|
11
|
+
|
|
12
|
+
/**
 * Mongoose schema for a single harvested data item.
 *
 * Required paths: `source_id`, `source_name`, `platform`, `timestamp`.
 * `created_at` / `updated_at` default to `Date.now`; `text_geo` defaults to `[]`.
 *
 * NOTE(review): this schema declares `source_geo` and `pipeline_name`, which
 * `zodDataSchema` does not, while `zodDataSchema` declares `group_id`, which
 * this schema does not — confirm the difference is intentional.
 */
export const MongoDataSchema = new Schema(
  {
    // reference to Source _id
    source_id: { type: Schema.Types.Mixed, required: true },
    // e.g., 'telegram:1234567890' (message_id)
    source_public_id: { type: Schema.Types.Mixed },
    // e.g., 'Telegram Channel Name'
    source_name: { type: String, required: true },
    // channel_id reference to Source source_id
    platform_id: { type: Schema.Types.Mixed },
    // e.g., 'telegram', 'facebook'
    platform: { type: String, enum: platformsList, required: true },
    // message_id
    original_text_id: { type: Schema.Types.Mixed },
    // original text content
    original_text: { type: String },
    // translated text if available
    translated_text: { type: String },
    // date in milliseconds
    timestamp: { type: Number, required: true },
    language: { type: String },
    // e.g., ["sinjil", "ramallah", "west bank"]
    text_geo: { type: [String], default: [] },
    // e.g., 'hebron'
    source_geo: { type: String },
    // true if this text is a reply to another text
    is_reply: { type: Boolean },
    reply_to_message_id: { type: Schema.Types.Mixed },
    // platform-specific fields
    metadata: { type: Object },
    created_at: { type: Number, default: Date.now },
    updated_at: { type: Number, default: Date.now },
    // media attachments
    media: {
      type: [
        {
          type: { type: String, enum: ['image', 'video', 'audio', 'link'] },
          url: { type: String },
          caption: { type: String },
        },
      ],
    },
    // e.g., author name or ID
    author: { type: String },
    // array of reply texts or IDs
    replies: { type: Schema.Types.Mixed },
    // array of entities
    entities: { type: Schema.Types.Mixed },
    // e.g., author username
    author_username: { type: String },
    // e.g., author ID
    author_id: { type: String },
    // e.g., 'default_pipeline'
    pipeline_name: { type: String },
    // e.g., "New York", "California"
    source_region: { type: String },
  },
  // No version key; virtuals are included by toJSON() and toObject().
  { versionKey: false, toJSON: { virtuals: true }, toObject: { virtuals: true } }
);
|
|
58
|
+
|
|
59
|
+
/**
 * Mongoose schema for a stored per-user prompt.
 *
 * `user_id` is required; both timestamps default to `Date.now`.
 */
export const MongoInstructionsSchema = new Schema(
  {
    user_id: { type: String, required: true },
    prompt: { type: String },
    created_at: { type: Number, default: Date.now },
    updated_at: { type: Number, default: Date.now },
  },
  // No version key; virtuals are included by toJSON() and toObject().
  { versionKey: false, toJSON: { virtuals: true }, toObject: { virtuals: true } }
);
|
|
72
|
+
|
|
73
|
+
/**
 * Mongoose schema for a region.
 *
 * `name` and `slug` are required; both timestamps default to `Date.now`.
 */
export const MongoRegionSchema = new Schema(
  {
    // e.g., "New York"
    name: { type: String, required: true },
    // e.g., "new-york"
    slug: { type: String, required: true },
    // e.g., legend for maps
    legend: { type: String },
    // creation date
    created_at: { type: Number, default: Date.now },
    // last update date
    updated_at: { type: Number, default: Date.now },
  },
  // No version key; virtuals are included by toJSON() and toObject().
  { versionKey: false, toJSON: { virtuals: true }, toObject: { virtuals: true } }
);
|
|
87
|
+
|
|
88
|
+
/**
 * Mongoose schema for a group of sources on one platform.
 *
 * `name` and `platform` are required; `platform` is restricted to
 * `platformsList`. Both timestamps default to `Date.now`.
 */
export const MongoSourceGroupSchema = new Schema(
  {
    // e.g., "Group Name"
    name: { type: String, required: true },
    // e.g., "Description of the group"
    description: { type: String },
    platform: { type: String, enum: platformsList, required: true },
    // e.g., region ID
    region_id: { type: String },
    // e.g., maximum number of active sources allowed
    max_active_sources: { type: Number },
    // creation date
    created_at: { type: Number, default: Date.now },
    // last update date
    updated_at: { type: Number, default: Date.now },
  },
  // No version key; virtuals are included by toJSON() and toObject().
  { versionKey: false, toJSON: { virtuals: true }, toObject: { virtuals: true } }
);
|
|
108
|
+
|
|
109
|
+
/**
 * Mongoose schema for a source.
 *
 * Required paths: `platform`, `entity`, `name`. `status` defaults to
 * `'pending'`, `is_public` to `true`, `tags` to `[]`, `metadata` to `{}`.
 *
 * Unlike `zodSourceSchema`, nothing here checks that `entity` is allowed for
 * the chosen `platform`; each path is only checked against its own enum.
 */
export const MongoSourceSchema = new Schema(
  {
    // e.g., 'Telegram Channel Name'
    title: { type: String },
    platform: { type: String, enum: platformsList, required: true },
    // e.g., 'channel', 'group', 'page', 'profile', 'hashtag'
    entity: { type: String, enum: entityTypesList, required: true },
    // e.g., '@telegram_channel_id'
    public_id: { type: Schema.Types.Mixed },
    // e.g., 'Telegram Channel Name'
    name: { type: String, required: true },
    // e.g., 'https://t.me/telegram_channel_name'
    url: { type: String },
    // e.g., 'A channel about news and updates'
    description: { type: String },
    language: { type: String },
    // array of tags
    tags: { type: [String], default: [] },
    // Marked "INDEX" by the author; no index is declared in this schema.
    status: { type: String, enum: zodSourceStatusList, default: 'pending' },
    // true if the source is public
    is_public: { type: Boolean, default: true },
    // flexible per platform
    metadata: { type: Object, default: {} },
    created_at: { type: Number, default: Date.now },
    updated_at: { type: Number, default: Date.now },
    // e.g., region ID
    region_id: { type: String },
    // e.g., source group ID
    group_id: { type: String },
    // user who requested this source
    requested_by: {
      type: {
        user_id: { type: String },
        username: { type: String },
        full_name: { type: String },
        email: { type: String },
        requested_at: { type: Number },
      },
      default: undefined,
    },
    // internal notes about the source
    notes: { type: String },
  },
  // No version key; virtuals are included by toJSON() and toObject().
  { versionKey: false, toJSON: { virtuals: true }, toObject: { virtuals: true } }
);
|
|
157
|
+
|
|
158
|
+
/**
 * Mongoose schema for a saved search query.
 *
 * Required paths: `geos`, `geos_ids`, `time_range`, `user_id`.
 * `timestamp` defaults to `Date.now`.
 */
export const MongoQuerySchema = new Schema(
  {
    // e.g., 'Search Query'
    title: { type: String },
    // search query includes geo information where it should be extracted with LLM
    query: { type: String },
    // array of source _id strings
    sources: { type: [String] },
    // array of geo strings
    geos: { type: [String], required: true },
    // array of geo _id strings
    geos_ids: { type: [String], required: true },
    // can be relative or absolute time range
    time_range: { type: Schema.Types.Mixed, required: true },
    // user identifier
    user_id: { type: String, required: true },
    // optional user instructions for the query
    user_instructions: { type: String },
    // user's timezone
    user_time_zone: { type: String },
    // creation date
    timestamp: { type: Number, default: Date.now },
  },
  // No version key; virtuals are included by toJSON() and toObject().
  { versionKey: false, toJSON: { virtuals: true }, toObject: { virtuals: true } }
);
|
|
180
|
+
|
|
181
|
+
/**
 * Mongoose schema for per-user settings.
 *
 * `user_id` is required. Defaults: `geos` = `[]`, `has_jobs_access` = `false`,
 * `active_jobs_limit` = `5`, `status` = `'active'`.
 *
 * NOTE(review): `created_at` / `updated_at` are typed `Date` here, whereas
 * most other schemas in this file type them as `Number` — confirm intended.
 */
export const MongoUserSettingsSchema = new Schema(
  {
    // array of source _id strings
    sources: { type: [String] },
    // array of geo strings
    geos: { type: [String], default: [] },
    time_range: { type: Schema.Types.Mixed },
    user_id: { type: String, required: true },
    thread_id: { type: String },
    // optional user instructions for the query
    instructions: { type: String },
    // true if the user has access to jobs
    has_jobs_access: { type: Boolean, default: false },
    // maximum number of active jobs allowed
    active_jobs_limit: { type: Number, default: 5 },
    // creation date
    created_at: { type: Date, default: Date.now },
    // last update date
    updated_at: { type: Date, default: Date.now },
    // status of the user
    status: { type: String, enum: generalStatusList, default: 'active' },
  },
  // No version key; virtuals are included by toJSON() and toObject().
  { versionKey: false, toJSON: { virtuals: true }, toObject: { virtuals: true } }
);
|
|
207
|
+
|
|
208
|
+
/**
 * Mongoose schema for a conversation.
 *
 * `user_id` is required. `status` defaults to `'active'`, `is_job` to `false`.
 *
 * NOTE(review): `created_at` / `updated_at` are typed `Date` here, whereas
 * most other schemas in this file type them as `Number` — confirm intended.
 */
export const MongoConversationSchema = new Schema(
  {
    // user identifier
    user_id: { type: String, required: true },
    // optional title for the conversation
    title: { type: String },
    // thread ID for the conversation
    thread_id: { type: String },
    // status of the conversation
    status: { type: String, enum: generalStatusList, default: 'active' },
    // creation date
    created_at: { type: Date, default: Date.now },
    // last update date
    updated_at: { type: Date, default: Date.now },
    // true if this is a job conversation
    is_job: { type: Boolean, default: false },
  },
  // No version key; virtuals are included by toJSON() and toObject().
  { versionKey: false, toJSON: { virtuals: true }, toObject: { virtuals: true } }
);
|
|
228
|
+
|
|
229
|
+
/**
 * Mongoose schema for an automation job.
 *
 * Required paths: `user_id`, `job_name`, `query`. `status` defaults to
 * `'active'`; both timestamps default to `Date.now`.
 */
export const MongoJobSchema = new Schema(
  {
    // user identifier
    user_id: { type: String, required: true },
    // name of the automation job
    job_name: { type: String, required: true },
    // description of the job
    job_description: { type: String },
    // status of the job
    status: { type: String, enum: generalStatusList, default: 'active' },
    // creation date
    created_at: { type: Number, default: Date.now },
    // last update date
    updated_at: { type: Number, default: Date.now },
    // cron schedule for the job
    schedule: { type: String },
    // cron schedule in free-text format
    schedule_text: { type: String },
    // Optional date until which the job is active. The path is optional simply
    // because it is not `required`; the previous `Number || undefined` always
    // evaluated to `Number`, so the `|| undefined` had no effect.
    active_until: { type: Number },
    // query associated with the job
    query: { type: Schema.Types.Mixed, required: true },
    // optional conversation ID for the job
    conversation_id: { type: String },
    // thread ID for the job
    thread_id: { type: String },
    // optional timezone for the job
    time_zone: { type: String },
  },
  // No version key; virtuals are included by toJSON() and toObject().
  { versionKey: false, toJSON: { virtuals: true }, toObject: { virtuals: true } }
);
|
|
255
|
+
|
|
256
|
+
/**
 * Mongoose schema for a geo record.
 *
 * No path is required. `subscribers` defaults to `[]`, `is_used` to `false`.
 */
export const MongoGeoSchema = new Schema(
  {
    // e.g., "New York, USA"
    geo_text: { type: String },
    // e.g., 1751210833000
    timestamp: { type: Number },
    // e.g., 42
    count: { type: Number },
    // e.g., "New York", "California"
    region: { type: String },
    // array of user IDs who subscribed to this geo
    subscribers: { type: [String], default: [] },
    // true if this geo is used in any source
    is_used: { type: Boolean, default: false },
  },
  // No version key; virtuals are included by toJSON() and toObject().
  { versionKey: false, toJSON: { virtuals: true }, toObject: { virtuals: true } }
);
|
|
271
|
+
|
|
272
|
+
/**
 * Mongoose schema for a named selection of geos.
 *
 * No path is required; `geos` defaults to `[]`. The timestamps have no
 * default here, so they are only set when the caller supplies them.
 */
export const MongoGeoSelectionSchema = new Schema(
  {
    // e.g., "New York, USA"
    title: { type: String },
    // e.g., "A city in the USA"
    description: { type: String },
    // e.g., "city", "region", "country"
    type: { type: String },
    // e.g., ["New York", "Los Angeles"]
    geos: { type: [String], default: [] },
    // e.g., "New York", "California"
    region: { type: String },
    // creation date
    created_at: { type: Number },
    // last update date
    updated_at: { type: Number },
  },
  // No version key; virtuals are included by toJSON() and toObject().
  { versionKey: false, toJSON: { virtuals: true }, toObject: { virtuals: true } }
);
|
|
288
|
+
|
|
289
|
+
/**
 * Mongoose schema for an API key belonging to a user.
 *
 * `user_id` and `api_key` are required. `status` defaults to `'active'`;
 * both timestamps default to `Date.now`.
 */
export const MongoApiKeySchema = new Schema(
  {
    // user identifier
    user_id: { type: String, required: true },
    // API key
    api_key: { type: String, required: true },
    // creation date
    created_at: { type: Number, default: Date.now },
    // last update date
    updated_at: { type: Number, default: Date.now },
    // status of the API key
    status: { type: String, enum: generalStatusList, default: 'active' },
  },
  // No version key; virtuals are included by toJSON() and toObject().
  { versionKey: false, toJSON: { virtuals: true }, toObject: { virtuals: true } }
);
|
package/package.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "harvester_sdk",
|
|
3
|
+
"version": "1.0.1",
|
|
4
|
+
"description": "SDK for interacting with the Harvester API",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"repository": {
|
|
8
|
+
"url": "git+https://github.com/badgerdefense/harvester.sdk.git",
|
|
9
|
+
"type": "git"
|
|
10
|
+
},
|
|
11
|
+
"scripts": {
|
|
12
|
+
"build": "tsc",
|
|
13
|
+
"test": "jest",
|
|
14
|
+
"prepublish": "npm run build"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"asfur",
|
|
18
|
+
"sdk",
|
|
19
|
+
"api"
|
|
20
|
+
],
|
|
21
|
+
"author": "deerland",
|
|
22
|
+
"license": "MIT",
|
|
23
|
+
"devDependencies": {
|
|
24
|
+
"@types/jest": "^29.5.0",
|
|
25
|
+
"jest": "^29.5.0",
|
|
26
|
+
"typescript": "^4.9.5"
|
|
27
|
+
},
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"axios": "^1.9.0",
|
|
30
|
+
"mongoose": "^8.16.4",
|
|
31
|
+
"zod": "^3.25.76"
|
|
32
|
+
}
|
|
33
|
+
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "es2018",
|
|
4
|
+
"module": "commonjs",
|
|
5
|
+
"declaration": true,
|
|
6
|
+
"outDir": "./dist",
|
|
7
|
+
"strict": true,
|
|
8
|
+
"esModuleInterop": true,
|
|
9
|
+
"skipLibCheck": true,
|
|
10
|
+
"forceConsistentCasingInFileNames": true
|
|
11
|
+
},
|
|
12
|
+
"include": ["index.ts", "types.ts"],
|
|
13
|
+
"exclude": ["node_modules", "dist"]
|
|
14
|
+
}
|
package/types.ts
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import { object, pipeline, z } from 'zod';
|
|
2
|
+
|
|
3
|
+
/** Platforms a source can belong to. */
export const platformsList = ['telegram', 'facebook', 'instagram', 'tiktok', 'website'] as const;

/** Every entity kind a source may be, across all platforms. */
export const entityTypesList = [
  'profile',
  'group',
  'page',
  'channel',
  'hashtag',
  'website',
] as const;

/** Platform-specific entity mappings: platform name -> entity kinds valid on it. */
export const platformEntityMap = {
  telegram: ['channel', 'group'],
  facebook: ['profile', 'page', 'group'],
  instagram: ['profile', 'hashtag'],
  tiktok: ['profile', 'hashtag'],
  website: ['website'],
} as const;

/** Status values accepted for a source. */
export const zodSourceStatusList = [
  // active and approved sources
  'active',
  // sources that are pending approval
  'pending',
  // sources that are inactive
  'inactive',
  // sources that are under review
  'requested',
] as const;

/** Two-state status list. */
export const generalStatusList = ['active', 'inactive'] as const;
|
|
30
|
+
|
|
31
|
+
/**
 * Validation schema for a region record.
 * Only `name` and `slug` are mandatory; every other key may be omitted.
 */
export const zodRegionSchema = z.object({
  _id: z.string().optional(),
  // e.g., "New York"
  name: z.string(),
  // e.g., "new-york"
  slug: z.string(),
  // e.g., legend for maps
  legend: z.string().optional(),
  // creation date
  created_at: z.number().optional(),
  // last update date
  updated_at: z.number().optional(),
});
|
|
39
|
+
|
|
40
|
+
/**
 * Validation schema for a source group.
 * `name` and `platform` are mandatory; everything else may be omitted.
 */
export const zodSourceGroupSchema = z.object({
  _id: z.string().optional(),
  // e.g., "Group Name"
  name: z.string(),
  // e.g., "Description of the group"
  description: z.string().optional(),
  // one of `platformsList`, e.g., 'telegram', 'facebook'
  platform: z.enum(platformsList),
  // e.g., region ID
  region_id: z.string().optional(),
  // e.g., maximum number of active sources allowed
  max_active_sources: z.number().optional(),
  // creation date
  created_at: z.number().optional(),
  // last update date
  updated_at: z.number().optional(),
});
|
|
50
|
+
|
|
51
|
+
/**
 * Validation schema for a source.
 *
 * After the per-field checks, a refinement rejects any `entity` that
 * `platformEntityMap` does not list for the chosen `platform`; the issue is
 * reported on the `entity` path.
 */
export const zodSourceSchema = z
  .object({
    _id: z.string().optional(),
    // e.g., 'Telegram Channel Name'
    title: z.string().optional(),
    // e.g., 'telegram', 'facebook'
    platform: z.enum(platformsList),
    // e.g., 'channel', 'group', 'page', 'profile', 'hashtag'
    entity: z.enum(entityTypesList),
    // e.g., '@telegram_channel_id'
    public_id: z.string().or(z.number()).optional(),
    // e.g., 'Telegram Channel Name'
    name: z.string(),
    // e.g., 'https://t.me/telegram_channel_name'
    url: z.string().url().optional(),
    // e.g., 'A channel about news and updates'
    description: z.string().optional(),
    language: z.string().optional(),
    tags: z.array(z.string()).optional(),
    // INDEX
    status: z.enum(zodSourceStatusList),
    // true if the source is public
    is_public: z.boolean().default(true),
    // flexible per platform
    metadata: z.record(z.string(), z.any()).optional(),
    created_at: z.number().optional(),
    updated_at: z.number().optional(),
    // e.g., region ID
    region_id: z.string().optional(),
    // e.g., source group ID
    group_id: z.string().optional(),
    // user who requested this source
    requested_by: z
      .object({
        user_id: z.string().optional(),
        username: z.string().optional(),
        full_name: z.string().optional(),
        email: z.string().optional(),
        requested_at: z.number().optional(),
      })
      .optional(),
    // internal notes about the source
    notes: z.string().optional(),
  })
  .refine(
    // Valid only when the entity is listed for the platform.
    ({ platform, entity }) => (platformEntityMap[platform] as readonly string[]).includes(entity),
    ({ platform, entity }) => {
      const allowed = platformEntityMap[platform].join(', ');
      return {
        message: `Entity "${entity}" is not valid for platform "${platform}". Allowed entities: ${allowed}`,
        path: ['entity'],
      };
    }
  );
|
|
91
|
+
|
|
92
|
+
/**
 * Validation schema for a geo record.
 * Every key is optional; `is_used` falls back to `false` when it is absent.
 */
export const zodGeoSchema = z.object({
  _id: z.string().optional(),
  // e.g., "New York, USA"
  geo_text: z.string().optional(),
  // e.g., 1751210833000
  timestamp: z.number().optional(),
  // e.g., 42
  count: z.number().optional(),
  // e.g., "New York", "California"
  region: z.string().optional(),
  // array of user IDs who subscribed to this geo
  subscribers: z.array(z.string()).optional(),
  // true if this geo is used in any source
  is_used: z.boolean().optional().default(false),
});
|
|
101
|
+
|
|
102
|
+
/** Validation schema for a geo selection; every key is optional. */
export const zodGeoSelectionSchema = z.object({
  _id: z.string().optional(),
  // e.g., "New York, USA"
  title: z.string().optional(),
  // e.g., "A city in the USA"
  description: z.string().optional(),
  // e.g., "city", "region", "country"
  type: z.string().optional(),
  // e.g., ["New York", "Los Angeles"]
  geos: z.array(z.string()).optional(),
  // e.g., "New York", "California"
  region: z.string().optional(),
  // creation date
  created_at: z.number().optional(),
  // last update date
  updated_at: z.number().optional(),
});
|
|
112
|
+
|
|
113
|
+
/**
 * Validation schema for a single harvested data item.
 * Identifier-like fields accept either a string or a number.
 */
export const zodDataSchema = z.object({
  // Filterable fields
  // INDEX - e.g., ["sinjil", "ramallah", "west bank"]
  text_geo: z.array(z.string()),
  // INDEX - date in milliseconds - e.g., 1751210833000
  timestamp: z.number(),
  // e.g., 'telegram', 'facebook'
  platform: z.enum(platformsList),
  // e.g., 'hebron'
  source_region: z.string().optional(),

  _id: z.string().optional(),
  // INDEX - reference to Source _id (e.g., '60c72b2f9b1e8d3f4c8b4567')
  source_id: z.string().or(z.number()),
  // e.g., 'telegram:1234567890' (message_id)
  source_public_id: z.string().or(z.number()),
  // e.g., 'Telegram Channel Name'
  source_name: z.string(),
  // channel_id reference to Source source_id
  platform_id: z.string().or(z.number()),
  // message_id
  original_text_id: z.string().or(z.number()),
  // original text content
  original_text: z.string().optional(),
  // translated text if available - most of the time it will be translated to English
  translated_text: z.string().optional(),
  language: z.string().optional(),
  // true if this text is a reply to another text
  is_reply: z.boolean().optional(),
  reply_to_message_id: z.string().or(z.number()).optional(),
  // platform-specific fields
  metadata: z.record(z.string(), z.any()).optional(),
  created_at: z.number(),
  updated_at: z.number(),

  // media attachments; each entry needs a known `type` and a valid URL
  media: z
    .array(
      z.object({
        type: z.enum(['image', 'video', 'audio', 'link']),
        url: z.string().url(),
        caption: z.string().optional(),
      })
    )
    .optional(),

  // e.g., author name or ID
  author: z.string().optional(),
  // array of reply texts or IDs
  replies: z.any().optional(),
  // array of entities
  entities: z.any().optional(),
  // e.g., author username
  author_username: z.string().optional(),
  // e.g., author ID
  author_id: z.string().optional(),
  // NOTE(review): the original example value here was 'default_pipeline',
  // which reads like a pipeline name rather than a group ID — confirm.
  group_id: z.string().optional(),
});
|
|
149
|
+
|
|
150
|
+
// Source status values (see `zodSourceStatusList`):
//   'active'    - active and approved sources
//   'pending'   - sources that are pending approval
//   'inactive'  - sources that are inactive
//   'requested' - sources that are under review
|
|
157
|
+
// Types inferred from the zod schemas.
export type RegionType = z.infer<typeof zodRegionSchema>;
export type SourceGroupType = z.infer<typeof zodSourceGroupSchema>;
export type SourceType = z.infer<typeof zodSourceSchema>;
export type DataType = z.infer<typeof zodDataSchema>;
export type GeoType = z.infer<typeof zodGeoSchema>;
export type GeoSelectionType = z.infer<typeof zodGeoSelectionSchema>;

/** Union of the literal values in `zodSourceStatusList`. */
export type StatusType = (typeof zodSourceStatusList)[number];

export type TimeRangeTypeLiteral = 'relative' | 'absolute';

/** The `platform`, `url` and `description` of a source, plus an optional `public_id`. */
export type AddSourceToReviewType = Pick<SourceType, 'platform' | 'url' | 'description'> &
  Partial<Pick<SourceType, 'public_id'>>;

/** Helper type: the entity literals `platformEntityMap` lists for platform `T`. */
export type PlatformEntityType<T extends typeof platformsList[number]> =
  typeof platformEntityMap[T][number];
|
|
173
|
+
|
|
174
|
+
/**
 * Helper: look up the entity kinds allowed for a platform.
 *
 * @param platform - One of the values in `platformsList`.
 * @returns The `platformEntityMap` entry for that platform.
 */
export const getAllowedEntitiesForPlatform = (platform: typeof platformsList[number]) =>
  platformEntityMap[platform];
|