harvester_sdk 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +9 -3
- package/dist/index.js +4 -2
- package/dist/types.d.ts +9 -3
- package/dist/types.js +4 -2
- package/index.ts +7 -6
- package/package.json +1 -1
- package/types.ts +25 -23
package/dist/index.d.ts
CHANGED
|
@@ -36,6 +36,7 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
|
|
|
36
36
|
}, {
|
|
37
37
|
created_at: number;
|
|
38
38
|
updated_at: number;
|
|
39
|
+
source_dominant_geos: string[];
|
|
39
40
|
source_id: any;
|
|
40
41
|
data_geo: string[];
|
|
41
42
|
media: import("mongoose").Types.DocumentArray<{
|
|
@@ -53,12 +54,12 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
|
|
|
53
54
|
}>;
|
|
54
55
|
platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | null | undefined;
|
|
55
56
|
metadata?: any;
|
|
56
|
-
group_id?: string | null | undefined;
|
|
57
57
|
timestamp?: number | null | undefined;
|
|
58
58
|
source_region_id?: string | null | undefined;
|
|
59
59
|
source_region_title?: string | null | undefined;
|
|
60
60
|
source_title?: string | null | undefined;
|
|
61
61
|
source_url?: string | null | undefined;
|
|
62
|
+
source_group_id?: string | null | undefined;
|
|
62
63
|
data_id?: any;
|
|
63
64
|
data_text?: string | null | undefined;
|
|
64
65
|
data_url?: string | null | undefined;
|
|
@@ -75,6 +76,7 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
|
|
|
75
76
|
}, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
|
|
76
77
|
created_at: number;
|
|
77
78
|
updated_at: number;
|
|
79
|
+
source_dominant_geos: string[];
|
|
78
80
|
source_id: any;
|
|
79
81
|
data_geo: string[];
|
|
80
82
|
media: import("mongoose").Types.DocumentArray<{
|
|
@@ -92,12 +94,12 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
|
|
|
92
94
|
}>;
|
|
93
95
|
platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | null | undefined;
|
|
94
96
|
metadata?: any;
|
|
95
|
-
group_id?: string | null | undefined;
|
|
96
97
|
timestamp?: number | null | undefined;
|
|
97
98
|
source_region_id?: string | null | undefined;
|
|
98
99
|
source_region_title?: string | null | undefined;
|
|
99
100
|
source_title?: string | null | undefined;
|
|
100
101
|
source_url?: string | null | undefined;
|
|
102
|
+
source_group_id?: string | null | undefined;
|
|
101
103
|
data_id?: any;
|
|
102
104
|
data_text?: string | null | undefined;
|
|
103
105
|
data_url?: string | null | undefined;
|
|
@@ -114,6 +116,7 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
|
|
|
114
116
|
}>, {}> & import("mongoose").FlatRecord<{
|
|
115
117
|
created_at: number;
|
|
116
118
|
updated_at: number;
|
|
119
|
+
source_dominant_geos: string[];
|
|
117
120
|
source_id: any;
|
|
118
121
|
data_geo: string[];
|
|
119
122
|
media: import("mongoose").Types.DocumentArray<{
|
|
@@ -131,12 +134,12 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
|
|
|
131
134
|
}>;
|
|
132
135
|
platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | null | undefined;
|
|
133
136
|
metadata?: any;
|
|
134
|
-
group_id?: string | null | undefined;
|
|
135
137
|
timestamp?: number | null | undefined;
|
|
136
138
|
source_region_id?: string | null | undefined;
|
|
137
139
|
source_region_title?: string | null | undefined;
|
|
138
140
|
source_title?: string | null | undefined;
|
|
139
141
|
source_url?: string | null | undefined;
|
|
142
|
+
source_group_id?: string | null | undefined;
|
|
140
143
|
data_id?: any;
|
|
141
144
|
data_text?: string | null | undefined;
|
|
142
145
|
data_url?: string | null | undefined;
|
|
@@ -260,6 +263,7 @@ export declare const MongoSourceSchema: Schema<any, import("mongoose").Model<any
|
|
|
260
263
|
public_id?: any;
|
|
261
264
|
url?: string | null | undefined;
|
|
262
265
|
language?: string | null | undefined;
|
|
266
|
+
region_title?: string | null | undefined;
|
|
263
267
|
group_id?: string | null | undefined;
|
|
264
268
|
requested_by?: {
|
|
265
269
|
user_id?: string | null | undefined;
|
|
@@ -285,6 +289,7 @@ export declare const MongoSourceSchema: Schema<any, import("mongoose").Model<any
|
|
|
285
289
|
public_id?: any;
|
|
286
290
|
url?: string | null | undefined;
|
|
287
291
|
language?: string | null | undefined;
|
|
292
|
+
region_title?: string | null | undefined;
|
|
288
293
|
group_id?: string | null | undefined;
|
|
289
294
|
requested_by?: {
|
|
290
295
|
user_id?: string | null | undefined;
|
|
@@ -310,6 +315,7 @@ export declare const MongoSourceSchema: Schema<any, import("mongoose").Model<any
|
|
|
310
315
|
public_id?: any;
|
|
311
316
|
url?: string | null | undefined;
|
|
312
317
|
language?: string | null | undefined;
|
|
318
|
+
region_title?: string | null | undefined;
|
|
313
319
|
group_id?: string | null | undefined;
|
|
314
320
|
requested_by?: {
|
|
315
321
|
user_id?: string | null | undefined;
|
package/dist/index.js
CHANGED
|
@@ -27,9 +27,11 @@ exports.MongoDataSchema = new mongoose_1.Schema({
|
|
|
27
27
|
},
|
|
28
28
|
source_region_id: { type: String },
|
|
29
29
|
source_region_title: { type: String },
|
|
30
|
+
source_dominant_geos: { type: [String], default: [] },
|
|
30
31
|
source_id: { type: mongoose_1.Schema.Types.Mixed, required: true },
|
|
31
32
|
source_title: { type: String },
|
|
32
33
|
source_url: { type: String },
|
|
34
|
+
source_group_id: { type: String },
|
|
33
35
|
data_id: { type: mongoose_1.Schema.Types.Mixed },
|
|
34
36
|
data_geo: { type: [String], default: [] },
|
|
35
37
|
data_text: { type: String },
|
|
@@ -55,8 +57,7 @@ exports.MongoDataSchema = new mongoose_1.Schema({
|
|
|
55
57
|
author: { type: String },
|
|
56
58
|
author_username: { type: String },
|
|
57
59
|
replies: { type: mongoose_1.Schema.Types.Mixed },
|
|
58
|
-
author_id: { type: String },
|
|
59
|
-
group_id: { type: String }, // e.g., source group ID
|
|
60
|
+
author_id: { type: String }, // e.g., author ID
|
|
60
61
|
}, {
|
|
61
62
|
versionKey: false,
|
|
62
63
|
toJSON: { virtuals: true },
|
|
@@ -129,6 +130,7 @@ exports.MongoSourceSchema = new mongoose_1.Schema({
|
|
|
129
130
|
created_at: { type: Number, default: Date.now },
|
|
130
131
|
updated_at: { type: Number, default: Date.now },
|
|
131
132
|
region_id: { type: String },
|
|
133
|
+
region_title: { type: String },
|
|
132
134
|
group_id: { type: String },
|
|
133
135
|
dominant_geos: { type: [String], default: [] },
|
|
134
136
|
requested_by: {
|
package/dist/types.d.ts
CHANGED
|
@@ -88,6 +88,7 @@ export declare const zodSourceSchema: z.ZodObject<{
|
|
|
88
88
|
created_at: z.ZodOptional<z.ZodNumber>;
|
|
89
89
|
updated_at: z.ZodOptional<z.ZodNumber>;
|
|
90
90
|
region_id: z.ZodOptional<z.ZodString>;
|
|
91
|
+
region_title: z.ZodOptional<z.ZodString>;
|
|
91
92
|
group_id: z.ZodOptional<z.ZodString>;
|
|
92
93
|
dominant_geos: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
93
94
|
requested_by: z.ZodOptional<z.ZodObject<{
|
|
@@ -127,6 +128,7 @@ export declare const zodSourceSchema: z.ZodObject<{
|
|
|
127
128
|
language?: string | undefined;
|
|
128
129
|
tags?: string[] | undefined;
|
|
129
130
|
metadata?: Record<string, any> | undefined;
|
|
131
|
+
region_title?: string | undefined;
|
|
130
132
|
group_id?: string | undefined;
|
|
131
133
|
dominant_geos?: string[] | undefined;
|
|
132
134
|
requested_by?: {
|
|
@@ -153,6 +155,7 @@ export declare const zodSourceSchema: z.ZodObject<{
|
|
|
153
155
|
tags?: string[] | undefined;
|
|
154
156
|
is_public?: boolean | undefined;
|
|
155
157
|
metadata?: Record<string, any> | undefined;
|
|
158
|
+
region_title?: string | undefined;
|
|
156
159
|
group_id?: string | undefined;
|
|
157
160
|
dominant_geos?: string[] | undefined;
|
|
158
161
|
requested_by?: {
|
|
@@ -222,9 +225,11 @@ export declare const zodDataSchema: z.ZodObject<{
|
|
|
222
225
|
platform: z.ZodOptional<z.ZodEnum<["telegram", "facebook", "instagram", "tiktok", "website"]>>;
|
|
223
226
|
source_region_id: z.ZodOptional<z.ZodString>;
|
|
224
227
|
source_region_title: z.ZodOptional<z.ZodString>;
|
|
228
|
+
source_dominant_geos: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
225
229
|
source_id: z.ZodUnion<[z.ZodString, z.ZodNumber]>;
|
|
226
230
|
source_title: z.ZodOptional<z.ZodString>;
|
|
227
231
|
source_url: z.ZodOptional<z.ZodString>;
|
|
232
|
+
source_group_id: z.ZodOptional<z.ZodString>;
|
|
228
233
|
data_id: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
|
|
229
234
|
data_geo: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
|
|
230
235
|
data_text: z.ZodOptional<z.ZodString>;
|
|
@@ -255,7 +260,6 @@ export declare const zodDataSchema: z.ZodObject<{
|
|
|
255
260
|
author_username: z.ZodOptional<z.ZodString>;
|
|
256
261
|
replies: z.ZodOptional<z.ZodAny>;
|
|
257
262
|
author_id: z.ZodOptional<z.ZodString>;
|
|
258
|
-
group_id: z.ZodOptional<z.ZodString>;
|
|
259
263
|
}, "strip", z.ZodTypeAny, {
|
|
260
264
|
created_at: number;
|
|
261
265
|
updated_at: number;
|
|
@@ -263,12 +267,13 @@ export declare const zodDataSchema: z.ZodObject<{
|
|
|
263
267
|
_id?: string | undefined;
|
|
264
268
|
platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | undefined;
|
|
265
269
|
metadata?: Record<string, any> | undefined;
|
|
266
|
-
group_id?: string | undefined;
|
|
267
270
|
timestamp?: number | undefined;
|
|
268
271
|
source_region_id?: string | undefined;
|
|
269
272
|
source_region_title?: string | undefined;
|
|
273
|
+
source_dominant_geos?: string[] | undefined;
|
|
270
274
|
source_title?: string | undefined;
|
|
271
275
|
source_url?: string | undefined;
|
|
276
|
+
source_group_id?: string | undefined;
|
|
272
277
|
data_id?: string | number | undefined;
|
|
273
278
|
data_geo?: string[] | undefined;
|
|
274
279
|
data_text?: string | undefined;
|
|
@@ -295,12 +300,13 @@ export declare const zodDataSchema: z.ZodObject<{
|
|
|
295
300
|
_id?: string | undefined;
|
|
296
301
|
platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | undefined;
|
|
297
302
|
metadata?: Record<string, any> | undefined;
|
|
298
|
-
group_id?: string | undefined;
|
|
299
303
|
timestamp?: number | undefined;
|
|
300
304
|
source_region_id?: string | undefined;
|
|
301
305
|
source_region_title?: string | undefined;
|
|
306
|
+
source_dominant_geos?: string[] | undefined;
|
|
302
307
|
source_title?: string | undefined;
|
|
303
308
|
source_url?: string | undefined;
|
|
309
|
+
source_group_id?: string | undefined;
|
|
304
310
|
data_id?: string | number | undefined;
|
|
305
311
|
data_geo?: string[] | undefined;
|
|
306
312
|
data_text?: string | undefined;
|
package/dist/types.js
CHANGED
|
@@ -72,6 +72,7 @@ exports.zodSourceSchema = zod_1.z.object({
|
|
|
72
72
|
created_at: zod_1.z.number().optional(),
|
|
73
73
|
updated_at: zod_1.z.number().optional(),
|
|
74
74
|
region_id: zod_1.z.string().optional(),
|
|
75
|
+
region_title: zod_1.z.string().optional(),
|
|
75
76
|
group_id: zod_1.z.string().optional(),
|
|
76
77
|
dominant_geos: zod_1.z.array(zod_1.z.string()).optional(),
|
|
77
78
|
requested_by: zod_1.z
|
|
@@ -110,9 +111,11 @@ exports.zodDataSchema = zod_1.z.object({
|
|
|
110
111
|
platform: zod_1.z.enum(exports.platformsList).optional(),
|
|
111
112
|
source_region_id: zod_1.z.string().optional(),
|
|
112
113
|
source_region_title: zod_1.z.string().optional(),
|
|
114
|
+
source_dominant_geos: zod_1.z.array(zod_1.z.string()).optional(),
|
|
113
115
|
source_id: zod_1.z.string().or(zod_1.z.number()),
|
|
114
116
|
source_title: zod_1.z.string().optional(),
|
|
115
117
|
source_url: zod_1.z.string().url().optional(),
|
|
118
|
+
source_group_id: zod_1.z.string().optional(),
|
|
116
119
|
data_id: zod_1.z.string().or(zod_1.z.number()).optional(),
|
|
117
120
|
data_geo: zod_1.z.array(zod_1.z.string()).optional(),
|
|
118
121
|
data_text: zod_1.z.string().optional(),
|
|
@@ -136,8 +139,7 @@ exports.zodDataSchema = zod_1.z.object({
|
|
|
136
139
|
author: zod_1.z.string().optional(),
|
|
137
140
|
author_username: zod_1.z.string().optional(),
|
|
138
141
|
replies: zod_1.z.any().optional(),
|
|
139
|
-
author_id: zod_1.z.string().optional(),
|
|
140
|
-
group_id: zod_1.z.string().optional(), // e.g., 'default_pipeline'
|
|
142
|
+
author_id: zod_1.z.string().optional(), // e.g., author ID -> get from post
|
|
141
143
|
// translated_text: z.string().optional(), // translated text if available - most of the time it will be translated to English
|
|
142
144
|
// entities: z.any().optional(), // array of reply texts or IDs
|
|
143
145
|
// source_public_id: z.string().or(z.number()), // e.g., 'telegram:1234567890' (message_id)
|
package/index.ts
CHANGED
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
export * from './types';
|
|
3
3
|
|
|
4
4
|
import { Schema } from 'mongoose';
|
|
5
|
-
import {
|
|
6
|
-
generalStatusList,
|
|
7
|
-
platformsList,
|
|
5
|
+
import {
|
|
6
|
+
generalStatusList,
|
|
7
|
+
platformsList,
|
|
8
8
|
entityTypesList,
|
|
9
|
-
sourceStatusList
|
|
9
|
+
sourceStatusList,
|
|
10
10
|
} from './types';
|
|
11
11
|
|
|
12
12
|
export const MongoDataSchema = new Schema(
|
|
@@ -18,9 +18,11 @@ export const MongoDataSchema = new Schema(
|
|
|
18
18
|
}, // e.g., 'telegram', 'facebook'
|
|
19
19
|
source_region_id: { type: String }, // e.g., 'hebron'
|
|
20
20
|
source_region_title: { type: String }, // e.g., 'hebron'
|
|
21
|
+
source_dominant_geos: { type: [String], default: [] }, // e.g., ['hebron', 'west bank']
|
|
21
22
|
source_id: { type: Schema.Types.Mixed, required: true }, // INDEX - reference to Source _id
|
|
22
23
|
source_title: { type: String }, // e.g., 'Telegram Channel Name'
|
|
23
24
|
source_url: { type: String }, // e.g., 'https://t.me/telegram_channel_name'
|
|
25
|
+
source_group_id: { type: String }, // e.g., source group ID
|
|
24
26
|
data_id: { type: Schema.Types.Mixed }, // INDEX - original text ID (e.g., message_id)
|
|
25
27
|
data_geo: { type: [String], default: [] }, // INDEX - e.g., ["sinjil", "ramallah", "west bank"]
|
|
26
28
|
data_text: { type: String }, // processed text content
|
|
@@ -47,7 +49,6 @@ export const MongoDataSchema = new Schema(
|
|
|
47
49
|
author_username: { type: String }, // e.g., author username
|
|
48
50
|
replies: { type: Schema.Types.Mixed }, // array of reply texts or IDs
|
|
49
51
|
author_id: { type: String }, // e.g., author ID
|
|
50
|
-
group_id: { type: String }, // e.g., source group ID
|
|
51
52
|
},
|
|
52
53
|
{
|
|
53
54
|
versionKey: false,
|
|
@@ -69,7 +70,6 @@ export const MongoRegionSchema = new Schema(
|
|
|
69
70
|
default: 'active',
|
|
70
71
|
},
|
|
71
72
|
notes: { type: String }, // internal notes about the region
|
|
72
|
-
|
|
73
73
|
},
|
|
74
74
|
{
|
|
75
75
|
versionKey: false,
|
|
@@ -133,6 +133,7 @@ export const MongoSourceSchema = new Schema(
|
|
|
133
133
|
created_at: { type: Number, default: Date.now },
|
|
134
134
|
updated_at: { type: Number, default: Date.now },
|
|
135
135
|
region_id: { type: String }, // e.g., region ID
|
|
136
|
+
region_title: { type: String }, // e.g., region title
|
|
136
137
|
group_id: { type: String }, // e.g., source group ID
|
|
137
138
|
dominant_geos: { type: [String], default: [] }, // e.g., dominant geos associated with this source
|
|
138
139
|
requested_by: {
|
package/package.json
CHANGED
package/types.ts
CHANGED
|
@@ -77,6 +77,7 @@ export const zodSourceSchema = z.object({
|
|
|
77
77
|
created_at: z.number().optional(),
|
|
78
78
|
updated_at: z.number().optional(),
|
|
79
79
|
region_id: z.string().optional(), // e.g., region ID // INDEX
|
|
80
|
+
region_title: z.string().optional(), // e.g., region title
|
|
80
81
|
group_id: z.string().optional(), // e.g., source group ID // INDEX
|
|
81
82
|
dominant_geos: z.array(z.string()).optional(), // e.g., dominant geos associated with this source
|
|
82
83
|
requested_by: z
|
|
@@ -116,23 +117,25 @@ export const zodDataSchema = z.object({
|
|
|
116
117
|
_id: z.string().optional(),
|
|
117
118
|
timestamp: z.number().optional(), // INDEX - date in milliseconds - e.g., 1751210833000
|
|
118
119
|
platform: z.enum(platformsList).optional(), // e.g., 'telegram', 'facebook'
|
|
119
|
-
source_region_id: z.string().optional(), // e.g., 'hebron'
|
|
120
|
-
source_region_title: z.string().optional(), // e.g., 'hebron'
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
120
|
+
source_region_id: z.string().optional(), // e.g., 'hebron' -> get from source object
|
|
121
|
+
source_region_title: z.string().optional(), // e.g., 'hebron' -> get from source object
|
|
122
|
+
source_dominant_geos: z.array(z.string()).optional(), // e.g., ['hebron', 'west bank'] -> get from source object
|
|
123
|
+
source_id: z.string().or(z.number()), // INDEX - reference to Source _id (e.g., '60c72b2f9b1e8d3f4c8b4567') -> get from source object
|
|
124
|
+
source_title: z.string().optional(), // e.g., 'Telegram Channel Name' -> get from source object
|
|
125
|
+
source_url: z.string().url().optional(), // e.g., 'https://t.me/telegram_channel_name' -> get from source object
|
|
126
|
+
source_group_id: z.string().optional(), // e.g., 'default_pipeline' -> get from source object
|
|
127
|
+
data_id: z.string().or(z.number()).optional(), // INDEX - original text ID (e.g., message_id) -> get from post
|
|
128
|
+
data_geo: z.array(z.string()).optional(), // INDEX - e.g., ["sinjil", "ramallah", "west bank"] -> get from processor
|
|
129
|
+
data_text: z.string().optional(), // processed text content -> get from post or processor
|
|
130
|
+
data_url: z.string().url().optional(), // original text URL if available -> get from post
|
|
131
|
+
data_original_type: z.string().optional(), // e.g., 'post', 'comment', 'reply', 'video', 'image' -> get from post
|
|
132
|
+
data_language: z.string().optional(), // detected language of the text -> get from post or processor
|
|
133
|
+
data_sentiment: z.string().optional(), // sentiment analysis result -> get from processor
|
|
134
|
+
data_timestamp: z.number().optional(), // original post timestamp if different from ingestion timestamp -> get from post
|
|
135
|
+
is_reply: z.boolean().optional(), // true if this text is a reply to another text -> get from post
|
|
136
|
+
reply_to_message_id: z.string().or(z.number()).optional(), // ID of the message this is a reply to -> get from post
|
|
137
|
+
metadata: z.record(z.string(), z.any()).optional(), // platform-specific fields -> get from post
|
|
138
|
+
created_at: z.number(),
|
|
136
139
|
updated_at: z.number(),
|
|
137
140
|
media: z
|
|
138
141
|
.array(
|
|
@@ -142,12 +145,11 @@ export const zodDataSchema = z.object({
|
|
|
142
145
|
caption: z.string().optional(),
|
|
143
146
|
})
|
|
144
147
|
)
|
|
145
|
-
.optional(), // media attachments
|
|
146
|
-
author: z.string().optional(), // e.g., author name or ID
|
|
147
|
-
author_username: z.string().optional(), // e.g., author username
|
|
148
|
-
replies: z.any().optional(), // array of reply texts or IDs
|
|
149
|
-
author_id: z.string().optional(), // e.g., author ID
|
|
150
|
-
group_id: z.string().optional(), // e.g., 'default_pipeline'
|
|
148
|
+
.optional(), // media attachments -> get from post
|
|
149
|
+
author: z.string().optional(), // e.g., author name or ID -> get from post
|
|
150
|
+
author_username: z.string().optional(), // e.g., author username -> get from post
|
|
151
|
+
replies: z.any().optional(), // array of reply texts or IDs -> get from post
|
|
152
|
+
author_id: z.string().optional(), // e.g., author ID -> get from post
|
|
151
153
|
// translated_text: z.string().optional(), // translated text if available - most of the time it will be translated to English
|
|
152
154
|
// entities: z.any().optional(), // array of reply texts or IDs
|
|
153
155
|
// source_public_id: z.string().or(z.number()), // e.g., 'telegram:1234567890' (message_id)
|