harvester_sdk 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -36,11 +36,8 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
36
36
  }, {
37
37
  created_at: number;
38
38
  updated_at: number;
39
- platform: "telegram" | "facebook" | "instagram" | "tiktok" | "website";
40
- timestamp: number;
41
- text_geo: string[];
42
39
  source_id: any;
43
- source_name: string;
40
+ data_geo: string[];
44
41
  media: import("mongoose").Types.DocumentArray<{
45
42
  type?: "image" | "video" | "audio" | "link" | null | undefined;
46
43
  url?: string | null | undefined;
@@ -54,31 +51,32 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
54
51
  url?: string | null | undefined;
55
52
  caption?: string | null | undefined;
56
53
  }>;
57
- language?: string | null | undefined;
54
+ platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | null | undefined;
58
55
  metadata?: any;
59
- source_region?: string | null | undefined;
60
- source_public_id?: any;
61
- platform_id?: any;
62
- original_text_id?: any;
63
- original_text?: string | null | undefined;
64
- translated_text?: string | null | undefined;
56
+ group_id?: string | null | undefined;
57
+ timestamp?: number | null | undefined;
58
+ source_region_id?: string | null | undefined;
59
+ source_region_title?: string | null | undefined;
60
+ source_title?: string | null | undefined;
61
+ source_url?: string | null | undefined;
62
+ data_id?: any;
63
+ data_text?: string | null | undefined;
64
+ data_url?: string | null | undefined;
65
+ data_original_type?: string | null | undefined;
66
+ data_language?: string | null | undefined;
67
+ data_sentiment?: string | null | undefined;
68
+ data_timestamp?: number | null | undefined;
65
69
  is_reply?: boolean | null | undefined;
66
70
  reply_to_message_id?: any;
67
71
  author?: string | null | undefined;
68
- replies?: any;
69
- entities?: any;
70
72
  author_username?: string | null | undefined;
73
+ replies?: any;
71
74
  author_id?: string | null | undefined;
72
- source_geo?: string | null | undefined;
73
- pipeline_name?: string | null | undefined;
74
75
  }, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
75
76
  created_at: number;
76
77
  updated_at: number;
77
- platform: "telegram" | "facebook" | "instagram" | "tiktok" | "website";
78
- timestamp: number;
79
- text_geo: string[];
80
78
  source_id: any;
81
- source_name: string;
79
+ data_geo: string[];
82
80
  media: import("mongoose").Types.DocumentArray<{
83
81
  type?: "image" | "video" | "audio" | "link" | null | undefined;
84
82
  url?: string | null | undefined;
@@ -92,31 +90,32 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
92
90
  url?: string | null | undefined;
93
91
  caption?: string | null | undefined;
94
92
  }>;
95
- language?: string | null | undefined;
93
+ platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | null | undefined;
96
94
  metadata?: any;
97
- source_region?: string | null | undefined;
98
- source_public_id?: any;
99
- platform_id?: any;
100
- original_text_id?: any;
101
- original_text?: string | null | undefined;
102
- translated_text?: string | null | undefined;
95
+ group_id?: string | null | undefined;
96
+ timestamp?: number | null | undefined;
97
+ source_region_id?: string | null | undefined;
98
+ source_region_title?: string | null | undefined;
99
+ source_title?: string | null | undefined;
100
+ source_url?: string | null | undefined;
101
+ data_id?: any;
102
+ data_text?: string | null | undefined;
103
+ data_url?: string | null | undefined;
104
+ data_original_type?: string | null | undefined;
105
+ data_language?: string | null | undefined;
106
+ data_sentiment?: string | null | undefined;
107
+ data_timestamp?: number | null | undefined;
103
108
  is_reply?: boolean | null | undefined;
104
109
  reply_to_message_id?: any;
105
110
  author?: string | null | undefined;
106
- replies?: any;
107
- entities?: any;
108
111
  author_username?: string | null | undefined;
112
+ replies?: any;
109
113
  author_id?: string | null | undefined;
110
- source_geo?: string | null | undefined;
111
- pipeline_name?: string | null | undefined;
112
114
  }>, {}> & import("mongoose").FlatRecord<{
113
115
  created_at: number;
114
116
  updated_at: number;
115
- platform: "telegram" | "facebook" | "instagram" | "tiktok" | "website";
116
- timestamp: number;
117
- text_geo: string[];
118
117
  source_id: any;
119
- source_name: string;
118
+ data_geo: string[];
120
119
  media: import("mongoose").Types.DocumentArray<{
121
120
  type?: "image" | "video" | "audio" | "link" | null | undefined;
122
121
  url?: string | null | undefined;
@@ -130,23 +129,27 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
130
129
  url?: string | null | undefined;
131
130
  caption?: string | null | undefined;
132
131
  }>;
133
- language?: string | null | undefined;
132
+ platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | null | undefined;
134
133
  metadata?: any;
135
- source_region?: string | null | undefined;
136
- source_public_id?: any;
137
- platform_id?: any;
138
- original_text_id?: any;
139
- original_text?: string | null | undefined;
140
- translated_text?: string | null | undefined;
134
+ group_id?: string | null | undefined;
135
+ timestamp?: number | null | undefined;
136
+ source_region_id?: string | null | undefined;
137
+ source_region_title?: string | null | undefined;
138
+ source_title?: string | null | undefined;
139
+ source_url?: string | null | undefined;
140
+ data_id?: any;
141
+ data_text?: string | null | undefined;
142
+ data_url?: string | null | undefined;
143
+ data_original_type?: string | null | undefined;
144
+ data_language?: string | null | undefined;
145
+ data_sentiment?: string | null | undefined;
146
+ data_timestamp?: number | null | undefined;
141
147
  is_reply?: boolean | null | undefined;
142
148
  reply_to_message_id?: any;
143
149
  author?: string | null | undefined;
144
- replies?: any;
145
- entities?: any;
146
150
  author_username?: string | null | undefined;
151
+ replies?: any;
147
152
  author_id?: string | null | undefined;
148
- source_geo?: string | null | undefined;
149
- pipeline_name?: string | null | undefined;
150
153
  }> & {
151
154
  _id: import("mongoose").Types.ObjectId;
152
155
  } & {
@@ -241,7 +244,6 @@ export declare const MongoSourceSchema: Schema<any, import("mongoose").Model<any
241
244
  virtuals: true;
242
245
  };
243
246
  }, {
244
- name: string;
245
247
  created_at: number;
246
248
  updated_at: number;
247
249
  status: "active" | "pending" | "inactive" | "requested" | "discovered" | "deleted";
@@ -267,7 +269,6 @@ export declare const MongoSourceSchema: Schema<any, import("mongoose").Model<any
267
269
  requested_at?: number | null | undefined;
268
270
  } | null | undefined;
269
271
  }, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
270
- name: string;
271
272
  created_at: number;
272
273
  updated_at: number;
273
274
  status: "active" | "pending" | "inactive" | "requested" | "discovered" | "deleted";
@@ -293,7 +294,6 @@ export declare const MongoSourceSchema: Schema<any, import("mongoose").Model<any
293
294
  requested_at?: number | null | undefined;
294
295
  } | null | undefined;
295
296
  }>, {}> & import("mongoose").FlatRecord<{
296
- name: string;
297
297
  created_at: number;
298
298
  updated_at: number;
299
299
  status: "active" | "pending" | "inactive" | "requested" | "discovered" | "deleted";
@@ -323,193 +323,6 @@ export declare const MongoSourceSchema: Schema<any, import("mongoose").Model<any
323
323
  } & {
324
324
  __v: number;
325
325
  }>;
326
- export declare const MongoQuerySchema: Schema<any, import("mongoose").Model<any, any, any, any, any, any>, {}, {}, {}, {}, {
327
- versionKey: false;
328
- toJSON: {
329
- virtuals: true;
330
- };
331
- toObject: {
332
- virtuals: true;
333
- };
334
- }, {
335
- user_id: string;
336
- timestamp: number;
337
- geos: string[];
338
- sources: string[];
339
- geos_ids: string[];
340
- time_range: any;
341
- title?: string | null | undefined;
342
- query?: string | null | undefined;
343
- user_instructions?: string | null | undefined;
344
- user_time_zone?: string | null | undefined;
345
- }, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
346
- user_id: string;
347
- timestamp: number;
348
- geos: string[];
349
- sources: string[];
350
- geos_ids: string[];
351
- time_range: any;
352
- title?: string | null | undefined;
353
- query?: string | null | undefined;
354
- user_instructions?: string | null | undefined;
355
- user_time_zone?: string | null | undefined;
356
- }>, {}> & import("mongoose").FlatRecord<{
357
- user_id: string;
358
- timestamp: number;
359
- geos: string[];
360
- sources: string[];
361
- geos_ids: string[];
362
- time_range: any;
363
- title?: string | null | undefined;
364
- query?: string | null | undefined;
365
- user_instructions?: string | null | undefined;
366
- user_time_zone?: string | null | undefined;
367
- }> & {
368
- _id: import("mongoose").Types.ObjectId;
369
- } & {
370
- __v: number;
371
- }>;
372
- export declare const MongoUserSettingsSchema: Schema<any, import("mongoose").Model<any, any, any, any, any, any>, {}, {}, {}, {}, {
373
- versionKey: false;
374
- toJSON: {
375
- virtuals: true;
376
- };
377
- toObject: {
378
- virtuals: true;
379
- };
380
- }, {
381
- created_at: NativeDate;
382
- updated_at: NativeDate;
383
- status: "active" | "inactive" | "deleted";
384
- user_id: string;
385
- geos: string[];
386
- sources: string[];
387
- has_jobs_access: boolean;
388
- active_jobs_limit: number;
389
- time_range?: any;
390
- thread_id?: string | null | undefined;
391
- instructions?: string | null | undefined;
392
- }, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
393
- created_at: NativeDate;
394
- updated_at: NativeDate;
395
- status: "active" | "inactive" | "deleted";
396
- user_id: string;
397
- geos: string[];
398
- sources: string[];
399
- has_jobs_access: boolean;
400
- active_jobs_limit: number;
401
- time_range?: any;
402
- thread_id?: string | null | undefined;
403
- instructions?: string | null | undefined;
404
- }>, {}> & import("mongoose").FlatRecord<{
405
- created_at: NativeDate;
406
- updated_at: NativeDate;
407
- status: "active" | "inactive" | "deleted";
408
- user_id: string;
409
- geos: string[];
410
- sources: string[];
411
- has_jobs_access: boolean;
412
- active_jobs_limit: number;
413
- time_range?: any;
414
- thread_id?: string | null | undefined;
415
- instructions?: string | null | undefined;
416
- }> & {
417
- _id: import("mongoose").Types.ObjectId;
418
- } & {
419
- __v: number;
420
- }>;
421
- export declare const MongoConversationSchema: Schema<any, import("mongoose").Model<any, any, any, any, any, any>, {}, {}, {}, {}, {
422
- versionKey: false;
423
- toJSON: {
424
- virtuals: true;
425
- };
426
- toObject: {
427
- virtuals: true;
428
- };
429
- }, {
430
- created_at: NativeDate;
431
- updated_at: NativeDate;
432
- status: "active" | "inactive" | "deleted";
433
- user_id: string;
434
- is_job: boolean;
435
- title?: string | null | undefined;
436
- thread_id?: string | null | undefined;
437
- }, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
438
- created_at: NativeDate;
439
- updated_at: NativeDate;
440
- status: "active" | "inactive" | "deleted";
441
- user_id: string;
442
- is_job: boolean;
443
- title?: string | null | undefined;
444
- thread_id?: string | null | undefined;
445
- }>, {}> & import("mongoose").FlatRecord<{
446
- created_at: NativeDate;
447
- updated_at: NativeDate;
448
- status: "active" | "inactive" | "deleted";
449
- user_id: string;
450
- is_job: boolean;
451
- title?: string | null | undefined;
452
- thread_id?: string | null | undefined;
453
- }> & {
454
- _id: import("mongoose").Types.ObjectId;
455
- } & {
456
- __v: number;
457
- }>;
458
- export declare const MongoJobSchema: Schema<any, import("mongoose").Model<any, any, any, any, any, any>, {}, {}, {}, {}, {
459
- versionKey: false;
460
- toJSON: {
461
- virtuals: true;
462
- };
463
- toObject: {
464
- virtuals: true;
465
- };
466
- }, {
467
- created_at: number;
468
- updated_at: number;
469
- status: "active" | "inactive" | "deleted";
470
- user_id: string;
471
- query: any;
472
- job_name: string;
473
- thread_id?: string | null | undefined;
474
- job_description?: string | null | undefined;
475
- schedule?: string | null | undefined;
476
- schedule_text?: string | null | undefined;
477
- active_until?: number | null | undefined;
478
- conversation_id?: string | null | undefined;
479
- time_zone?: string | null | undefined;
480
- }, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
481
- created_at: number;
482
- updated_at: number;
483
- status: "active" | "inactive" | "deleted";
484
- user_id: string;
485
- query: any;
486
- job_name: string;
487
- thread_id?: string | null | undefined;
488
- job_description?: string | null | undefined;
489
- schedule?: string | null | undefined;
490
- schedule_text?: string | null | undefined;
491
- active_until?: number | null | undefined;
492
- conversation_id?: string | null | undefined;
493
- time_zone?: string | null | undefined;
494
- }>, {}> & import("mongoose").FlatRecord<{
495
- created_at: number;
496
- updated_at: number;
497
- status: "active" | "inactive" | "deleted";
498
- user_id: string;
499
- query: any;
500
- job_name: string;
501
- thread_id?: string | null | undefined;
502
- job_description?: string | null | undefined;
503
- schedule?: string | null | undefined;
504
- schedule_text?: string | null | undefined;
505
- active_until?: number | null | undefined;
506
- conversation_id?: string | null | undefined;
507
- time_zone?: string | null | undefined;
508
- }> & {
509
- _id: import("mongoose").Types.ObjectId;
510
- } & {
511
- __v: number;
512
- }>;
513
326
  export declare const MongoGeoSchema: Schema<any, import("mongoose").Model<any, any, any, any, any, any>, {}, {}, {}, {}, {
514
327
  versionKey: false;
515
328
  toJSON: {
package/dist/index.js CHANGED
@@ -14,33 +14,35 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
- exports.MongoApiKeySchema = exports.MongoGeoSelectionSchema = exports.MongoGeoSchema = exports.MongoJobSchema = exports.MongoConversationSchema = exports.MongoUserSettingsSchema = exports.MongoQuerySchema = exports.MongoSourceSchema = exports.MongoSourceGroupSchema = exports.MongoRegionSchema = exports.MongoDataSchema = void 0;
17
+ exports.MongoApiKeySchema = exports.MongoGeoSelectionSchema = exports.MongoGeoSchema = exports.MongoSourceSchema = exports.MongoSourceGroupSchema = exports.MongoRegionSchema = exports.MongoDataSchema = void 0;
18
18
  // Export types for consumers
19
19
  __exportStar(require("./types"), exports);
20
20
  const mongoose_1 = require("mongoose");
21
21
  const types_1 = require("./types");
22
22
  exports.MongoDataSchema = new mongoose_1.Schema({
23
- source_id: { type: mongoose_1.Schema.Types.Mixed, required: true },
24
- source_public_id: { type: mongoose_1.Schema.Types.Mixed },
25
- source_name: { type: String, required: true },
26
- platform_id: { type: mongoose_1.Schema.Types.Mixed },
23
+ timestamp: { type: Number },
27
24
  platform: {
28
25
  type: String,
29
26
  enum: types_1.platformsList,
30
- required: true,
31
27
  },
32
- original_text_id: { type: mongoose_1.Schema.Types.Mixed },
33
- original_text: { type: String },
34
- translated_text: { type: String },
35
- timestamp: { type: Number, required: true },
36
- language: { type: String },
37
- text_geo: { type: [String], default: [] },
38
- source_geo: { type: String },
28
+ source_region_id: { type: String },
29
+ source_region_title: { type: String },
30
+ source_id: { type: mongoose_1.Schema.Types.Mixed, required: true },
31
+ source_title: { type: String },
32
+ source_url: { type: String },
33
+ data_id: { type: mongoose_1.Schema.Types.Mixed },
34
+ data_geo: { type: [String], default: [] },
35
+ data_text: { type: String },
36
+ data_url: { type: String },
37
+ data_original_type: { type: String },
38
+ data_language: { type: String },
39
+ data_sentiment: { type: String },
40
+ data_timestamp: { type: Number },
39
41
  is_reply: { type: Boolean },
40
42
  reply_to_message_id: { type: mongoose_1.Schema.Types.Mixed },
41
43
  metadata: { type: Object },
42
- created_at: { type: Number, default: Date.now },
43
- updated_at: { type: Number, default: Date.now },
44
+ created_at: { type: Number, default: Date.now, required: true },
45
+ updated_at: { type: Number, default: Date.now, required: true },
44
46
  media: {
45
47
  type: [
46
48
  {
@@ -51,12 +53,10 @@ exports.MongoDataSchema = new mongoose_1.Schema({
51
53
  ],
52
54
  },
53
55
  author: { type: String },
54
- replies: { type: mongoose_1.Schema.Types.Mixed },
55
- entities: { type: mongoose_1.Schema.Types.Mixed },
56
56
  author_username: { type: String },
57
+ replies: { type: mongoose_1.Schema.Types.Mixed },
57
58
  author_id: { type: String },
58
- pipeline_name: { type: String },
59
- source_region: { type: String }, // e.g., "New York", "California"
59
+ group_id: { type: String }, // e.g., source group ID
60
60
  }, {
61
61
  versionKey: false,
62
62
  toJSON: { virtuals: true },
@@ -115,7 +115,6 @@ exports.MongoSourceSchema = new mongoose_1.Schema({
115
115
  required: true,
116
116
  },
117
117
  public_id: { type: mongoose_1.Schema.Types.Mixed },
118
- name: { type: String, required: true },
119
118
  url: { type: String },
120
119
  description: { type: String },
121
120
  language: { type: String },
@@ -148,88 +147,6 @@ exports.MongoSourceSchema = new mongoose_1.Schema({
148
147
  toJSON: { virtuals: true },
149
148
  toObject: { virtuals: true },
150
149
  });
151
- exports.MongoQuerySchema = new mongoose_1.Schema({
152
- title: { type: String },
153
- query: { type: String },
154
- sources: { type: [String] },
155
- geos: { type: [String], required: true },
156
- geos_ids: { type: [String], required: true },
157
- time_range: {
158
- type: mongoose_1.Schema.Types.Mixed,
159
- required: true,
160
- },
161
- user_id: { type: String, required: true },
162
- user_instructions: { type: String },
163
- user_time_zone: { type: String },
164
- timestamp: { type: Number, default: Date.now }, // creation date
165
- }, {
166
- versionKey: false,
167
- toJSON: { virtuals: true },
168
- toObject: { virtuals: true },
169
- });
170
- exports.MongoUserSettingsSchema = new mongoose_1.Schema({
171
- sources: { type: [String] },
172
- geos: { type: [String], default: [] },
173
- time_range: {
174
- type: mongoose_1.Schema.Types.Mixed,
175
- },
176
- user_id: { type: String, required: true },
177
- thread_id: { type: String },
178
- instructions: { type: String },
179
- has_jobs_access: { type: Boolean, default: false },
180
- active_jobs_limit: { type: Number, default: 5 },
181
- created_at: { type: Date, default: Date.now },
182
- updated_at: { type: Date, default: Date.now },
183
- status: {
184
- type: String,
185
- enum: types_1.generalStatusList,
186
- default: 'active',
187
- }, // status of the user
188
- }, {
189
- versionKey: false,
190
- toJSON: { virtuals: true },
191
- toObject: { virtuals: true },
192
- });
193
- exports.MongoConversationSchema = new mongoose_1.Schema({
194
- user_id: { type: String, required: true },
195
- title: { type: String },
196
- thread_id: { type: String },
197
- status: {
198
- type: String,
199
- enum: types_1.generalStatusList,
200
- default: 'active',
201
- },
202
- created_at: { type: Date, default: Date.now },
203
- updated_at: { type: Date, default: Date.now },
204
- is_job: { type: Boolean, default: false }, // true if this is a job conversation
205
- }, {
206
- versionKey: false,
207
- toJSON: { virtuals: true },
208
- toObject: { virtuals: true },
209
- });
210
- exports.MongoJobSchema = new mongoose_1.Schema({
211
- user_id: { type: String, required: true },
212
- job_name: { type: String, required: true },
213
- job_description: { type: String },
214
- status: {
215
- type: String,
216
- enum: types_1.generalStatusList,
217
- default: 'active',
218
- },
219
- created_at: { type: Number, default: Date.now },
220
- updated_at: { type: Number, default: Date.now },
221
- schedule: { type: String },
222
- schedule_text: { type: String },
223
- active_until: { type: Number || undefined },
224
- query: { type: mongoose_1.Schema.Types.Mixed, required: true },
225
- conversation_id: { type: String },
226
- thread_id: { type: String },
227
- time_zone: { type: String }, // optional timezone for the job
228
- }, {
229
- versionKey: false,
230
- toJSON: { virtuals: true },
231
- toObject: { virtuals: true },
232
- });
233
150
  exports.MongoGeoSchema = new mongoose_1.Schema({
234
151
  geo_text: { type: String },
235
152
  timestamp: { type: Number },
package/dist/types.d.ts CHANGED
@@ -78,7 +78,6 @@ export declare const zodSourceSchema: z.ZodObject<{
78
78
  platform: z.ZodEnum<["telegram", "facebook", "instagram", "tiktok", "website"]>;
79
79
  entity: z.ZodEnum<["profile", "group", "page", "channel", "hashtag", "website"]>;
80
80
  public_id: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
81
- name: z.ZodString;
82
81
  url: z.ZodOptional<z.ZodString>;
83
82
  description: z.ZodOptional<z.ZodString>;
84
83
  language: z.ZodOptional<z.ZodString>;
@@ -112,7 +111,6 @@ export declare const zodSourceSchema: z.ZodObject<{
112
111
  }>>;
113
112
  notes: z.ZodOptional<z.ZodString>;
114
113
  }, "strip", z.ZodTypeAny, {
115
- name: string;
116
114
  status: "active" | "pending" | "inactive" | "requested" | "discovered" | "deleted";
117
115
  platform: "telegram" | "facebook" | "instagram" | "tiktok" | "website";
118
116
  entity: "website" | "profile" | "group" | "page" | "channel" | "hashtag";
@@ -139,7 +137,6 @@ export declare const zodSourceSchema: z.ZodObject<{
139
137
  requested_at?: number | undefined;
140
138
  } | undefined;
141
139
  }, {
142
- name: string;
143
140
  status: "active" | "pending" | "inactive" | "requested" | "discovered" | "deleted";
144
141
  platform: "telegram" | "facebook" | "instagram" | "tiktok" | "website";
145
142
  entity: "website" | "profile" | "group" | "page" | "channel" | "hashtag";
@@ -220,19 +217,22 @@ export declare const zodGeoSelectionSchema: z.ZodObject<{
220
217
  geos?: string[] | undefined;
221
218
  }>;
222
219
  export declare const zodDataSchema: z.ZodObject<{
223
- text_geo: z.ZodArray<z.ZodString, "many">;
224
- timestamp: z.ZodNumber;
225
- platform: z.ZodEnum<["telegram", "facebook", "instagram", "tiktok", "website"]>;
226
- source_region: z.ZodOptional<z.ZodString>;
227
220
  _id: z.ZodOptional<z.ZodString>;
221
+ timestamp: z.ZodOptional<z.ZodNumber>;
222
+ platform: z.ZodOptional<z.ZodEnum<["telegram", "facebook", "instagram", "tiktok", "website"]>>;
223
+ source_region_id: z.ZodOptional<z.ZodString>;
224
+ source_region_title: z.ZodOptional<z.ZodString>;
228
225
  source_id: z.ZodUnion<[z.ZodString, z.ZodNumber]>;
229
- source_public_id: z.ZodUnion<[z.ZodString, z.ZodNumber]>;
230
- source_name: z.ZodString;
231
- platform_id: z.ZodUnion<[z.ZodString, z.ZodNumber]>;
232
- original_text_id: z.ZodUnion<[z.ZodString, z.ZodNumber]>;
233
- original_text: z.ZodOptional<z.ZodString>;
234
- translated_text: z.ZodOptional<z.ZodString>;
235
- language: z.ZodOptional<z.ZodString>;
226
+ source_title: z.ZodOptional<z.ZodString>;
227
+ source_url: z.ZodOptional<z.ZodString>;
228
+ data_id: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
229
+ data_geo: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
230
+ data_text: z.ZodOptional<z.ZodString>;
231
+ data_url: z.ZodOptional<z.ZodString>;
232
+ data_original_type: z.ZodOptional<z.ZodString>;
233
+ data_language: z.ZodOptional<z.ZodString>;
234
+ data_sentiment: z.ZodOptional<z.ZodString>;
235
+ data_timestamp: z.ZodOptional<z.ZodNumber>;
236
236
  is_reply: z.ZodOptional<z.ZodBoolean>;
237
237
  reply_to_message_id: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
238
238
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
@@ -252,29 +252,31 @@ export declare const zodDataSchema: z.ZodObject<{
252
252
  caption?: string | undefined;
253
253
  }>, "many">>;
254
254
  author: z.ZodOptional<z.ZodString>;
255
- replies: z.ZodOptional<z.ZodAny>;
256
- entities: z.ZodOptional<z.ZodAny>;
257
255
  author_username: z.ZodOptional<z.ZodString>;
256
+ replies: z.ZodOptional<z.ZodAny>;
258
257
  author_id: z.ZodOptional<z.ZodString>;
259
258
  group_id: z.ZodOptional<z.ZodString>;
260
259
  }, "strip", z.ZodTypeAny, {
261
260
  created_at: number;
262
261
  updated_at: number;
263
- platform: "telegram" | "facebook" | "instagram" | "tiktok" | "website";
264
- timestamp: number;
265
- text_geo: string[];
266
262
  source_id: string | number;
267
- source_public_id: string | number;
268
- source_name: string;
269
- platform_id: string | number;
270
- original_text_id: string | number;
271
263
  _id?: string | undefined;
272
- language?: string | undefined;
264
+ platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | undefined;
273
265
  metadata?: Record<string, any> | undefined;
274
266
  group_id?: string | undefined;
275
- source_region?: string | undefined;
276
- original_text?: string | undefined;
277
- translated_text?: string | undefined;
267
+ timestamp?: number | undefined;
268
+ source_region_id?: string | undefined;
269
+ source_region_title?: string | undefined;
270
+ source_title?: string | undefined;
271
+ source_url?: string | undefined;
272
+ data_id?: string | number | undefined;
273
+ data_geo?: string[] | undefined;
274
+ data_text?: string | undefined;
275
+ data_url?: string | undefined;
276
+ data_original_type?: string | undefined;
277
+ data_language?: string | undefined;
278
+ data_sentiment?: string | undefined;
279
+ data_timestamp?: number | undefined;
278
280
  is_reply?: boolean | undefined;
279
281
  reply_to_message_id?: string | number | undefined;
280
282
  media?: {
@@ -283,28 +285,30 @@ export declare const zodDataSchema: z.ZodObject<{
283
285
  caption?: string | undefined;
284
286
  }[] | undefined;
285
287
  author?: string | undefined;
286
- replies?: any;
287
- entities?: any;
288
288
  author_username?: string | undefined;
289
+ replies?: any;
289
290
  author_id?: string | undefined;
290
291
  }, {
291
292
  created_at: number;
292
293
  updated_at: number;
293
- platform: "telegram" | "facebook" | "instagram" | "tiktok" | "website";
294
- timestamp: number;
295
- text_geo: string[];
296
294
  source_id: string | number;
297
- source_public_id: string | number;
298
- source_name: string;
299
- platform_id: string | number;
300
- original_text_id: string | number;
301
295
  _id?: string | undefined;
302
- language?: string | undefined;
296
+ platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | undefined;
303
297
  metadata?: Record<string, any> | undefined;
304
298
  group_id?: string | undefined;
305
- source_region?: string | undefined;
306
- original_text?: string | undefined;
307
- translated_text?: string | undefined;
299
+ timestamp?: number | undefined;
300
+ source_region_id?: string | undefined;
301
+ source_region_title?: string | undefined;
302
+ source_title?: string | undefined;
303
+ source_url?: string | undefined;
304
+ data_id?: string | number | undefined;
305
+ data_geo?: string[] | undefined;
306
+ data_text?: string | undefined;
307
+ data_url?: string | undefined;
308
+ data_original_type?: string | undefined;
309
+ data_language?: string | undefined;
310
+ data_sentiment?: string | undefined;
311
+ data_timestamp?: number | undefined;
308
312
  is_reply?: boolean | undefined;
309
313
  reply_to_message_id?: string | number | undefined;
310
314
  media?: {
@@ -313,18 +317,10 @@ export declare const zodDataSchema: z.ZodObject<{
313
317
  caption?: string | undefined;
314
318
  }[] | undefined;
315
319
  author?: string | undefined;
316
- replies?: any;
317
- entities?: any;
318
320
  author_username?: string | undefined;
321
+ replies?: any;
319
322
  author_id?: string | undefined;
320
323
  }>;
321
- /**
322
- * 'approved' - active and approved sources,
323
- * 'back_to_business' - sources that were paused and now resumed,
324
- * 'pending' - sources that are pending approval,
325
- * 'inactive' - sources that are inactive,
326
- * 'in_review' - sources that are under review
327
- */
328
324
  export type RegionType = z.infer<typeof zodRegionSchema>;
329
325
  export type SourceGroupType = z.infer<typeof zodSourceGroupSchema>;
330
326
  export type SourceType = z.infer<typeof zodSourceSchema>;
package/dist/types.js CHANGED
@@ -62,7 +62,6 @@ exports.zodSourceSchema = zod_1.z.object({
62
62
  platform: zod_1.z.enum(exports.platformsList),
63
63
  entity: zod_1.z.enum(exports.entityTypesList),
64
64
  public_id: zod_1.z.string().or(zod_1.z.number()).optional(),
65
- name: zod_1.z.string(),
66
65
  url: zod_1.z.string().url().optional(),
67
66
  description: zod_1.z.string().optional(),
68
67
  language: zod_1.z.string().optional(),
@@ -106,20 +105,22 @@ exports.zodGeoSelectionSchema = zod_1.z.object({
106
105
  updated_at: zod_1.z.number().optional(), // last update date
107
106
  });
108
107
  exports.zodDataSchema = zod_1.z.object({
109
- // Filterable fields
110
- text_geo: zod_1.z.array(zod_1.z.string()),
111
- timestamp: zod_1.z.number(),
112
- platform: zod_1.z.enum(exports.platformsList),
113
- source_region: zod_1.z.string().optional(),
114
108
  _id: zod_1.z.string().optional(),
109
+ timestamp: zod_1.z.number().optional(),
110
+ platform: zod_1.z.enum(exports.platformsList).optional(),
111
+ source_region_id: zod_1.z.string().optional(),
112
+ source_region_title: zod_1.z.string().optional(),
115
113
  source_id: zod_1.z.string().or(zod_1.z.number()),
116
- source_public_id: zod_1.z.string().or(zod_1.z.number()),
117
- source_name: zod_1.z.string(),
118
- platform_id: zod_1.z.string().or(zod_1.z.number()),
119
- original_text_id: zod_1.z.string().or(zod_1.z.number()),
120
- original_text: zod_1.z.string().optional(),
121
- translated_text: zod_1.z.string().optional(),
122
- language: zod_1.z.string().optional(),
114
+ source_title: zod_1.z.string().optional(),
115
+ source_url: zod_1.z.string().url().optional(),
116
+ data_id: zod_1.z.string().or(zod_1.z.number()).optional(),
117
+ data_geo: zod_1.z.array(zod_1.z.string()).optional(),
118
+ data_text: zod_1.z.string().optional(),
119
+ data_url: zod_1.z.string().url().optional(),
120
+ data_original_type: zod_1.z.string().optional(),
121
+ data_language: zod_1.z.string().optional(),
122
+ data_sentiment: zod_1.z.string().optional(),
123
+ data_timestamp: zod_1.z.number().optional(),
123
124
  is_reply: zod_1.z.boolean().optional(),
124
125
  reply_to_message_id: zod_1.z.string().or(zod_1.z.number()).optional(),
125
126
  metadata: zod_1.z.record(zod_1.z.string(), zod_1.z.any()).optional(),
@@ -133,11 +134,15 @@ exports.zodDataSchema = zod_1.z.object({
133
134
  }))
134
135
  .optional(),
135
136
  author: zod_1.z.string().optional(),
136
- replies: zod_1.z.any().optional(),
137
- entities: zod_1.z.any().optional(),
138
137
  author_username: zod_1.z.string().optional(),
138
+ replies: zod_1.z.any().optional(),
139
139
  author_id: zod_1.z.string().optional(),
140
140
  group_id: zod_1.z.string().optional(), // e.g., 'default_pipeline'
141
+ // translated_text: z.string().optional(), // translated text if available - most of the time it will be translated to English
142
+ // entities: z.any().optional(), // array of reply texts or IDs
143
+ // source_public_id: z.string().or(z.number()), // e.g., 'telegram:1234567890' (message_id)
144
+ // platform_id: z.string().or(z.number()), // channel_id reference to Source source_id
145
+ // original_text_id: z.string().or(z.number()), // message_id
141
146
  });
142
147
  // Helper function to get allowed entities for a platform
143
148
  const getAllowedEntitiesForPlatform = (platform) => {
package/index.ts CHANGED
@@ -11,27 +11,29 @@ import {
11
11
 
12
12
  export const MongoDataSchema = new Schema(
13
13
  {
14
- source_id: { type: Schema.Types.Mixed, required: true }, // reference to Source _id
15
- source_public_id: { type: Schema.Types.Mixed }, // e.g., 'telegram:1234567890' (message_id)
16
- source_name: { type: String, required: true }, // e.g., 'Telegram Channel Name'
17
- platform_id: { type: Schema.Types.Mixed }, // channel_id reference to Source source_id
14
+ timestamp: { type: Number }, // INDEX - date in milliseconds
18
15
  platform: {
19
16
  type: String,
20
17
  enum: platformsList,
21
- required: true,
22
18
  }, // e.g., 'telegram', 'facebook'
23
- original_text_id: { type: Schema.Types.Mixed }, // message_id
24
- original_text: { type: String }, // original text content
25
- translated_text: { type: String }, // translated text if available
26
- timestamp: { type: Number, required: true }, // date in milliseconds
27
- language: { type: String },
28
- text_geo: { type: [String], default: [] }, // e.g., ["sinjil", "ramallah", "west bank"]
29
- source_geo: { type: String }, // e.g., 'hebron'
19
+ source_region_id: { type: String }, // e.g., 'hebron'
20
+ source_region_title: { type: String }, // e.g., 'hebron'
21
+ source_id: { type: Schema.Types.Mixed, required: true }, // INDEX - reference to Source _id
22
+ source_title: { type: String }, // e.g., 'Telegram Channel Name'
23
+ source_url: { type: String }, // e.g., 'https://t.me/telegram_channel_name'
24
+ data_id: { type: Schema.Types.Mixed }, // INDEX - original text ID (e.g., message_id)
25
+ data_geo: { type: [String], default: [] }, // INDEX - e.g., ["sinjil", "ramallah", "west bank"]
26
+ data_text: { type: String }, // processed text content
27
+ data_url: { type: String }, // original text URL if available
28
+ data_original_type: { type: String }, // e.g., 'post', 'comment', 'reply', 'video', 'image'
29
+ data_language: { type: String }, // detected language of the text
30
+ data_sentiment: { type: String }, // sentiment analysis result
31
+ data_timestamp: { type: Number }, // original post timestamp if different from ingestion timestamp
30
32
  is_reply: { type: Boolean }, // true if this text is a reply to another text
31
33
  reply_to_message_id: { type: Schema.Types.Mixed },
32
34
  metadata: { type: Object }, // platform-specific fields
33
- created_at: { type: Number, default: Date.now },
34
- updated_at: { type: Number, default: Date.now },
35
+ created_at: { type: Number, default: Date.now, required: true },
36
+ updated_at: { type: Number, default: Date.now, required: true },
35
37
  media: {
36
38
  type: [
37
39
  {
@@ -42,12 +44,10 @@ export const MongoDataSchema = new Schema(
42
44
  ],
43
45
  }, // media attachments
44
46
  author: { type: String }, // e.g., author name or ID
45
- replies: { type: Schema.Types.Mixed }, // array of reply texts or IDs
46
- entities: { type: Schema.Types.Mixed }, // array of entities
47
47
  author_username: { type: String }, // e.g., author username
48
+ replies: { type: Schema.Types.Mixed }, // array of reply texts or IDs
48
49
  author_id: { type: String }, // e.g., author ID
49
- pipeline_name: { type: String }, // e.g., 'default_pipeline'
50
- source_region: { type: String }, // e.g., "New York", "California"
50
+ group_id: { type: String }, // e.g., source group ID
51
51
  },
52
52
  {
53
53
  versionKey: false,
@@ -119,7 +119,6 @@ export const MongoSourceSchema = new Schema(
119
119
  required: true,
120
120
  }, // e.g., 'channel', 'group', 'page', 'profile', 'hashtag'
121
121
  public_id: { type: Schema.Types.Mixed }, // e.g., '@telegram_channel_id'
122
- name: { type: String, required: true }, // e.g., 'Telegram Channel Name'
123
122
  url: { type: String }, // e.g., 'https://t.me/telegram_channel_name'
124
123
  description: { type: String }, // e.g., 'A channel about news and updates'
125
124
  language: { type: String },
@@ -147,105 +146,6 @@ export const MongoSourceSchema = new Schema(
147
146
  default: undefined,
148
147
  }, // user who requested this source
149
148
  notes: { type: String }, // internal notes about the source
150
-
151
- },
152
- {
153
- versionKey: false,
154
- toJSON: { virtuals: true },
155
- toObject: { virtuals: true },
156
- }
157
- );
158
-
159
- export const MongoQuerySchema = new Schema(
160
- {
161
- title: { type: String }, // e.g., 'Search Query'
162
- query: { type: String }, // search query includes geo information where it should be extracted with LLM
163
- sources: { type: [String] }, // array of source _id strings
164
- geos: { type: [String], required: true }, // array of geo strings
165
- geos_ids: { type: [String], required: true }, // array of geo _id strings
166
- time_range: {
167
- type: Schema.Types.Mixed, // can be relative or absolute time range
168
- required: true,
169
- },
170
- user_id: { type: String, required: true }, // user identifier
171
- user_instructions: { type: String }, // optional user instructions for the query
172
- user_time_zone: { type: String }, // user's timezone
173
- timestamp: { type: Number, default: Date.now }, // creation date
174
- },
175
- {
176
- versionKey: false,
177
- toJSON: { virtuals: true },
178
- toObject: { virtuals: true },
179
- }
180
- );
181
-
182
- export const MongoUserSettingsSchema = new Schema(
183
- {
184
- sources: { type: [String] }, // array of source _id strings
185
- geos: { type: [String], default: [] }, // array of geo strings
186
- time_range: {
187
- type: Schema.Types.Mixed,
188
- },
189
- user_id: { type: String, required: true },
190
- thread_id: { type: String },
191
- instructions: { type: String }, // optional user instructions for the query
192
- has_jobs_access: { type: Boolean, default: false }, // true if the user has access to jobs
193
- active_jobs_limit: { type: Number, default: 5 }, // maximum number of active jobs allowed
194
- created_at: { type: Date, default: Date.now }, // creation date
195
- updated_at: { type: Date, default: Date.now }, // last update date
196
- status: {
197
- type: String,
198
- enum: generalStatusList,
199
- default: 'active',
200
- }, // status of the user
201
- },
202
- {
203
- versionKey: false,
204
- toJSON: { virtuals: true },
205
- toObject: { virtuals: true },
206
- }
207
- );
208
-
209
- export const MongoConversationSchema = new Schema(
210
- {
211
- user_id: { type: String, required: true }, // user identifier
212
- title: { type: String }, // optional title for the conversation
213
- thread_id: { type: String }, // thread ID for the conversation
214
- status: {
215
- type: String,
216
- enum: generalStatusList,
217
- default: 'active',
218
- }, // status of the conversation
219
- created_at: { type: Date, default: Date.now }, // creation date
220
- updated_at: { type: Date, default: Date.now }, // last update date
221
- is_job: { type: Boolean, default: false }, // true if this is a job conversation
222
- },
223
- {
224
- versionKey: false,
225
- toJSON: { virtuals: true },
226
- toObject: { virtuals: true },
227
- }
228
- );
229
-
230
- export const MongoJobSchema = new Schema(
231
- {
232
- user_id: { type: String, required: true }, // user identifier
233
- job_name: { type: String, required: true }, // name of the automation job
234
- job_description: { type: String }, // description of the job
235
- status: {
236
- type: String,
237
- enum: generalStatusList,
238
- default: 'active',
239
- }, // status of the job
240
- created_at: { type: Number, default: Date.now }, // creation date
241
- updated_at: { type: Number, default: Date.now }, // last update date
242
- schedule: { type: String }, // cron schedule for the job
243
- schedule_text: { type: String }, // cron schedule in free-text format
244
- active_until: { type: Number || undefined }, // optional date until which the job is active
245
- query: { type: Schema.Types.Mixed, required: true }, // query associated with the job
246
- conversation_id: { type: String }, // optional conversation ID for the job
247
- thread_id: { type: String }, // thread ID for the job
248
- time_zone: { type: String }, // optional timezone for the job
249
149
  },
250
150
  {
251
151
  versionKey: false,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "harvester_sdk",
3
- "version": "1.0.5",
3
+ "version": "1.0.7",
4
4
  "description": "SDK for interacting with the Harvester API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
package/types.ts CHANGED
@@ -67,7 +67,6 @@ export const zodSourceSchema = z.object({
67
67
  platform: z.enum(platformsList), // e.g., 'telegram', 'facebook' // INDEX
68
68
  entity: z.enum(entityTypesList), // e.g., 'channel', 'group', 'page', 'profile', 'hashtag'
69
69
  public_id: z.string().or(z.number()).optional(), // e.g., '@telegram_channel_id'
70
- name: z.string(), // e.g., 'Telegram Channel Name'
71
70
  url: z.string().url().optional(), // e.g., 'https://t.me/telegram_channel_name'
72
71
  description: z.string().optional(), // e.g., 'A channel about news and updates'
73
72
  language: z.string().optional(),
@@ -114,20 +113,22 @@ export const zodGeoSelectionSchema = z.object({
114
113
  });
115
114
 
116
115
  export const zodDataSchema = z.object({
117
- // Filterable fields
118
- text_geo: z.array(z.string()), // INDEX - e.g., ["sinjil", "ramallah", "west bank"]
119
- timestamp: z.number(), // INDEX - date in milliseconds - e.g., 1751210833000
120
- platform: z.enum(platformsList), // e.g., 'telegram', 'facebook'
121
- source_region: z.string().optional(), // e.g., 'hebron'
122
116
  _id: z.string().optional(),
117
+ timestamp: z.number().optional(), // INDEX - date in milliseconds - e.g., 1751210833000
118
+ platform: z.enum(platformsList).optional(), // e.g., 'telegram', 'facebook'
119
+ source_region_id: z.string().optional(), // e.g., 'hebron'
120
+ source_region_title: z.string().optional(), // e.g., 'hebron'
123
121
  source_id: z.string().or(z.number()), // INDEX - reference to Source _id (e.g., '60c72b2f9b1e8d3f4c8b4567')
124
- source_public_id: z.string().or(z.number()), // e.g., 'telegram:1234567890' (message_id)
125
- source_name: z.string(), // e.g., 'Telegram Channel Name'
126
- platform_id: z.string().or(z.number()), // channel_id reference to Source source_id
127
- original_text_id: z.string().or(z.number()), // message_id
128
- original_text: z.string().optional(), // original text content
129
- translated_text: z.string().optional(), // translated text if available - most of the time it will be translated to English
130
- language: z.string().optional(),
122
+ source_title: z.string().optional(), // e.g., 'Telegram Channel Name'
123
+ source_url: z.string().url().optional(), // e.g., 'https://t.me/telegram_channel_name'
124
+ data_id: z.string().or(z.number()).optional(), // INDEX - original text ID (e.g., message_id)
125
+ data_geo: z.array(z.string()).optional(), // INDEX - e.g., ["sinjil", "ramallah", "west bank"]
126
+ data_text: z.string().optional(), // processed text content
127
+ data_url: z.string().url().optional(), // original text URL if available
128
+ data_original_type: z.string().optional(), // e.g., 'post', 'comment', 'reply', 'video', 'image'
129
+ data_language: z.string().optional(), // detected language of the text
130
+ data_sentiment: z.string().optional(), // sentiment analysis result
131
+ data_timestamp: z.number().optional(), // original post timestamp if different from ingestion timestamp
131
132
  is_reply: z.boolean().optional(), // true if this text is a reply to another text
132
133
  reply_to_message_id: z.string().or(z.number()).optional(),
133
134
  metadata: z.record(z.string(), z.any()).optional(), // platform-specific fields
@@ -143,20 +144,17 @@ export const zodDataSchema = z.object({
143
144
  )
144
145
  .optional(), // media attachments
145
146
  author: z.string().optional(), // e.g., author name or ID
146
- replies: z.any().optional(), // array of reply texts or IDs
147
- entities: z.any().optional(), // array of reply texts or IDs
148
147
  author_username: z.string().optional(), // e.g., author username
148
+ replies: z.any().optional(), // array of reply texts or IDs
149
149
  author_id: z.string().optional(), // e.g., author ID
150
150
  group_id: z.string().optional(), // e.g., 'default_pipeline'
151
+ // translated_text: z.string().optional(), // translated text if available - most of the time it will be translated to English
152
+ // entities: z.any().optional(), // array of reply texts or IDs
153
+ // source_public_id: z.string().or(z.number()), // e.g., 'telegram:1234567890' (message_id)
154
+ // platform_id: z.string().or(z.number()), // channel_id reference to Source source_id
155
+ // original_text_id: z.string().or(z.number()), // message_id
151
156
  });
152
157
 
153
- /**
154
- * 'approved' - active and approved sources,
155
- * 'back_to_business' - sources that were paused and now resumed,
156
- * 'pending' - sources that are pending approval,
157
- * 'inactive' - sources that are inactive,
158
- * 'in_review' - sources that are under review
159
- */
160
158
  export type RegionType = z.infer<typeof zodRegionSchema>;
161
159
  export type SourceGroupType = z.infer<typeof zodSourceGroupSchema>;
162
160
  export type SourceType = z.infer<typeof zodSourceSchema>;