harvester_sdk 1.0.6 → 1.0.7

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -36,11 +36,8 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
36
36
  }, {
37
37
  created_at: number;
38
38
  updated_at: number;
39
- platform: "telegram" | "facebook" | "instagram" | "tiktok" | "website";
40
- timestamp: number;
41
- text_geo: string[];
42
39
  source_id: any;
43
- source_name: string;
40
+ data_geo: string[];
44
41
  media: import("mongoose").Types.DocumentArray<{
45
42
  type?: "image" | "video" | "audio" | "link" | null | undefined;
46
43
  url?: string | null | undefined;
@@ -54,31 +51,32 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
54
51
  url?: string | null | undefined;
55
52
  caption?: string | null | undefined;
56
53
  }>;
57
- language?: string | null | undefined;
54
+ platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | null | undefined;
58
55
  metadata?: any;
59
- source_region?: string | null | undefined;
60
- source_public_id?: any;
61
- platform_id?: any;
62
- original_text_id?: any;
63
- original_text?: string | null | undefined;
64
- translated_text?: string | null | undefined;
56
+ group_id?: string | null | undefined;
57
+ timestamp?: number | null | undefined;
58
+ source_region_id?: string | null | undefined;
59
+ source_region_title?: string | null | undefined;
60
+ source_title?: string | null | undefined;
61
+ source_url?: string | null | undefined;
62
+ data_id?: any;
63
+ data_text?: string | null | undefined;
64
+ data_url?: string | null | undefined;
65
+ data_original_type?: string | null | undefined;
66
+ data_language?: string | null | undefined;
67
+ data_sentiment?: string | null | undefined;
68
+ data_timestamp?: number | null | undefined;
65
69
  is_reply?: boolean | null | undefined;
66
70
  reply_to_message_id?: any;
67
71
  author?: string | null | undefined;
68
- replies?: any;
69
- entities?: any;
70
72
  author_username?: string | null | undefined;
73
+ replies?: any;
71
74
  author_id?: string | null | undefined;
72
- source_geo?: string | null | undefined;
73
- pipeline_name?: string | null | undefined;
74
75
  }, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
75
76
  created_at: number;
76
77
  updated_at: number;
77
- platform: "telegram" | "facebook" | "instagram" | "tiktok" | "website";
78
- timestamp: number;
79
- text_geo: string[];
80
78
  source_id: any;
81
- source_name: string;
79
+ data_geo: string[];
82
80
  media: import("mongoose").Types.DocumentArray<{
83
81
  type?: "image" | "video" | "audio" | "link" | null | undefined;
84
82
  url?: string | null | undefined;
@@ -92,31 +90,32 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
92
90
  url?: string | null | undefined;
93
91
  caption?: string | null | undefined;
94
92
  }>;
95
- language?: string | null | undefined;
93
+ platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | null | undefined;
96
94
  metadata?: any;
97
- source_region?: string | null | undefined;
98
- source_public_id?: any;
99
- platform_id?: any;
100
- original_text_id?: any;
101
- original_text?: string | null | undefined;
102
- translated_text?: string | null | undefined;
95
+ group_id?: string | null | undefined;
96
+ timestamp?: number | null | undefined;
97
+ source_region_id?: string | null | undefined;
98
+ source_region_title?: string | null | undefined;
99
+ source_title?: string | null | undefined;
100
+ source_url?: string | null | undefined;
101
+ data_id?: any;
102
+ data_text?: string | null | undefined;
103
+ data_url?: string | null | undefined;
104
+ data_original_type?: string | null | undefined;
105
+ data_language?: string | null | undefined;
106
+ data_sentiment?: string | null | undefined;
107
+ data_timestamp?: number | null | undefined;
103
108
  is_reply?: boolean | null | undefined;
104
109
  reply_to_message_id?: any;
105
110
  author?: string | null | undefined;
106
- replies?: any;
107
- entities?: any;
108
111
  author_username?: string | null | undefined;
112
+ replies?: any;
109
113
  author_id?: string | null | undefined;
110
- source_geo?: string | null | undefined;
111
- pipeline_name?: string | null | undefined;
112
114
  }>, {}> & import("mongoose").FlatRecord<{
113
115
  created_at: number;
114
116
  updated_at: number;
115
- platform: "telegram" | "facebook" | "instagram" | "tiktok" | "website";
116
- timestamp: number;
117
- text_geo: string[];
118
117
  source_id: any;
119
- source_name: string;
118
+ data_geo: string[];
120
119
  media: import("mongoose").Types.DocumentArray<{
121
120
  type?: "image" | "video" | "audio" | "link" | null | undefined;
122
121
  url?: string | null | undefined;
@@ -130,23 +129,27 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
130
129
  url?: string | null | undefined;
131
130
  caption?: string | null | undefined;
132
131
  }>;
133
- language?: string | null | undefined;
132
+ platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | null | undefined;
134
133
  metadata?: any;
135
- source_region?: string | null | undefined;
136
- source_public_id?: any;
137
- platform_id?: any;
138
- original_text_id?: any;
139
- original_text?: string | null | undefined;
140
- translated_text?: string | null | undefined;
134
+ group_id?: string | null | undefined;
135
+ timestamp?: number | null | undefined;
136
+ source_region_id?: string | null | undefined;
137
+ source_region_title?: string | null | undefined;
138
+ source_title?: string | null | undefined;
139
+ source_url?: string | null | undefined;
140
+ data_id?: any;
141
+ data_text?: string | null | undefined;
142
+ data_url?: string | null | undefined;
143
+ data_original_type?: string | null | undefined;
144
+ data_language?: string | null | undefined;
145
+ data_sentiment?: string | null | undefined;
146
+ data_timestamp?: number | null | undefined;
141
147
  is_reply?: boolean | null | undefined;
142
148
  reply_to_message_id?: any;
143
149
  author?: string | null | undefined;
144
- replies?: any;
145
- entities?: any;
146
150
  author_username?: string | null | undefined;
151
+ replies?: any;
147
152
  author_id?: string | null | undefined;
148
- source_geo?: string | null | undefined;
149
- pipeline_name?: string | null | undefined;
150
153
  }> & {
151
154
  _id: import("mongoose").Types.ObjectId;
152
155
  } & {
@@ -320,193 +323,6 @@ export declare const MongoSourceSchema: Schema<any, import("mongoose").Model<any
320
323
  } & {
321
324
  __v: number;
322
325
  }>;
323
- export declare const MongoQuerySchema: Schema<any, import("mongoose").Model<any, any, any, any, any, any>, {}, {}, {}, {}, {
324
- versionKey: false;
325
- toJSON: {
326
- virtuals: true;
327
- };
328
- toObject: {
329
- virtuals: true;
330
- };
331
- }, {
332
- user_id: string;
333
- timestamp: number;
334
- geos: string[];
335
- sources: string[];
336
- geos_ids: string[];
337
- time_range: any;
338
- title?: string | null | undefined;
339
- query?: string | null | undefined;
340
- user_instructions?: string | null | undefined;
341
- user_time_zone?: string | null | undefined;
342
- }, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
343
- user_id: string;
344
- timestamp: number;
345
- geos: string[];
346
- sources: string[];
347
- geos_ids: string[];
348
- time_range: any;
349
- title?: string | null | undefined;
350
- query?: string | null | undefined;
351
- user_instructions?: string | null | undefined;
352
- user_time_zone?: string | null | undefined;
353
- }>, {}> & import("mongoose").FlatRecord<{
354
- user_id: string;
355
- timestamp: number;
356
- geos: string[];
357
- sources: string[];
358
- geos_ids: string[];
359
- time_range: any;
360
- title?: string | null | undefined;
361
- query?: string | null | undefined;
362
- user_instructions?: string | null | undefined;
363
- user_time_zone?: string | null | undefined;
364
- }> & {
365
- _id: import("mongoose").Types.ObjectId;
366
- } & {
367
- __v: number;
368
- }>;
369
- export declare const MongoUserSettingsSchema: Schema<any, import("mongoose").Model<any, any, any, any, any, any>, {}, {}, {}, {}, {
370
- versionKey: false;
371
- toJSON: {
372
- virtuals: true;
373
- };
374
- toObject: {
375
- virtuals: true;
376
- };
377
- }, {
378
- created_at: NativeDate;
379
- updated_at: NativeDate;
380
- status: "active" | "inactive" | "deleted";
381
- user_id: string;
382
- geos: string[];
383
- sources: string[];
384
- has_jobs_access: boolean;
385
- active_jobs_limit: number;
386
- time_range?: any;
387
- thread_id?: string | null | undefined;
388
- instructions?: string | null | undefined;
389
- }, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
390
- created_at: NativeDate;
391
- updated_at: NativeDate;
392
- status: "active" | "inactive" | "deleted";
393
- user_id: string;
394
- geos: string[];
395
- sources: string[];
396
- has_jobs_access: boolean;
397
- active_jobs_limit: number;
398
- time_range?: any;
399
- thread_id?: string | null | undefined;
400
- instructions?: string | null | undefined;
401
- }>, {}> & import("mongoose").FlatRecord<{
402
- created_at: NativeDate;
403
- updated_at: NativeDate;
404
- status: "active" | "inactive" | "deleted";
405
- user_id: string;
406
- geos: string[];
407
- sources: string[];
408
- has_jobs_access: boolean;
409
- active_jobs_limit: number;
410
- time_range?: any;
411
- thread_id?: string | null | undefined;
412
- instructions?: string | null | undefined;
413
- }> & {
414
- _id: import("mongoose").Types.ObjectId;
415
- } & {
416
- __v: number;
417
- }>;
418
- export declare const MongoConversationSchema: Schema<any, import("mongoose").Model<any, any, any, any, any, any>, {}, {}, {}, {}, {
419
- versionKey: false;
420
- toJSON: {
421
- virtuals: true;
422
- };
423
- toObject: {
424
- virtuals: true;
425
- };
426
- }, {
427
- created_at: NativeDate;
428
- updated_at: NativeDate;
429
- status: "active" | "inactive" | "deleted";
430
- user_id: string;
431
- is_job: boolean;
432
- title?: string | null | undefined;
433
- thread_id?: string | null | undefined;
434
- }, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
435
- created_at: NativeDate;
436
- updated_at: NativeDate;
437
- status: "active" | "inactive" | "deleted";
438
- user_id: string;
439
- is_job: boolean;
440
- title?: string | null | undefined;
441
- thread_id?: string | null | undefined;
442
- }>, {}> & import("mongoose").FlatRecord<{
443
- created_at: NativeDate;
444
- updated_at: NativeDate;
445
- status: "active" | "inactive" | "deleted";
446
- user_id: string;
447
- is_job: boolean;
448
- title?: string | null | undefined;
449
- thread_id?: string | null | undefined;
450
- }> & {
451
- _id: import("mongoose").Types.ObjectId;
452
- } & {
453
- __v: number;
454
- }>;
455
- export declare const MongoJobSchema: Schema<any, import("mongoose").Model<any, any, any, any, any, any>, {}, {}, {}, {}, {
456
- versionKey: false;
457
- toJSON: {
458
- virtuals: true;
459
- };
460
- toObject: {
461
- virtuals: true;
462
- };
463
- }, {
464
- created_at: number;
465
- updated_at: number;
466
- status: "active" | "inactive" | "deleted";
467
- user_id: string;
468
- query: any;
469
- job_name: string;
470
- thread_id?: string | null | undefined;
471
- job_description?: string | null | undefined;
472
- schedule?: string | null | undefined;
473
- schedule_text?: string | null | undefined;
474
- active_until?: number | null | undefined;
475
- conversation_id?: string | null | undefined;
476
- time_zone?: string | null | undefined;
477
- }, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
478
- created_at: number;
479
- updated_at: number;
480
- status: "active" | "inactive" | "deleted";
481
- user_id: string;
482
- query: any;
483
- job_name: string;
484
- thread_id?: string | null | undefined;
485
- job_description?: string | null | undefined;
486
- schedule?: string | null | undefined;
487
- schedule_text?: string | null | undefined;
488
- active_until?: number | null | undefined;
489
- conversation_id?: string | null | undefined;
490
- time_zone?: string | null | undefined;
491
- }>, {}> & import("mongoose").FlatRecord<{
492
- created_at: number;
493
- updated_at: number;
494
- status: "active" | "inactive" | "deleted";
495
- user_id: string;
496
- query: any;
497
- job_name: string;
498
- thread_id?: string | null | undefined;
499
- job_description?: string | null | undefined;
500
- schedule?: string | null | undefined;
501
- schedule_text?: string | null | undefined;
502
- active_until?: number | null | undefined;
503
- conversation_id?: string | null | undefined;
504
- time_zone?: string | null | undefined;
505
- }> & {
506
- _id: import("mongoose").Types.ObjectId;
507
- } & {
508
- __v: number;
509
- }>;
510
326
  export declare const MongoGeoSchema: Schema<any, import("mongoose").Model<any, any, any, any, any, any>, {}, {}, {}, {}, {
511
327
  versionKey: false;
512
328
  toJSON: {
package/dist/index.js CHANGED
@@ -14,33 +14,35 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
- exports.MongoApiKeySchema = exports.MongoGeoSelectionSchema = exports.MongoGeoSchema = exports.MongoJobSchema = exports.MongoConversationSchema = exports.MongoUserSettingsSchema = exports.MongoQuerySchema = exports.MongoSourceSchema = exports.MongoSourceGroupSchema = exports.MongoRegionSchema = exports.MongoDataSchema = void 0;
17
+ exports.MongoApiKeySchema = exports.MongoGeoSelectionSchema = exports.MongoGeoSchema = exports.MongoSourceSchema = exports.MongoSourceGroupSchema = exports.MongoRegionSchema = exports.MongoDataSchema = void 0;
18
18
  // Export types for consumers
19
19
  __exportStar(require("./types"), exports);
20
20
  const mongoose_1 = require("mongoose");
21
21
  const types_1 = require("./types");
22
22
  exports.MongoDataSchema = new mongoose_1.Schema({
23
- source_id: { type: mongoose_1.Schema.Types.Mixed, required: true },
24
- source_public_id: { type: mongoose_1.Schema.Types.Mixed },
25
- source_name: { type: String, required: true },
26
- platform_id: { type: mongoose_1.Schema.Types.Mixed },
23
+ timestamp: { type: Number },
27
24
  platform: {
28
25
  type: String,
29
26
  enum: types_1.platformsList,
30
- required: true,
31
27
  },
32
- original_text_id: { type: mongoose_1.Schema.Types.Mixed },
33
- original_text: { type: String },
34
- translated_text: { type: String },
35
- timestamp: { type: Number, required: true },
36
- language: { type: String },
37
- text_geo: { type: [String], default: [] },
38
- source_geo: { type: String },
28
+ source_region_id: { type: String },
29
+ source_region_title: { type: String },
30
+ source_id: { type: mongoose_1.Schema.Types.Mixed, required: true },
31
+ source_title: { type: String },
32
+ source_url: { type: String },
33
+ data_id: { type: mongoose_1.Schema.Types.Mixed },
34
+ data_geo: { type: [String], default: [] },
35
+ data_text: { type: String },
36
+ data_url: { type: String },
37
+ data_original_type: { type: String },
38
+ data_language: { type: String },
39
+ data_sentiment: { type: String },
40
+ data_timestamp: { type: Number },
39
41
  is_reply: { type: Boolean },
40
42
  reply_to_message_id: { type: mongoose_1.Schema.Types.Mixed },
41
43
  metadata: { type: Object },
42
- created_at: { type: Number, default: Date.now },
43
- updated_at: { type: Number, default: Date.now },
44
+ created_at: { type: Number, default: Date.now, required: true },
45
+ updated_at: { type: Number, default: Date.now, required: true },
44
46
  media: {
45
47
  type: [
46
48
  {
@@ -51,12 +53,10 @@ exports.MongoDataSchema = new mongoose_1.Schema({
51
53
  ],
52
54
  },
53
55
  author: { type: String },
54
- replies: { type: mongoose_1.Schema.Types.Mixed },
55
- entities: { type: mongoose_1.Schema.Types.Mixed },
56
56
  author_username: { type: String },
57
+ replies: { type: mongoose_1.Schema.Types.Mixed },
57
58
  author_id: { type: String },
58
- pipeline_name: { type: String },
59
- source_region: { type: String }, // e.g., "New York", "California"
59
+ group_id: { type: String }, // e.g., source group ID
60
60
  }, {
61
61
  versionKey: false,
62
62
  toJSON: { virtuals: true },
@@ -147,88 +147,6 @@ exports.MongoSourceSchema = new mongoose_1.Schema({
147
147
  toJSON: { virtuals: true },
148
148
  toObject: { virtuals: true },
149
149
  });
150
- exports.MongoQuerySchema = new mongoose_1.Schema({
151
- title: { type: String },
152
- query: { type: String },
153
- sources: { type: [String] },
154
- geos: { type: [String], required: true },
155
- geos_ids: { type: [String], required: true },
156
- time_range: {
157
- type: mongoose_1.Schema.Types.Mixed,
158
- required: true,
159
- },
160
- user_id: { type: String, required: true },
161
- user_instructions: { type: String },
162
- user_time_zone: { type: String },
163
- timestamp: { type: Number, default: Date.now }, // creation date
164
- }, {
165
- versionKey: false,
166
- toJSON: { virtuals: true },
167
- toObject: { virtuals: true },
168
- });
169
- exports.MongoUserSettingsSchema = new mongoose_1.Schema({
170
- sources: { type: [String] },
171
- geos: { type: [String], default: [] },
172
- time_range: {
173
- type: mongoose_1.Schema.Types.Mixed,
174
- },
175
- user_id: { type: String, required: true },
176
- thread_id: { type: String },
177
- instructions: { type: String },
178
- has_jobs_access: { type: Boolean, default: false },
179
- active_jobs_limit: { type: Number, default: 5 },
180
- created_at: { type: Date, default: Date.now },
181
- updated_at: { type: Date, default: Date.now },
182
- status: {
183
- type: String,
184
- enum: types_1.generalStatusList,
185
- default: 'active',
186
- }, // status of the user
187
- }, {
188
- versionKey: false,
189
- toJSON: { virtuals: true },
190
- toObject: { virtuals: true },
191
- });
192
- exports.MongoConversationSchema = new mongoose_1.Schema({
193
- user_id: { type: String, required: true },
194
- title: { type: String },
195
- thread_id: { type: String },
196
- status: {
197
- type: String,
198
- enum: types_1.generalStatusList,
199
- default: 'active',
200
- },
201
- created_at: { type: Date, default: Date.now },
202
- updated_at: { type: Date, default: Date.now },
203
- is_job: { type: Boolean, default: false }, // true if this is a job conversation
204
- }, {
205
- versionKey: false,
206
- toJSON: { virtuals: true },
207
- toObject: { virtuals: true },
208
- });
209
- exports.MongoJobSchema = new mongoose_1.Schema({
210
- user_id: { type: String, required: true },
211
- job_name: { type: String, required: true },
212
- job_description: { type: String },
213
- status: {
214
- type: String,
215
- enum: types_1.generalStatusList,
216
- default: 'active',
217
- },
218
- created_at: { type: Number, default: Date.now },
219
- updated_at: { type: Number, default: Date.now },
220
- schedule: { type: String },
221
- schedule_text: { type: String },
222
- active_until: { type: Number || undefined },
223
- query: { type: mongoose_1.Schema.Types.Mixed, required: true },
224
- conversation_id: { type: String },
225
- thread_id: { type: String },
226
- time_zone: { type: String }, // optional timezone for the job
227
- }, {
228
- versionKey: false,
229
- toJSON: { virtuals: true },
230
- toObject: { virtuals: true },
231
- });
232
150
  exports.MongoGeoSchema = new mongoose_1.Schema({
233
151
  geo_text: { type: String },
234
152
  timestamp: { type: Number },
package/dist/types.d.ts CHANGED
@@ -217,19 +217,22 @@ export declare const zodGeoSelectionSchema: z.ZodObject<{
217
217
  geos?: string[] | undefined;
218
218
  }>;
219
219
  export declare const zodDataSchema: z.ZodObject<{
220
- text_geo: z.ZodArray<z.ZodString, "many">;
221
- timestamp: z.ZodNumber;
222
- platform: z.ZodEnum<["telegram", "facebook", "instagram", "tiktok", "website"]>;
223
- source_region: z.ZodOptional<z.ZodString>;
224
220
  _id: z.ZodOptional<z.ZodString>;
221
+ timestamp: z.ZodOptional<z.ZodNumber>;
222
+ platform: z.ZodOptional<z.ZodEnum<["telegram", "facebook", "instagram", "tiktok", "website"]>>;
223
+ source_region_id: z.ZodOptional<z.ZodString>;
224
+ source_region_title: z.ZodOptional<z.ZodString>;
225
225
  source_id: z.ZodUnion<[z.ZodString, z.ZodNumber]>;
226
- source_public_id: z.ZodUnion<[z.ZodString, z.ZodNumber]>;
227
- source_name: z.ZodString;
228
- platform_id: z.ZodUnion<[z.ZodString, z.ZodNumber]>;
229
- original_text_id: z.ZodUnion<[z.ZodString, z.ZodNumber]>;
230
- original_text: z.ZodOptional<z.ZodString>;
231
- translated_text: z.ZodOptional<z.ZodString>;
232
- language: z.ZodOptional<z.ZodString>;
226
+ source_title: z.ZodOptional<z.ZodString>;
227
+ source_url: z.ZodOptional<z.ZodString>;
228
+ data_id: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
229
+ data_geo: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
230
+ data_text: z.ZodOptional<z.ZodString>;
231
+ data_url: z.ZodOptional<z.ZodString>;
232
+ data_original_type: z.ZodOptional<z.ZodString>;
233
+ data_language: z.ZodOptional<z.ZodString>;
234
+ data_sentiment: z.ZodOptional<z.ZodString>;
235
+ data_timestamp: z.ZodOptional<z.ZodNumber>;
233
236
  is_reply: z.ZodOptional<z.ZodBoolean>;
234
237
  reply_to_message_id: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
235
238
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
@@ -249,29 +252,31 @@ export declare const zodDataSchema: z.ZodObject<{
249
252
  caption?: string | undefined;
250
253
  }>, "many">>;
251
254
  author: z.ZodOptional<z.ZodString>;
252
- replies: z.ZodOptional<z.ZodAny>;
253
- entities: z.ZodOptional<z.ZodAny>;
254
255
  author_username: z.ZodOptional<z.ZodString>;
256
+ replies: z.ZodOptional<z.ZodAny>;
255
257
  author_id: z.ZodOptional<z.ZodString>;
256
258
  group_id: z.ZodOptional<z.ZodString>;
257
259
  }, "strip", z.ZodTypeAny, {
258
260
  created_at: number;
259
261
  updated_at: number;
260
- platform: "telegram" | "facebook" | "instagram" | "tiktok" | "website";
261
- timestamp: number;
262
- text_geo: string[];
263
262
  source_id: string | number;
264
- source_public_id: string | number;
265
- source_name: string;
266
- platform_id: string | number;
267
- original_text_id: string | number;
268
263
  _id?: string | undefined;
269
- language?: string | undefined;
264
+ platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | undefined;
270
265
  metadata?: Record<string, any> | undefined;
271
266
  group_id?: string | undefined;
272
- source_region?: string | undefined;
273
- original_text?: string | undefined;
274
- translated_text?: string | undefined;
267
+ timestamp?: number | undefined;
268
+ source_region_id?: string | undefined;
269
+ source_region_title?: string | undefined;
270
+ source_title?: string | undefined;
271
+ source_url?: string | undefined;
272
+ data_id?: string | number | undefined;
273
+ data_geo?: string[] | undefined;
274
+ data_text?: string | undefined;
275
+ data_url?: string | undefined;
276
+ data_original_type?: string | undefined;
277
+ data_language?: string | undefined;
278
+ data_sentiment?: string | undefined;
279
+ data_timestamp?: number | undefined;
275
280
  is_reply?: boolean | undefined;
276
281
  reply_to_message_id?: string | number | undefined;
277
282
  media?: {
@@ -280,28 +285,30 @@ export declare const zodDataSchema: z.ZodObject<{
280
285
  caption?: string | undefined;
281
286
  }[] | undefined;
282
287
  author?: string | undefined;
283
- replies?: any;
284
- entities?: any;
285
288
  author_username?: string | undefined;
289
+ replies?: any;
286
290
  author_id?: string | undefined;
287
291
  }, {
288
292
  created_at: number;
289
293
  updated_at: number;
290
- platform: "telegram" | "facebook" | "instagram" | "tiktok" | "website";
291
- timestamp: number;
292
- text_geo: string[];
293
294
  source_id: string | number;
294
- source_public_id: string | number;
295
- source_name: string;
296
- platform_id: string | number;
297
- original_text_id: string | number;
298
295
  _id?: string | undefined;
299
- language?: string | undefined;
296
+ platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | undefined;
300
297
  metadata?: Record<string, any> | undefined;
301
298
  group_id?: string | undefined;
302
- source_region?: string | undefined;
303
- original_text?: string | undefined;
304
- translated_text?: string | undefined;
299
+ timestamp?: number | undefined;
300
+ source_region_id?: string | undefined;
301
+ source_region_title?: string | undefined;
302
+ source_title?: string | undefined;
303
+ source_url?: string | undefined;
304
+ data_id?: string | number | undefined;
305
+ data_geo?: string[] | undefined;
306
+ data_text?: string | undefined;
307
+ data_url?: string | undefined;
308
+ data_original_type?: string | undefined;
309
+ data_language?: string | undefined;
310
+ data_sentiment?: string | undefined;
311
+ data_timestamp?: number | undefined;
305
312
  is_reply?: boolean | undefined;
306
313
  reply_to_message_id?: string | number | undefined;
307
314
  media?: {
@@ -310,18 +317,10 @@ export declare const zodDataSchema: z.ZodObject<{
310
317
  caption?: string | undefined;
311
318
  }[] | undefined;
312
319
  author?: string | undefined;
313
- replies?: any;
314
- entities?: any;
315
320
  author_username?: string | undefined;
321
+ replies?: any;
316
322
  author_id?: string | undefined;
317
323
  }>;
318
- /**
319
- * 'approved' - active and approved sources,
320
- * 'back_to_business' - sources that were paused and now resumed,
321
- * 'pending' - sources that are pending approval,
322
- * 'inactive' - sources that are inactive,
323
- * 'in_review' - sources that are under review
324
- */
325
324
  export type RegionType = z.infer<typeof zodRegionSchema>;
326
325
  export type SourceGroupType = z.infer<typeof zodSourceGroupSchema>;
327
326
  export type SourceType = z.infer<typeof zodSourceSchema>;
package/dist/types.js CHANGED
@@ -105,20 +105,22 @@ exports.zodGeoSelectionSchema = zod_1.z.object({
105
105
  updated_at: zod_1.z.number().optional(), // last update date
106
106
  });
107
107
  exports.zodDataSchema = zod_1.z.object({
108
- // Filterable fields
109
- text_geo: zod_1.z.array(zod_1.z.string()),
110
- timestamp: zod_1.z.number(),
111
- platform: zod_1.z.enum(exports.platformsList),
112
- source_region: zod_1.z.string().optional(),
113
108
  _id: zod_1.z.string().optional(),
109
+ timestamp: zod_1.z.number().optional(),
110
+ platform: zod_1.z.enum(exports.platformsList).optional(),
111
+ source_region_id: zod_1.z.string().optional(),
112
+ source_region_title: zod_1.z.string().optional(),
114
113
  source_id: zod_1.z.string().or(zod_1.z.number()),
115
- source_public_id: zod_1.z.string().or(zod_1.z.number()),
116
- source_name: zod_1.z.string(),
117
- platform_id: zod_1.z.string().or(zod_1.z.number()),
118
- original_text_id: zod_1.z.string().or(zod_1.z.number()),
119
- original_text: zod_1.z.string().optional(),
120
- translated_text: zod_1.z.string().optional(),
121
- language: zod_1.z.string().optional(),
114
+ source_title: zod_1.z.string().optional(),
115
+ source_url: zod_1.z.string().url().optional(),
116
+ data_id: zod_1.z.string().or(zod_1.z.number()).optional(),
117
+ data_geo: zod_1.z.array(zod_1.z.string()).optional(),
118
+ data_text: zod_1.z.string().optional(),
119
+ data_url: zod_1.z.string().url().optional(),
120
+ data_original_type: zod_1.z.string().optional(),
121
+ data_language: zod_1.z.string().optional(),
122
+ data_sentiment: zod_1.z.string().optional(),
123
+ data_timestamp: zod_1.z.number().optional(),
122
124
  is_reply: zod_1.z.boolean().optional(),
123
125
  reply_to_message_id: zod_1.z.string().or(zod_1.z.number()).optional(),
124
126
  metadata: zod_1.z.record(zod_1.z.string(), zod_1.z.any()).optional(),
@@ -132,11 +134,15 @@ exports.zodDataSchema = zod_1.z.object({
132
134
  }))
133
135
  .optional(),
134
136
  author: zod_1.z.string().optional(),
135
- replies: zod_1.z.any().optional(),
136
- entities: zod_1.z.any().optional(),
137
137
  author_username: zod_1.z.string().optional(),
138
+ replies: zod_1.z.any().optional(),
138
139
  author_id: zod_1.z.string().optional(),
139
140
  group_id: zod_1.z.string().optional(), // e.g., 'default_pipeline'
141
+ // translated_text: z.string().optional(), // translated text if available - most of the time it will be translated to English
142
+ // entities: z.any().optional(), // array of reply texts or IDs
143
+ // source_public_id: z.string().or(z.number()), // e.g., 'telegram:1234567890' (message_id)
144
+ // platform_id: z.string().or(z.number()), // channel_id reference to Source source_id
145
+ // original_text_id: z.string().or(z.number()), // message_id
140
146
  });
141
147
  // Helper function to get allowed entities for a platform
142
148
  const getAllowedEntitiesForPlatform = (platform) => {
package/index.ts CHANGED
@@ -11,27 +11,29 @@ import {
11
11
 
12
12
  export const MongoDataSchema = new Schema(
13
13
  {
14
- source_id: { type: Schema.Types.Mixed, required: true }, // reference to Source _id
15
- source_public_id: { type: Schema.Types.Mixed }, // e.g., 'telegram:1234567890' (message_id)
16
- source_name: { type: String, required: true }, // e.g., 'Telegram Channel Name'
17
- platform_id: { type: Schema.Types.Mixed }, // channel_id reference to Source source_id
14
+ timestamp: { type: Number }, // INDEX - date in milliseconds
18
15
  platform: {
19
16
  type: String,
20
17
  enum: platformsList,
21
- required: true,
22
18
  }, // e.g., 'telegram', 'facebook'
23
- original_text_id: { type: Schema.Types.Mixed }, // message_id
24
- original_text: { type: String }, // original text content
25
- translated_text: { type: String }, // translated text if available
26
- timestamp: { type: Number, required: true }, // date in milliseconds
27
- language: { type: String },
28
- text_geo: { type: [String], default: [] }, // e.g., ["sinjil", "ramallah", "west bank"]
29
- source_geo: { type: String }, // e.g., 'hebron'
19
+ source_region_id: { type: String }, // e.g., 'hebron'
20
+ source_region_title: { type: String }, // e.g., 'hebron'
21
+ source_id: { type: Schema.Types.Mixed, required: true }, // INDEX - reference to Source _id
22
+ source_title: { type: String }, // e.g., 'Telegram Channel Name'
23
+ source_url: { type: String }, // e.g., 'https://t.me/telegram_channel_name'
24
+ data_id: { type: Schema.Types.Mixed }, // INDEX - original text ID (e.g., message_id)
25
+ data_geo: { type: [String], default: [] }, // INDEX - e.g., ["sinjil", "ramallah", "west bank"]
26
+ data_text: { type: String }, // processed text content
27
+ data_url: { type: String }, // original text URL if available
28
+ data_original_type: { type: String }, // e.g., 'post', 'comment', 'reply', 'video', 'image'
29
+ data_language: { type: String }, // detected language of the text
30
+ data_sentiment: { type: String }, // sentiment analysis result
31
+ data_timestamp: { type: Number }, // original post timestamp if different from ingestion timestamp
30
32
  is_reply: { type: Boolean }, // true if this text is a reply to another text
31
33
  reply_to_message_id: { type: Schema.Types.Mixed },
32
34
  metadata: { type: Object }, // platform-specific fields
33
- created_at: { type: Number, default: Date.now },
34
- updated_at: { type: Number, default: Date.now },
35
+ created_at: { type: Number, default: Date.now, required: true },
36
+ updated_at: { type: Number, default: Date.now, required: true },
35
37
  media: {
36
38
  type: [
37
39
  {
@@ -42,12 +44,10 @@ export const MongoDataSchema = new Schema(
42
44
  ],
43
45
  }, // media attachments
44
46
  author: { type: String }, // e.g., author name or ID
45
- replies: { type: Schema.Types.Mixed }, // array of reply texts or IDs
46
- entities: { type: Schema.Types.Mixed }, // array of entities
47
47
  author_username: { type: String }, // e.g., author username
48
+ replies: { type: Schema.Types.Mixed }, // array of reply texts or IDs
48
49
  author_id: { type: String }, // e.g., author ID
49
- pipeline_name: { type: String }, // e.g., 'default_pipeline'
50
- source_region: { type: String }, // e.g., "New York", "California"
50
+ group_id: { type: String }, // e.g., source group ID
51
51
  },
52
52
  {
53
53
  versionKey: false,
@@ -146,105 +146,6 @@ export const MongoSourceSchema = new Schema(
146
146
  default: undefined,
147
147
  }, // user who requested this source
148
148
  notes: { type: String }, // internal notes about the source
149
-
150
- },
151
- {
152
- versionKey: false,
153
- toJSON: { virtuals: true },
154
- toObject: { virtuals: true },
155
- }
156
- );
157
-
158
- export const MongoQuerySchema = new Schema(
159
- {
160
- title: { type: String }, // e.g., 'Search Query'
161
- query: { type: String }, // search query includes geo information where it should be extracted with LLM
162
- sources: { type: [String] }, // array of source _id strings
163
- geos: { type: [String], required: true }, // array of geo strings
164
- geos_ids: { type: [String], required: true }, // array of geo _id strings
165
- time_range: {
166
- type: Schema.Types.Mixed, // can be relative or absolute time range
167
- required: true,
168
- },
169
- user_id: { type: String, required: true }, // user identifier
170
- user_instructions: { type: String }, // optional user instructions for the query
171
- user_time_zone: { type: String }, // user's timezone
172
- timestamp: { type: Number, default: Date.now }, // creation date
173
- },
174
- {
175
- versionKey: false,
176
- toJSON: { virtuals: true },
177
- toObject: { virtuals: true },
178
- }
179
- );
180
-
181
- export const MongoUserSettingsSchema = new Schema(
182
- {
183
- sources: { type: [String] }, // array of source _id strings
184
- geos: { type: [String], default: [] }, // array of geo strings
185
- time_range: {
186
- type: Schema.Types.Mixed,
187
- },
188
- user_id: { type: String, required: true },
189
- thread_id: { type: String },
190
- instructions: { type: String }, // optional user instructions for the query
191
- has_jobs_access: { type: Boolean, default: false }, // true if the user has access to jobs
192
- active_jobs_limit: { type: Number, default: 5 }, // maximum number of active jobs allowed
193
- created_at: { type: Date, default: Date.now }, // creation date
194
- updated_at: { type: Date, default: Date.now }, // last update date
195
- status: {
196
- type: String,
197
- enum: generalStatusList,
198
- default: 'active',
199
- }, // status of the user
200
- },
201
- {
202
- versionKey: false,
203
- toJSON: { virtuals: true },
204
- toObject: { virtuals: true },
205
- }
206
- );
207
-
208
- export const MongoConversationSchema = new Schema(
209
- {
210
- user_id: { type: String, required: true }, // user identifier
211
- title: { type: String }, // optional title for the conversation
212
- thread_id: { type: String }, // thread ID for the conversation
213
- status: {
214
- type: String,
215
- enum: generalStatusList,
216
- default: 'active',
217
- }, // status of the conversation
218
- created_at: { type: Date, default: Date.now }, // creation date
219
- updated_at: { type: Date, default: Date.now }, // last update date
220
- is_job: { type: Boolean, default: false }, // true if this is a job conversation
221
- },
222
- {
223
- versionKey: false,
224
- toJSON: { virtuals: true },
225
- toObject: { virtuals: true },
226
- }
227
- );
228
-
229
- export const MongoJobSchema = new Schema(
230
- {
231
- user_id: { type: String, required: true }, // user identifier
232
- job_name: { type: String, required: true }, // name of the automation job
233
- job_description: { type: String }, // description of the job
234
- status: {
235
- type: String,
236
- enum: generalStatusList,
237
- default: 'active',
238
- }, // status of the job
239
- created_at: { type: Number, default: Date.now }, // creation date
240
- updated_at: { type: Number, default: Date.now }, // last update date
241
- schedule: { type: String }, // cron schedule for the job
242
- schedule_text: { type: String }, // cron schedule in free-text format
243
- active_until: { type: Number || undefined }, // optional date until which the job is active
244
- query: { type: Schema.Types.Mixed, required: true }, // query associated with the job
245
- conversation_id: { type: String }, // optional conversation ID for the job
246
- thread_id: { type: String }, // thread ID for the job
247
- time_zone: { type: String }, // optional timezone for the job
248
149
  },
249
150
  {
250
151
  versionKey: false,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "harvester_sdk",
3
- "version": "1.0.6",
3
+ "version": "1.0.7",
4
4
  "description": "SDK for interacting with the Harvester API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
package/types.ts CHANGED
@@ -113,20 +113,22 @@ export const zodGeoSelectionSchema = z.object({
113
113
  });
114
114
 
115
115
  export const zodDataSchema = z.object({
116
- // Filterable fields
117
- text_geo: z.array(z.string()), // INDEX - e.g., ["sinjil", "ramallah", "west bank"]
118
- timestamp: z.number(), // INDEX - date in milliseconds - e.g., 1751210833000
119
- platform: z.enum(platformsList), // e.g., 'telegram', 'facebook'
120
- source_region: z.string().optional(), // e.g., 'hebron'
121
116
  _id: z.string().optional(),
117
+ timestamp: z.number().optional(), // INDEX - date in milliseconds - e.g., 1751210833000
118
+ platform: z.enum(platformsList).optional(), // e.g., 'telegram', 'facebook'
119
+ source_region_id: z.string().optional(), // e.g., 'hebron'
120
+ source_region_title: z.string().optional(), // e.g., 'hebron'
122
121
  source_id: z.string().or(z.number()), // INDEX - reference to Source _id (e.g., '60c72b2f9b1e8d3f4c8b4567')
123
- source_public_id: z.string().or(z.number()), // e.g., 'telegram:1234567890' (message_id)
124
- source_name: z.string(), // e.g., 'Telegram Channel Name'
125
- platform_id: z.string().or(z.number()), // channel_id reference to Source source_id
126
- original_text_id: z.string().or(z.number()), // message_id
127
- original_text: z.string().optional(), // original text content
128
- translated_text: z.string().optional(), // translated text if available - most of the time it will be translated to English
129
- language: z.string().optional(),
122
+ source_title: z.string().optional(), // e.g., 'Telegram Channel Name'
123
+ source_url: z.string().url().optional(), // e.g., 'https://t.me/telegram_channel_name'
124
+ data_id: z.string().or(z.number()).optional(), // INDEX - original text ID (e.g., message_id)
125
+ data_geo: z.array(z.string()).optional(), // INDEX - e.g., ["sinjil", "ramallah", "west bank"]
126
+ data_text: z.string().optional(), // processed text content
127
+ data_url: z.string().url().optional(), // original text URL if available
128
+ data_original_type: z.string().optional(), // e.g., 'post', 'comment', 'reply', 'video', 'image'
129
+ data_language: z.string().optional(), // detected language of the text
130
+ data_sentiment: z.string().optional(), // sentiment analysis result
131
+ data_timestamp: z.number().optional(), // original post timestamp if different from ingestion timestamp
130
132
  is_reply: z.boolean().optional(), // true if this text is a reply to another text
131
133
  reply_to_message_id: z.string().or(z.number()).optional(),
132
134
  metadata: z.record(z.string(), z.any()).optional(), // platform-specific fields
@@ -142,20 +144,17 @@ export const zodDataSchema = z.object({
142
144
  )
143
145
  .optional(), // media attachments
144
146
  author: z.string().optional(), // e.g., author name or ID
145
- replies: z.any().optional(), // array of reply texts or IDs
146
- entities: z.any().optional(), // array of reply texts or IDs
147
147
  author_username: z.string().optional(), // e.g., author username
148
+ replies: z.any().optional(), // array of reply texts or IDs
148
149
  author_id: z.string().optional(), // e.g., author ID
149
150
  group_id: z.string().optional(), // e.g., source group ID
151
+ // translated_text: z.string().optional(), // translated text if available - most of the time it will be translated to English
152
+ // entities: z.any().optional(), // array of entities
153
+ // source_public_id: z.string().or(z.number()), // e.g., 'telegram:1234567890' (message_id)
154
+ // platform_id: z.string().or(z.number()), // channel_id reference to Source source_id
155
+ // original_text_id: z.string().or(z.number()), // message_id
150
156
  });
151
157
 
152
- /**
153
- * 'approved' - active and approved sources,
154
- * 'back_to_business' - sources that were paused and now resumed,
155
- * 'pending' - sources that are pending approval,
156
- * 'inactive' - sources that are inactive,
157
- * 'in_review' - sources that are under review
158
- */
159
158
  export type RegionType = z.infer<typeof zodRegionSchema>;
160
159
  export type SourceGroupType = z.infer<typeof zodSourceGroupSchema>;
161
160
  export type SourceType = z.infer<typeof zodSourceSchema>;