harvester_sdk 1.0.8 → 1.0.10

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -36,123 +36,279 @@ export declare const MongoDataSchema: Schema<any, import("mongoose").Model<any,
36
36
  }, {
37
37
  created_at: number;
38
38
  updated_at: number;
39
- source_dominant_geos: string[];
39
+ hashtags: string[];
40
+ mentions: string[];
40
41
  source_id: any;
42
+ source_dominant_geos: string[];
41
43
  data_geo: string[];
44
+ data_topics: string[];
45
+ data_keywords: string[];
42
46
  media: import("mongoose").Types.DocumentArray<{
43
- type?: "image" | "video" | "audio" | "link" | null | undefined;
47
+ type?: "image" | "video" | "audio" | "link" | "document" | "gif" | "sticker" | null | undefined;
44
48
  url?: string | null | undefined;
49
+ duration?: number | null | undefined;
45
50
  caption?: string | null | undefined;
51
+ thumbnail_url?: string | null | undefined;
52
+ width?: number | null | undefined;
53
+ height?: number | null | undefined;
54
+ size?: number | null | undefined;
55
+ mime_type?: string | null | undefined;
46
56
  }, import("mongoose").Types.Subdocument<import("mongoose").Types.ObjectId, any, {
47
- type?: "image" | "video" | "audio" | "link" | null | undefined;
57
+ type?: "image" | "video" | "audio" | "link" | "document" | "gif" | "sticker" | null | undefined;
48
58
  url?: string | null | undefined;
59
+ duration?: number | null | undefined;
49
60
  caption?: string | null | undefined;
61
+ thumbnail_url?: string | null | undefined;
62
+ width?: number | null | undefined;
63
+ height?: number | null | undefined;
64
+ size?: number | null | undefined;
65
+ mime_type?: string | null | undefined;
50
66
  }> & {
51
- type?: "image" | "video" | "audio" | "link" | null | undefined;
67
+ type?: "image" | "video" | "audio" | "link" | "document" | "gif" | "sticker" | null | undefined;
52
68
  url?: string | null | undefined;
69
+ duration?: number | null | undefined;
53
70
  caption?: string | null | undefined;
71
+ thumbnail_url?: string | null | undefined;
72
+ width?: number | null | undefined;
73
+ height?: number | null | undefined;
74
+ size?: number | null | undefined;
75
+ mime_type?: string | null | undefined;
54
76
  }>;
77
+ is_reply: boolean;
55
78
  platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | null | undefined;
56
79
  metadata?: any;
57
80
  timestamp?: number | null | undefined;
58
- source_region_id?: string | null | undefined;
59
- source_region_title?: string | null | undefined;
81
+ is_pinned?: boolean | null | undefined;
82
+ is_edited?: boolean | null | undefined;
83
+ edit_date?: number | null | undefined;
84
+ forwarded_from_id?: string | null | undefined;
85
+ forwarded_date?: number | null | undefined;
86
+ author?: string | null | undefined;
87
+ data_timestamp?: number | null | undefined;
60
88
  source_title?: string | null | undefined;
61
89
  source_url?: string | null | undefined;
90
+ source_region_id?: string | null | undefined;
91
+ source_region_title?: string | null | undefined;
62
92
  source_group_id?: string | null | undefined;
63
93
  data_id?: any;
64
- data_text?: string | null | undefined;
65
94
  data_url?: string | null | undefined;
66
- data_original_type?: string | null | undefined;
95
+ data_original_type?: "image" | "video" | "link" | "document" | "post" | "comment" | "reply" | "photo" | "story" | "reel" | "article" | null | undefined;
96
+ data_text?: string | null | undefined;
67
97
  data_language?: string | null | undefined;
68
- data_sentiment?: string | null | undefined;
69
- data_timestamp?: number | null | undefined;
70
- is_reply?: boolean | null | undefined;
71
- reply_to_message_id?: any;
72
- author?: string | null | undefined;
98
+ data_sentiment?: "positive" | "negative" | "neutral" | "mixed" | null | undefined;
73
99
  author_username?: string | null | undefined;
74
- replies?: any;
75
100
  author_id?: string | null | undefined;
101
+ author_info?: {
102
+ username?: string | null | undefined;
103
+ id?: string | null | undefined;
104
+ display_name?: string | null | undefined;
105
+ avatar_url?: string | null | undefined;
106
+ is_verified?: boolean | null | undefined;
107
+ follower_count?: number | null | undefined;
108
+ } | null | undefined;
109
+ reply_to_message_id?: any;
110
+ reply_to_author_id?: string | null | undefined;
111
+ replies?: any;
112
+ replies_info?: {
113
+ count: number;
114
+ recent_repliers: string[];
115
+ has_thread?: boolean | null | undefined;
116
+ thread_id?: string | null | undefined;
117
+ } | null | undefined;
118
+ engagement?: {
119
+ views?: number | null | undefined;
120
+ reactions?: Map<string, number> | null | undefined;
121
+ likes?: number | null | undefined;
122
+ shares?: number | null | undefined;
123
+ comments?: number | null | undefined;
124
+ } | null | undefined;
125
+ is_deleted?: boolean | null | undefined;
126
+ is_forwarded?: boolean | null | undefined;
127
+ processing_errors?: string | null | undefined;
76
128
  }, import("mongoose").Document<unknown, {}, import("mongoose").FlatRecord<{
77
129
  created_at: number;
78
130
  updated_at: number;
79
- source_dominant_geos: string[];
131
+ hashtags: string[];
132
+ mentions: string[];
80
133
  source_id: any;
134
+ source_dominant_geos: string[];
81
135
  data_geo: string[];
136
+ data_topics: string[];
137
+ data_keywords: string[];
82
138
  media: import("mongoose").Types.DocumentArray<{
83
- type?: "image" | "video" | "audio" | "link" | null | undefined;
139
+ type?: "image" | "video" | "audio" | "link" | "document" | "gif" | "sticker" | null | undefined;
84
140
  url?: string | null | undefined;
141
+ duration?: number | null | undefined;
85
142
  caption?: string | null | undefined;
143
+ thumbnail_url?: string | null | undefined;
144
+ width?: number | null | undefined;
145
+ height?: number | null | undefined;
146
+ size?: number | null | undefined;
147
+ mime_type?: string | null | undefined;
86
148
  }, import("mongoose").Types.Subdocument<import("mongoose").Types.ObjectId, any, {
87
- type?: "image" | "video" | "audio" | "link" | null | undefined;
149
+ type?: "image" | "video" | "audio" | "link" | "document" | "gif" | "sticker" | null | undefined;
88
150
  url?: string | null | undefined;
151
+ duration?: number | null | undefined;
89
152
  caption?: string | null | undefined;
153
+ thumbnail_url?: string | null | undefined;
154
+ width?: number | null | undefined;
155
+ height?: number | null | undefined;
156
+ size?: number | null | undefined;
157
+ mime_type?: string | null | undefined;
90
158
  }> & {
91
- type?: "image" | "video" | "audio" | "link" | null | undefined;
159
+ type?: "image" | "video" | "audio" | "link" | "document" | "gif" | "sticker" | null | undefined;
92
160
  url?: string | null | undefined;
161
+ duration?: number | null | undefined;
93
162
  caption?: string | null | undefined;
163
+ thumbnail_url?: string | null | undefined;
164
+ width?: number | null | undefined;
165
+ height?: number | null | undefined;
166
+ size?: number | null | undefined;
167
+ mime_type?: string | null | undefined;
94
168
  }>;
169
+ is_reply: boolean;
95
170
  platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | null | undefined;
96
171
  metadata?: any;
97
172
  timestamp?: number | null | undefined;
98
- source_region_id?: string | null | undefined;
99
- source_region_title?: string | null | undefined;
173
+ is_pinned?: boolean | null | undefined;
174
+ is_edited?: boolean | null | undefined;
175
+ edit_date?: number | null | undefined;
176
+ forwarded_from_id?: string | null | undefined;
177
+ forwarded_date?: number | null | undefined;
178
+ author?: string | null | undefined;
179
+ data_timestamp?: number | null | undefined;
100
180
  source_title?: string | null | undefined;
101
181
  source_url?: string | null | undefined;
182
+ source_region_id?: string | null | undefined;
183
+ source_region_title?: string | null | undefined;
102
184
  source_group_id?: string | null | undefined;
103
185
  data_id?: any;
104
- data_text?: string | null | undefined;
105
186
  data_url?: string | null | undefined;
106
- data_original_type?: string | null | undefined;
187
+ data_original_type?: "image" | "video" | "link" | "document" | "post" | "comment" | "reply" | "photo" | "story" | "reel" | "article" | null | undefined;
188
+ data_text?: string | null | undefined;
107
189
  data_language?: string | null | undefined;
108
- data_sentiment?: string | null | undefined;
109
- data_timestamp?: number | null | undefined;
110
- is_reply?: boolean | null | undefined;
111
- reply_to_message_id?: any;
112
- author?: string | null | undefined;
190
+ data_sentiment?: "positive" | "negative" | "neutral" | "mixed" | null | undefined;
113
191
  author_username?: string | null | undefined;
114
- replies?: any;
115
192
  author_id?: string | null | undefined;
193
+ author_info?: {
194
+ username?: string | null | undefined;
195
+ id?: string | null | undefined;
196
+ display_name?: string | null | undefined;
197
+ avatar_url?: string | null | undefined;
198
+ is_verified?: boolean | null | undefined;
199
+ follower_count?: number | null | undefined;
200
+ } | null | undefined;
201
+ reply_to_message_id?: any;
202
+ reply_to_author_id?: string | null | undefined;
203
+ replies?: any;
204
+ replies_info?: {
205
+ count: number;
206
+ recent_repliers: string[];
207
+ has_thread?: boolean | null | undefined;
208
+ thread_id?: string | null | undefined;
209
+ } | null | undefined;
210
+ engagement?: {
211
+ views?: number | null | undefined;
212
+ reactions?: Map<string, number> | null | undefined;
213
+ likes?: number | null | undefined;
214
+ shares?: number | null | undefined;
215
+ comments?: number | null | undefined;
216
+ } | null | undefined;
217
+ is_deleted?: boolean | null | undefined;
218
+ is_forwarded?: boolean | null | undefined;
219
+ processing_errors?: string | null | undefined;
116
220
  }>, {}> & import("mongoose").FlatRecord<{
117
221
  created_at: number;
118
222
  updated_at: number;
119
- source_dominant_geos: string[];
223
+ hashtags: string[];
224
+ mentions: string[];
120
225
  source_id: any;
226
+ source_dominant_geos: string[];
121
227
  data_geo: string[];
228
+ data_topics: string[];
229
+ data_keywords: string[];
122
230
  media: import("mongoose").Types.DocumentArray<{
123
- type?: "image" | "video" | "audio" | "link" | null | undefined;
231
+ type?: "image" | "video" | "audio" | "link" | "document" | "gif" | "sticker" | null | undefined;
124
232
  url?: string | null | undefined;
233
+ duration?: number | null | undefined;
125
234
  caption?: string | null | undefined;
235
+ thumbnail_url?: string | null | undefined;
236
+ width?: number | null | undefined;
237
+ height?: number | null | undefined;
238
+ size?: number | null | undefined;
239
+ mime_type?: string | null | undefined;
126
240
  }, import("mongoose").Types.Subdocument<import("mongoose").Types.ObjectId, any, {
127
- type?: "image" | "video" | "audio" | "link" | null | undefined;
241
+ type?: "image" | "video" | "audio" | "link" | "document" | "gif" | "sticker" | null | undefined;
128
242
  url?: string | null | undefined;
243
+ duration?: number | null | undefined;
129
244
  caption?: string | null | undefined;
245
+ thumbnail_url?: string | null | undefined;
246
+ width?: number | null | undefined;
247
+ height?: number | null | undefined;
248
+ size?: number | null | undefined;
249
+ mime_type?: string | null | undefined;
130
250
  }> & {
131
- type?: "image" | "video" | "audio" | "link" | null | undefined;
251
+ type?: "image" | "video" | "audio" | "link" | "document" | "gif" | "sticker" | null | undefined;
132
252
  url?: string | null | undefined;
253
+ duration?: number | null | undefined;
133
254
  caption?: string | null | undefined;
255
+ thumbnail_url?: string | null | undefined;
256
+ width?: number | null | undefined;
257
+ height?: number | null | undefined;
258
+ size?: number | null | undefined;
259
+ mime_type?: string | null | undefined;
134
260
  }>;
261
+ is_reply: boolean;
135
262
  platform?: "telegram" | "facebook" | "instagram" | "tiktok" | "website" | null | undefined;
136
263
  metadata?: any;
137
264
  timestamp?: number | null | undefined;
138
- source_region_id?: string | null | undefined;
139
- source_region_title?: string | null | undefined;
265
+ is_pinned?: boolean | null | undefined;
266
+ is_edited?: boolean | null | undefined;
267
+ edit_date?: number | null | undefined;
268
+ forwarded_from_id?: string | null | undefined;
269
+ forwarded_date?: number | null | undefined;
270
+ author?: string | null | undefined;
271
+ data_timestamp?: number | null | undefined;
140
272
  source_title?: string | null | undefined;
141
273
  source_url?: string | null | undefined;
274
+ source_region_id?: string | null | undefined;
275
+ source_region_title?: string | null | undefined;
142
276
  source_group_id?: string | null | undefined;
143
277
  data_id?: any;
144
- data_text?: string | null | undefined;
145
278
  data_url?: string | null | undefined;
146
- data_original_type?: string | null | undefined;
279
+ data_original_type?: "image" | "video" | "link" | "document" | "post" | "comment" | "reply" | "photo" | "story" | "reel" | "article" | null | undefined;
280
+ data_text?: string | null | undefined;
147
281
  data_language?: string | null | undefined;
148
- data_sentiment?: string | null | undefined;
149
- data_timestamp?: number | null | undefined;
150
- is_reply?: boolean | null | undefined;
151
- reply_to_message_id?: any;
152
- author?: string | null | undefined;
282
+ data_sentiment?: "positive" | "negative" | "neutral" | "mixed" | null | undefined;
153
283
  author_username?: string | null | undefined;
154
- replies?: any;
155
284
  author_id?: string | null | undefined;
285
+ author_info?: {
286
+ username?: string | null | undefined;
287
+ id?: string | null | undefined;
288
+ display_name?: string | null | undefined;
289
+ avatar_url?: string | null | undefined;
290
+ is_verified?: boolean | null | undefined;
291
+ follower_count?: number | null | undefined;
292
+ } | null | undefined;
293
+ reply_to_message_id?: any;
294
+ reply_to_author_id?: string | null | undefined;
295
+ replies?: any;
296
+ replies_info?: {
297
+ count: number;
298
+ recent_repliers: string[];
299
+ has_thread?: boolean | null | undefined;
300
+ thread_id?: string | null | undefined;
301
+ } | null | undefined;
302
+ engagement?: {
303
+ views?: number | null | undefined;
304
+ reactions?: Map<string, number> | null | undefined;
305
+ likes?: number | null | undefined;
306
+ shares?: number | null | undefined;
307
+ comments?: number | null | undefined;
308
+ } | null | undefined;
309
+ is_deleted?: boolean | null | undefined;
310
+ is_forwarded?: boolean | null | undefined;
311
+ processing_errors?: string | null | undefined;
156
312
  }> & {
157
313
  _id: import("mongoose").Types.ObjectId;
158
314
  } & {
package/dist/index.js CHANGED
@@ -20,44 +20,134 @@ __exportStar(require("./types"), exports);
20
20
  const mongoose_1 = require("mongoose");
21
21
  const types_1 = require("./types");
22
22
  exports.MongoDataSchema = new mongoose_1.Schema({
23
+ // Timestamps
23
24
  timestamp: { type: Number },
25
+ data_timestamp: { type: Number },
26
+ created_at: { type: Number, default: Date.now, required: true },
27
+ updated_at: { type: Number, default: Date.now, required: true },
28
+ // Platform & Source Info (denormalized for query performance)
24
29
  platform: {
25
30
  type: String,
26
31
  enum: types_1.platformsList,
27
32
  },
28
- source_region_id: { type: String },
29
- source_region_title: { type: String },
30
- source_dominant_geos: { type: [String], default: [] },
31
33
  source_id: { type: mongoose_1.Schema.Types.Mixed, required: true },
32
34
  source_title: { type: String },
33
35
  source_url: { type: String },
36
+ source_region_id: { type: String },
37
+ source_region_title: { type: String },
34
38
  source_group_id: { type: String },
39
+ source_dominant_geos: { type: [String], default: [] },
40
+ // Data/Post Identifiers
35
41
  data_id: { type: mongoose_1.Schema.Types.Mixed },
36
- data_geo: { type: [String], default: [] },
37
- data_text: { type: String },
38
42
  data_url: { type: String },
39
- data_original_type: { type: String },
43
+ data_original_type: {
44
+ type: String,
45
+ enum: [
46
+ 'post',
47
+ 'comment',
48
+ 'reply',
49
+ 'video',
50
+ 'image',
51
+ 'photo',
52
+ 'story',
53
+ 'reel',
54
+ 'article',
55
+ 'link',
56
+ 'document',
57
+ ],
58
+ },
59
+ // Content
60
+ data_text: { type: String },
40
61
  data_language: { type: String },
41
- data_sentiment: { type: String },
42
- data_timestamp: { type: Number },
43
- is_reply: { type: Boolean },
44
- reply_to_message_id: { type: mongoose_1.Schema.Types.Mixed },
45
- metadata: { type: Object },
46
- created_at: { type: Number, default: Date.now, required: true },
47
- updated_at: { type: Number, default: Date.now, required: true },
62
+ data_geo: { type: [String], default: [] },
63
+ // Analysis (populated by processors)
64
+ data_sentiment: {
65
+ type: String,
66
+ enum: ['positive', 'negative', 'neutral', 'mixed'],
67
+ },
68
+ data_topics: { type: [String], default: [] },
69
+ data_keywords: { type: [String], default: [] },
70
+ // Media attachments
48
71
  media: {
49
72
  type: [
50
73
  {
51
- type: { type: String, enum: ['image', 'video', 'audio', 'link'] },
74
+ type: {
75
+ type: String,
76
+ enum: [
77
+ 'image',
78
+ 'video',
79
+ 'audio',
80
+ 'link',
81
+ 'document',
82
+ 'gif',
83
+ 'sticker',
84
+ ],
85
+ },
52
86
  url: { type: String },
53
87
  caption: { type: String },
88
+ thumbnail_url: { type: String },
89
+ width: { type: Number },
90
+ height: { type: Number },
91
+ duration: { type: Number },
92
+ size: { type: Number },
93
+ mime_type: { type: String },
54
94
  },
55
95
  ],
96
+ default: [],
56
97
  },
98
+ // Author information (legacy fields for backward compatibility)
57
99
  author: { type: String },
58
100
  author_username: { type: String },
101
+ author_id: { type: String },
102
+ // NEW - structured author info
103
+ author_info: {
104
+ type: {
105
+ id: { type: String },
106
+ username: { type: String },
107
+ display_name: { type: String },
108
+ avatar_url: { type: String },
109
+ is_verified: { type: Boolean },
110
+ follower_count: { type: Number },
111
+ },
112
+ },
113
+ // Reply/Thread information
114
+ is_reply: { type: Boolean, default: false },
115
+ reply_to_message_id: { type: mongoose_1.Schema.Types.Mixed },
116
+ reply_to_author_id: { type: String },
59
117
  replies: { type: mongoose_1.Schema.Types.Mixed },
60
- author_id: { type: String }, // e.g., author ID
118
+ // NEW - structured replies info
119
+ replies_info: {
120
+ type: {
121
+ count: { type: Number, default: 0 },
122
+ recent_repliers: { type: [String] },
123
+ has_thread: { type: Boolean },
124
+ thread_id: { type: String },
125
+ },
126
+ },
127
+ // Engagement metrics (platform-specific)
128
+ engagement: {
129
+ type: {
130
+ views: { type: Number },
131
+ likes: { type: Number },
132
+ shares: { type: Number },
133
+ comments: { type: Number },
134
+ reactions: { type: Map, of: Number }, // e.g., { "like": 10, "love": 5 }
135
+ },
136
+ },
137
+ // Content flags
138
+ is_edited: { type: Boolean },
139
+ edit_date: { type: Number },
140
+ is_pinned: { type: Boolean },
141
+ is_deleted: { type: Boolean },
142
+ is_forwarded: { type: Boolean },
143
+ forwarded_from_id: { type: String },
144
+ forwarded_date: { type: Number },
145
+ // Hashtags and mentions (extracted for easier querying)
146
+ hashtags: { type: [String], default: [] },
147
+ mentions: { type: [String], default: [] },
148
+ // Platform-specific metadata (flexible)
149
+ metadata: { type: Object },
150
+ processing_errors: { type: String },
61
151
  }, {
62
152
  versionKey: false,
63
153
  toJSON: { virtuals: true },