instasave-sdk 1.2.6 → 1.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +1 -1
- package/dist/platforms/instagram/extractors/metadata.d.ts +1 -10
- package/dist/platforms/instagram/extractors/metadata.d.ts.map +1 -1
- package/dist/platforms/instagram/extractors/metadata.js +14 -116
- package/dist/platforms/instagram/extractors/metadata.js.map +1 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,38 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to InstaSave SDK will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [1.2.7] - 2026-01-04
|
|
6
|
+
|
|
7
|
+
### 🐛 Metadata Extraction Fix
|
|
8
|
+
|
|
9
|
+
### Fixed
|
|
10
|
+
- **Reliable metadata extraction** - Now extracts all metadata from `og:description` meta tag
|
|
11
|
+
- **likesCount** - Previously null, now correctly extracted (e.g., 4229)
|
|
12
|
+
- **commentsCount** - Previously null, now correctly extracted (e.g., 55)
|
|
13
|
+
- **caption** - Previously null, now correctly extracted from quoted text
|
|
14
|
+
- **timestamp** - Previously returned "o" (regex bug), now correctly extracted (e.g., "January 2, 2026")
|
|
15
|
+
|
|
16
|
+
### Changed
|
|
17
|
+
- **Extraction method** - Replaced complex JSON script parsing with simple `og:description` parsing
|
|
18
|
+
- **Regex patterns** - Simple, reliable patterns for each metadata field:
|
|
19
|
+
- Likes: `^([\d,]+)\s+likes`
|
|
20
|
+
- Comments: `(\d+[\d,]*)\s+comments`
|
|
21
|
+
- Caption: `&quot;([^&]+)&quot;` or `"([^"]+)"`
|
|
22
|
+
- Timestamp: `on\s+([^:]+):`
|
|
23
|
+
|
|
24
|
+
### Example Output
|
|
25
|
+
```json
|
|
26
|
+
{
|
|
27
|
+
"metadata": {
|
|
28
|
+
"likesCount": 4229,
|
|
29
|
+
"commentsCount": 55,
|
|
30
|
+
"caption": "Made for him",
|
|
31
|
+
"timestamp": "January 2, 2026",
|
|
32
|
+
"location": null
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
```
|
|
36
|
+
|
|
5
37
|
## [1.2.6] - 2026-01-04
|
|
6
38
|
|
|
7
39
|
### 🔧 Configurable Workflows Directory
|
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# InstaSave SDK
|
|
2
2
|
|
|
3
|
-
[](https://github.com/nykadamec/instasave-sdk/releases/tag/v1.2.7)
|
|
4
4
|
[](https://github.com/nykadamec/instasave-sdk/blob/main/CHANGELOG.md)
|
|
5
5
|
|
|
6
6
|
📸 **Simple Instagram scraper for downloading images from posts**
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Metadata extractor
|
|
2
|
+
* Metadata extractor using og:description
|
|
3
3
|
*/
|
|
4
4
|
import { Page } from 'puppeteer';
|
|
5
5
|
export interface PostMetadata {
|
|
@@ -9,16 +9,7 @@ export interface PostMetadata {
|
|
|
9
9
|
timestamp: string | null;
|
|
10
10
|
location: string | null;
|
|
11
11
|
}
|
|
12
|
-
/**
|
|
13
|
-
* MetadataExtractor s oddělenými metodami pro každý typ metadat
|
|
14
|
-
*/
|
|
15
12
|
export declare class MetadataExtractor {
|
|
16
13
|
extract(page: Page): Promise<PostMetadata>;
|
|
17
|
-
private extractCaption;
|
|
18
|
-
private extractLikesCount;
|
|
19
|
-
private extractCommentsCount;
|
|
20
|
-
private extractTimestamp;
|
|
21
|
-
private extractLocation;
|
|
22
|
-
private formatTimestamp;
|
|
23
14
|
}
|
|
24
15
|
//# sourceMappingURL=metadata.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"metadata.d.ts","sourceRoot":"","sources":["../../../../src/platforms/instagram/extractors/metadata.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"metadata.d.ts","sourceRoot":"","sources":["../../../../src/platforms/instagram/extractors/metadata.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,MAAM,WAAW,YAAY;IAC3B,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;CACzB;AAED,qBAAa,iBAAiB;IACtB,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,YAAY,CAAC;CAoBjD"}
|
|
@@ -1,129 +1,27 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
/**
|
|
3
|
-
* Metadata extractor
|
|
3
|
+
* Metadata extractor using og:description
|
|
4
4
|
*/
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.MetadataExtractor = void 0;
|
|
7
|
-
const common_1 = require("./common");
|
|
8
|
-
const constants_1 = require("../constants");
|
|
9
|
-
/**
|
|
10
|
-
* MetadataExtractor s oddělenými metodami pro každý typ metadat
|
|
11
|
-
*/
|
|
12
7
|
class MetadataExtractor {
|
|
13
8
|
async extract(page) {
|
|
14
|
-
const
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
]);
|
|
9
|
+
const description = await page.$eval('meta[property="og:description"]', el => el.getAttribute('content') || '').catch(() => null);
|
|
10
|
+
if (!description) {
|
|
11
|
+
return { likesCount: null, commentsCount: null, caption: null, timestamp: null, location: null };
|
|
12
|
+
}
|
|
13
|
+
const likesMatch = description.match(/^([\d,]+)\s+likes/);
|
|
14
|
+
const commentsMatch = description.match(/(\d+[\d,]*)\s+comments/);
|
|
15
|
+
const captionMatch = description.match(/&quot;([^&]+)&quot;/) || description.match(/"([^"]+)"/);
|
|
16
|
+
const timestampMatch = description.match(/on\s+([^:]+):/);
|
|
21
17
|
return {
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
timestamp,
|
|
26
|
-
location
|
|
18
|
+
likesCount: likesMatch ? parseInt(likesMatch[1].replace(/,/g, ''), 10) : null,
|
|
19
|
+
commentsCount: commentsMatch ? parseInt(commentsMatch[1].replace(/,/g, ''), 10) : null,
|
|
20
|
+
caption: captionMatch ? captionMatch[1].trim() : null,
|
|
21
|
+
timestamp: timestampMatch ? timestampMatch[1].trim() : null,
|
|
22
|
+
location: null
|
|
27
23
|
};
|
|
28
24
|
}
|
|
29
|
-
async extractCaption(page) {
|
|
30
|
-
return await (0, common_1.findInJsonScripts)(page, (obj) => {
|
|
31
|
-
if (obj.edge_media_to_caption?.edges?.[0]?.node?.text) {
|
|
32
|
-
return obj.edge_media_to_caption.edges[0].node.text;
|
|
33
|
-
}
|
|
34
|
-
if (obj.caption?.text) {
|
|
35
|
-
return obj.caption.text;
|
|
36
|
-
}
|
|
37
|
-
if (obj.accessibility_caption) {
|
|
38
|
-
return obj.accessibility_caption;
|
|
39
|
-
}
|
|
40
|
-
return null;
|
|
41
|
-
});
|
|
42
|
-
}
|
|
43
|
-
async extractLikesCount(page) {
|
|
44
|
-
return await (0, common_1.findInJsonScripts)(page, (obj) => {
|
|
45
|
-
if (obj.edge_media_preview_like?.count !== undefined) {
|
|
46
|
-
return obj.edge_media_preview_like.count;
|
|
47
|
-
}
|
|
48
|
-
if (obj.like_count !== undefined) {
|
|
49
|
-
return obj.like_count;
|
|
50
|
-
}
|
|
51
|
-
if (obj.edge_liked_by?.count !== undefined) {
|
|
52
|
-
return obj.edge_liked_by.count;
|
|
53
|
-
}
|
|
54
|
-
return null;
|
|
55
|
-
});
|
|
56
|
-
}
|
|
57
|
-
async extractCommentsCount(page) {
|
|
58
|
-
return await (0, common_1.findInJsonScripts)(page, (obj) => {
|
|
59
|
-
if (obj.edge_media_to_comment?.count !== undefined) {
|
|
60
|
-
return obj.edge_media_to_comment.count;
|
|
61
|
-
}
|
|
62
|
-
if (obj.comment_count !== undefined) {
|
|
63
|
-
return obj.comment_count;
|
|
64
|
-
}
|
|
65
|
-
if (obj.edge_media_to_parent_comment?.count !== undefined) {
|
|
66
|
-
return obj.edge_media_to_parent_comment.count;
|
|
67
|
-
}
|
|
68
|
-
return null;
|
|
69
|
-
});
|
|
70
|
-
}
|
|
71
|
-
async extractTimestamp(page) {
|
|
72
|
-
// Nejdřív zkusit najít relativní čas v DOM
|
|
73
|
-
const relativeTime = await page.evaluate((timeRegex) => {
|
|
74
|
-
const allElements = document.querySelectorAll('*');
|
|
75
|
-
for (const element of allElements) {
|
|
76
|
-
const text = element.textContent?.trim();
|
|
77
|
-
if (text?.match(timeRegex)) {
|
|
78
|
-
return text.match(timeRegex)?.[0] || null;
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
return null;
|
|
82
|
-
}, constants_1.REGEX_PATTERNS.TIME_AGO);
|
|
83
|
-
if (relativeTime)
|
|
84
|
-
return relativeTime;
|
|
85
|
-
// Fallback na absolutní čas z JSON
|
|
86
|
-
return await (0, common_1.findInJsonScripts)(page, (obj) => {
|
|
87
|
-
if (obj.taken_at_timestamp) {
|
|
88
|
-
return this.formatTimestamp(obj.taken_at_timestamp);
|
|
89
|
-
}
|
|
90
|
-
if (obj.created_time) {
|
|
91
|
-
return this.formatTimestamp(obj.created_time);
|
|
92
|
-
}
|
|
93
|
-
return null;
|
|
94
|
-
});
|
|
95
|
-
}
|
|
96
|
-
async extractLocation(page) {
|
|
97
|
-
return await (0, common_1.findInJsonScripts)(page, (obj) => {
|
|
98
|
-
if (obj.location?.name) {
|
|
99
|
-
return obj.location.name;
|
|
100
|
-
}
|
|
101
|
-
if (obj.venue?.name) {
|
|
102
|
-
return obj.venue.name;
|
|
103
|
-
}
|
|
104
|
-
if (obj.place?.name) {
|
|
105
|
-
return obj.place.name;
|
|
106
|
-
}
|
|
107
|
-
return null;
|
|
108
|
-
});
|
|
109
|
-
}
|
|
110
|
-
formatTimestamp(timestamp) {
|
|
111
|
-
const date = new Date(timestamp * 1000);
|
|
112
|
-
const now = new Date();
|
|
113
|
-
const diffMs = now.getTime() - date.getTime();
|
|
114
|
-
const diffHours = Math.floor(diffMs / (1000 * 60 * 60));
|
|
115
|
-
const diffDays = Math.floor(diffHours / 24);
|
|
116
|
-
if (diffDays > 0) {
|
|
117
|
-
return `${diffDays} day${diffDays > 1 ? 's' : ''} ago`;
|
|
118
|
-
}
|
|
119
|
-
else if (diffHours > 0) {
|
|
120
|
-
return `${diffHours} hour${diffHours > 1 ? 's' : ''} ago`;
|
|
121
|
-
}
|
|
122
|
-
else {
|
|
123
|
-
const diffMins = Math.floor(diffMs / (1000 * 60));
|
|
124
|
-
return `${diffMins} minute${diffMins > 1 ? 's' : ''} ago`;
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
25
|
}
|
|
128
26
|
exports.MetadataExtractor = MetadataExtractor;
|
|
129
27
|
//# sourceMappingURL=metadata.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"metadata.js","sourceRoot":"","sources":["../../../../src/platforms/instagram/extractors/metadata.ts"],"names":[],"mappings":";AAAA;;GAEG;;;
|
|
1
|
+
{"version":3,"file":"metadata.js","sourceRoot":"","sources":["../../../../src/platforms/instagram/extractors/metadata.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAYH,MAAa,iBAAiB;IAC5B,KAAK,CAAC,OAAO,CAAC,IAAU;QACtB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,iCAAiC,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC;QAElI,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;QACnG,CAAC;QAED,MAAM,UAAU,GAAG,WAAW,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;QAC1D,MAAM,aAAa,GAAG,WAAW,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;QAClE,MAAM,YAAY,GAAG,WAAW,CAAC,KAAK,CAAC,qBAAqB,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QAChG,MAAM,cAAc,GAAG,WAAW,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;QAE1D,OAAO;YACL,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI;YAC7E,aAAa,EAAE,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI;YACtF,OAAO,EAAE,YAAY,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI;YACrD,SAAS,EAAE,cAAc,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI;YAC3D,QAAQ,EAAE,IAAI;SACf,CAAC;IACJ,CAAC;CACF;AArBD,8CAqBC"}
|