instagram-archiver 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of instagram-archiver might be problematic.

@@ -0,0 +1,18 @@
+ The MIT License (MIT)
+
+ Copyright (c) 2025 instagram-archiver authors
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+ associated documentation files (the "Software"), to deal in the Software without restriction,
+ including without limitation the rights to use, copy, modify, merge, publish, distribute,
+ sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all copies or
+ substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+ NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+ OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,119 @@
+ Metadata-Version: 2.3
+ Name: instagram-archiver
+ Version: 0.3.0
+ Summary: Save Instagram content you have access to.
+ License: MIT
+ Keywords: command line,instagram
+ Author: Andrew Udvare
+ Author-email: audvare@gmail.com
+ Requires-Python: >=3.12,<3.14
+ Classifier: Development Status :: 2 - Pre-Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Typing :: Typed
+ Requires-Dist: beautifulsoup4 (>=4.13.4,<5.0.0)
+ Requires-Dist: click (>=8.2.0,<9.0.0)
+ Requires-Dist: colorlog (>=6.9.0,<7.0.0)
+ Requires-Dist: html5lib (>=1.1,<2.0)
+ Requires-Dist: requests (>=2.32.3,<3.0.0)
+ Requires-Dist: typing-extensions (>=4.13.1,<5.0.0)
+ Requires-Dist: yt-dlp-utils (>=0,<1)
+ Project-URL: Documentation, https://instagram-archiver.readthedocs.org
+ Project-URL: Homepage, https://tatsh.github.io/instagram-archiver/
+ Project-URL: Issues, https://github.com/Tatsh/instagram-archiver/issues
+ Project-URL: Repository, https://github.com/Tatsh/instagram-archiver
+ Description-Content-Type: text/markdown
+
+ # instagram-archiver
+
+ [![Python versions](https://img.shields.io/pypi/pyversions/instagram-archiver.svg?color=blue&logo=python&logoColor=white)](https://www.python.org/)
+ [![PyPI - Version](https://img.shields.io/pypi/v/instagram-archiver)](https://pypi.org/project/instagram-archiver/)
+ [![GitHub tag (with filter)](https://img.shields.io/github/v/tag/Tatsh/instagram-archiver)](https://github.com/Tatsh/instagram-archiver/tags)
+ [![License](https://img.shields.io/github/license/Tatsh/instagram-archiver)](https://github.com/Tatsh/instagram-archiver/blob/master/LICENSE.txt)
+ [![GitHub commits since latest release (by SemVer including pre-releases)](https://img.shields.io/github/commits-since/Tatsh/instagram-archiver/v0.3.0/master)](https://github.com/Tatsh/instagram-archiver/compare/v0.3.0...master)
+ [![QA](https://github.com/Tatsh/instagram-archiver/actions/workflows/qa.yml/badge.svg)](https://github.com/Tatsh/instagram-archiver/actions/workflows/qa.yml)
+ [![Tests](https://github.com/Tatsh/instagram-archiver/actions/workflows/tests.yml/badge.svg)](https://github.com/Tatsh/instagram-archiver/actions/workflows/tests.yml)
+ [![Coverage Status](https://coveralls.io/repos/github/Tatsh/instagram-archiver/badge.svg?branch=master)](https://coveralls.io/github/Tatsh/instagram-archiver?branch=master)
+ [![Documentation Status](https://readthedocs.org/projects/instagram-archiver/badge/?version=latest)](https://instagram-archiver.readthedocs.org/?badge=latest)
+ [![mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/)
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
+ [![pydocstyle](https://img.shields.io/badge/pydocstyle-enabled-AD4CD3)](http://www.pydocstyle.org/en/stable/)
+ [![pytest](https://img.shields.io/badge/pytest-zz?logo=Pytest&labelColor=black&color=black)](https://docs.pytest.org/en/stable/)
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
+ [![Downloads](https://static.pepy.tech/badge/instagram-archiver/month)](https://pepy.tech/project/instagram-archiver)
+ [![Stargazers](https://img.shields.io/github/stars/Tatsh/instagram-archiver?logo=github&style=flat)](https://github.com/Tatsh/instagram-archiver/stargazers)
+
+ [![@Tatsh](https://img.shields.io/badge/dynamic/json?url=https%3A%2F%2Fpublic.api.bsky.app%2Fxrpc%2Fapp.bsky.actor.getProfile%2F%3Factor%3Ddid%3Aplc%3Auq42idtvuccnmtl57nsucz72%26query%3D%24.followersCount%26style%3Dsocial%26logo%3Dbluesky%26label%3DFollow%2520%40Tatsh&query=%24.followersCount&style=social&logo=bluesky&label=Follow%20%40Tatsh)](https://bsky.app/profile/Tatsh.bsky.social)
+ [![Mastodon Follow](https://img.shields.io/mastodon/follow/109370961877277568?domain=hostux.social&style=social)](https://hostux.social/@Tatsh)
+
+ Save Instagram content you have access to.
+
+ ## Installation
+
+ ### Poetry
+
+ ```shell
+ poetry add instagram-archiver
+ ```
+
+ ### Pip
+
+ ```shell
+ pip install instagram-archiver
+ ```
+
+ ## Usage
+
+ ```plain
+ Usage: instagram-archiver [OPTIONS] USERNAME
+
+   Archive a profile's posts.
+
+ Options:
+   -o, --output-dir DIRECTORY  Output directory.
+   -b, --browser [brave|chrome|chromium|edge|opera|vivaldi|firefox|safari]
+                               Browser to read cookies from.
+   -p, --profile TEXT          Browser profile.
+   -d, --debug                 Enable debug output.
+   --no-log                    Ignore log (re-fetch everything).
+   -C, --include-comments      Also download all comments (extends download
+                               time significantly).
+   -h, --help                  Show this message and exit.
+ ```
+
+ Typical use:
+
+ ```shell
+ instagram-archiver -o ~/instagram-backups/username username
+ ```
+
+ ### `instagram-save-saved`
+
+ This tool saves your saved posts (at `www.instagram.com/username/saved/all-posts`).
+
+ ```plain
+ Usage: instagram-save-saved [OPTIONS]
+
+   Archive your saved posts.
+
+ Options:
+   -o, --output-dir DIRECTORY  Output directory.
+   -b, --browser [brave|chrome|chromium|edge|opera|vivaldi|firefox|safari]
+                               Browser to read cookies from.
+   -p, --profile TEXT          Browser profile.
+   -d, --debug                 Enable debug output.
+   -C, --include-comments      Also download all comments (extends download
+                               time significantly).
+   -u, --unsave                Unsave posts after successful archive.
+   -h, --help                  Show this message and exit.
+ ```
+
+ ## Notes
+
+ The default output path is the username under the current working directory.
+
+ Videos are saved using yt-dlp and its respective configuration.
+
@@ -0,0 +1,89 @@
+ # instagram-archiver
+
+ [![Python versions](https://img.shields.io/pypi/pyversions/instagram-archiver.svg?color=blue&logo=python&logoColor=white)](https://www.python.org/)
+ [![PyPI - Version](https://img.shields.io/pypi/v/instagram-archiver)](https://pypi.org/project/instagram-archiver/)
+ [![GitHub tag (with filter)](https://img.shields.io/github/v/tag/Tatsh/instagram-archiver)](https://github.com/Tatsh/instagram-archiver/tags)
+ [![License](https://img.shields.io/github/license/Tatsh/instagram-archiver)](https://github.com/Tatsh/instagram-archiver/blob/master/LICENSE.txt)
+ [![GitHub commits since latest release (by SemVer including pre-releases)](https://img.shields.io/github/commits-since/Tatsh/instagram-archiver/v0.3.0/master)](https://github.com/Tatsh/instagram-archiver/compare/v0.3.0...master)
+ [![QA](https://github.com/Tatsh/instagram-archiver/actions/workflows/qa.yml/badge.svg)](https://github.com/Tatsh/instagram-archiver/actions/workflows/qa.yml)
+ [![Tests](https://github.com/Tatsh/instagram-archiver/actions/workflows/tests.yml/badge.svg)](https://github.com/Tatsh/instagram-archiver/actions/workflows/tests.yml)
+ [![Coverage Status](https://coveralls.io/repos/github/Tatsh/instagram-archiver/badge.svg?branch=master)](https://coveralls.io/github/Tatsh/instagram-archiver?branch=master)
+ [![Documentation Status](https://readthedocs.org/projects/instagram-archiver/badge/?version=latest)](https://instagram-archiver.readthedocs.org/?badge=latest)
+ [![mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/)
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
+ [![pydocstyle](https://img.shields.io/badge/pydocstyle-enabled-AD4CD3)](http://www.pydocstyle.org/en/stable/)
+ [![pytest](https://img.shields.io/badge/pytest-zz?logo=Pytest&labelColor=black&color=black)](https://docs.pytest.org/en/stable/)
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
+ [![Downloads](https://static.pepy.tech/badge/instagram-archiver/month)](https://pepy.tech/project/instagram-archiver)
+ [![Stargazers](https://img.shields.io/github/stars/Tatsh/instagram-archiver?logo=github&style=flat)](https://github.com/Tatsh/instagram-archiver/stargazers)
+
+ [![@Tatsh](https://img.shields.io/badge/dynamic/json?url=https%3A%2F%2Fpublic.api.bsky.app%2Fxrpc%2Fapp.bsky.actor.getProfile%2F%3Factor%3Ddid%3Aplc%3Auq42idtvuccnmtl57nsucz72%26query%3D%24.followersCount%26style%3Dsocial%26logo%3Dbluesky%26label%3DFollow%2520%40Tatsh&query=%24.followersCount&style=social&logo=bluesky&label=Follow%20%40Tatsh)](https://bsky.app/profile/Tatsh.bsky.social)
+ [![Mastodon Follow](https://img.shields.io/mastodon/follow/109370961877277568?domain=hostux.social&style=social)](https://hostux.social/@Tatsh)
+
+ Save Instagram content you have access to.
+
+ ## Installation
+
+ ### Poetry
+
+ ```shell
+ poetry add instagram-archiver
+ ```
+
+ ### Pip
+
+ ```shell
+ pip install instagram-archiver
+ ```
+
+ ## Usage
+
+ ```plain
+ Usage: instagram-archiver [OPTIONS] USERNAME
+
+   Archive a profile's posts.
+
+ Options:
+   -o, --output-dir DIRECTORY  Output directory.
+   -b, --browser [brave|chrome|chromium|edge|opera|vivaldi|firefox|safari]
+                               Browser to read cookies from.
+   -p, --profile TEXT          Browser profile.
+   -d, --debug                 Enable debug output.
+   --no-log                    Ignore log (re-fetch everything).
+   -C, --include-comments      Also download all comments (extends download
+                               time significantly).
+   -h, --help                  Show this message and exit.
+ ```
+
+ Typical use:
+
+ ```shell
+ instagram-archiver -o ~/instagram-backups/username username
+ ```
+
+ ### `instagram-save-saved`
+
+ This tool saves your saved posts (at `www.instagram.com/username/saved/all-posts`).
+
+ ```plain
+ Usage: instagram-save-saved [OPTIONS]
+
+   Archive your saved posts.
+
+ Options:
+   -o, --output-dir DIRECTORY  Output directory.
+   -b, --browser [brave|chrome|chromium|edge|opera|vivaldi|firefox|safari]
+                               Browser to read cookies from.
+   -p, --profile TEXT          Browser profile.
+   -d, --debug                 Enable debug output.
+   -C, --include-comments      Also download all comments (extends download
+                               time significantly).
+   -u, --unsave                Unsave posts after successful archive.
+   -h, --help                  Show this message and exit.
+ ```
+
+ ## Notes
+
+ The default output path is the username under the current working directory.
+
+ Videos are saved using yt-dlp and its respective configuration.
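The Notes above state that videos are saved through yt-dlp and its configuration. Purely as an illustration of that dependency — the archiver's actual yt-dlp invocation is not part of this diff — collected post URLs could be handed to yt-dlp from Python along these lines (the URL, output template, and cookie browser are placeholder assumptions):

```python
# Illustrative sketch only; not instagram-archiver's actual yt-dlp call.
# Assumes yt-dlp is importable and uses placeholder values throughout.
from yt_dlp import YoutubeDL

video_urls = ['https://www.instagram.com/p/SHORTCODE']  # placeholder post URL
ydl_opts = {
    'outtmpl': '%(id)s.%(ext)s',        # standard yt-dlp output template
    'cookiesfrombrowser': ('chrome',),  # mirrors the CLI --browser option
}
with YoutubeDL(ydl_opts) as ydl:
    ydl.download(video_urls)
```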
@@ -0,0 +1,9 @@
+ """Instagram archiver."""
+ from __future__ import annotations
+
+ from .client import InstagramClient
+ from .profile_scraper import ProfileScraper
+ from .saved_scraper import SavedScraper
+
+ __all__ = ('InstagramClient', 'ProfileScraper', 'SavedScraper')
+ __version__ = 'v0.3.0'
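The module above defines the package's public surface; a minimal import sketch based only on those exports (assuming the import name is `instagram_archiver`):

```python
# Minimal sketch of the exports declared in __init__.py above.
import instagram_archiver
from instagram_archiver import InstagramClient, ProfileScraper, SavedScraper

print(instagram_archiver.__version__)  # 'v0.3.0' per the module above
print(InstagramClient, ProfileScraper, SavedScraper)
```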
@@ -0,0 +1,6 @@
+ """Entry point for ``python -m`` invocation."""
+ from __future__ import annotations
+
+ from .main import main
+
+ main()
@@ -0,0 +1,268 @@
+ """Generic client."""
+ from __future__ import annotations
+
+ from http import HTTPStatus
+ from os import utime
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any, Self, TypeVar, cast
+ import json
+ import logging
+
+ from bs4 import BeautifulSoup as Soup
+ from requests import HTTPError
+ from yt_dlp_utils import setup_session
+ import requests
+
+ from .constants import API_HEADERS, PAGE_FETCH_HEADERS, SHARED_HEADERS
+ from .typing import (
+     CarouselMedia,
+     Comments,
+     Edge,
+     HighlightsTray,
+     MediaInfo,
+     MediaInfoItem,
+     MediaInfoItemImageVersions2Candidate,
+ )
+ from .utils import get_extension, json_dumps_formatted, write_if_new
+
+ if TYPE_CHECKING:
+     from collections.abc import Iterable, Mapping
+     from types import TracebackType
+
+     from .typing import BrowserName
+
+ __all__ = ('CSRFTokenNotFound', 'InstagramClient')
+
+ T = TypeVar('T')
+ log = logging.getLogger(__name__)
+
+
+ class CSRFTokenNotFound(RuntimeError):
+     """CSRF token not found in cookies."""
+
+
+ class InstagramClient:
+     """Generic client for Instagram."""
+     def __init__(self, browser: BrowserName = 'chrome', browser_profile: str = 'Default') -> None:
+         """
+         Initialise the client.
+
+         Parameters
+         ----------
+         browser : str
+             The browser to use.
+
+         browser_profile : str
+             The browser profile to use.
+         """
+         self.session = setup_session(browser,
+                                      browser_profile,
+                                      SHARED_HEADERS,
+                                      domains={'instagram.com'},
+                                      setup_retry=True,
+                                      status_forcelist=(413, 429, 500, 502, 503, 504))
+         self.failed_urls: set[str] = set()
+         """Set of failed URLs."""
+         self.video_urls: list[str] = []
+         """List of video URLs to download."""
+
+     def add_video_url(self, url: str) -> None:
+         """Add a video URL to the list of video URLs."""
+         log.info('Added video URL: %s', url)
+         self.video_urls.append(url)
+
+     def add_csrf_token_header(self) -> None:
+         """
+         Add CSRF token header to the session.
+
+         Raises
+         ------
+         CSRFTokenNotFound
+             If the CSRF token is not found in the cookies.
+         """
+         token = self.session.cookies.get('csrftoken')
+         if not token:
+             raise CSRFTokenNotFound
+         self.session.headers.update({'x-csrftoken': token})
+
+     def graphql_query(self,
+                       variables: Mapping[str, Any],
+                       *,
+                       cast_to: type[T],
+                       doc_id: str = '9806959572732215') -> T | None:
+         """Make a GraphQL query."""
+         with self.session.post('https://www.instagram.com/graphql/query',
+                                headers={
+                                    'content-type': 'application/x-www-form-urlencoded',
+                                } | API_HEADERS,
+                                data={
+                                    'doc_id': doc_id,
+                                    'variables': json.dumps(variables, separators=(',', ':'))
+                                }) as r:
+             if r.status_code != HTTPStatus.OK:
+                 return None
+             data = r.json()
+             assert isinstance(data, dict)
+             if (status := data.get('status')) != 'ok':
+                 log.error('GraphQL status not "ok": %s', status)
+                 return None
+             if data.get('errors'):
+                 log.warning('Response has errors.')
+                 log.debug('Response: %s', json.dumps(data, indent=2))
+             if not data.get('data'):
+                 log.error('No data in response.')
+             return cast('T', data['data'])
+
+     def get_text(self, url: str, *, params: Mapping[str, str] | None = None) -> str:
+         """Get text from a URL."""
+         with self.session.get(url, params=params, headers=API_HEADERS) as r:
+             r.raise_for_status()
+             return r.text
+
+     def highlights_tray(self, user_id: int | str) -> HighlightsTray:
+         """Get the highlights tray data for a user."""
+         return self.get_json(
+             f'https://i.instagram.com/api/v1/highlights/{user_id}/highlights_tray/',
+             cast_to=HighlightsTray)
+
+     def __enter__(self) -> Self:  # pragma: no cover
+         """Recommended way to initialise the client."""
+         return self
+
+     def __exit__(self, _: type[BaseException] | None, __: BaseException | None,
+                  ___: TracebackType | None) -> None:
+         """Clean up."""
+
+     def is_saved(self, url: str) -> bool:  # pragma: no cover
+         """Check if a URL is already saved."""
+         return False
+
+     def save_to_log(self, url: str) -> None:
+         """Save a URL to the log."""
+
+     def save_image_versions2(self, sub_item: CarouselMedia | MediaInfoItem, timestamp: int) -> None:
+         """Save images in the image_versions2 dictionary."""
+         def key(x: MediaInfoItemImageVersions2Candidate) -> int:
+             return x['width'] * x['height']
+
+         best = max(sub_item['image_versions2']['candidates'], key=key)
+         if self.is_saved(best['url']):
+             return
+         r = self.session.head(best['url'])
+         if r.status_code != HTTPStatus.OK:
+             log.warning('HEAD request failed with status code %s.', r.status_code)
+             return
+         ext = get_extension(r.headers['content-type'])
+         name = f'{sub_item["id"]}.{ext}'
+         with Path(name).open('wb') as f:
+             f.writelines(self.session.get(best['url'], stream=True).iter_content(chunk_size=512))
+         utime(name, (timestamp, timestamp))
+         self.save_to_log(r.url)
+
+     def save_comments(self, edge: Edge) -> None:
+         """Save comments for an edge node."""
+         comment_url = ('https://www.instagram.com/api/v1/media/'
+                        f'{edge["node"]["id"]}/comments/')
+         shared_params = {'can_support_threading': 'true'}
+         try:
+             comment_data = self.get_json(comment_url,
+                                          params={
+                                              **shared_params, 'permalink_enabled': 'false'
+                                          },
+                                          cast_to=Comments)
+         except HTTPError:
+             log.exception('Failed to get comments.')
+             return
+         top_comment_data: Any = comment_data
+         while comment_data['can_view_more_preview_comments'] and comment_data['next_min_id']:
+             try:
+                 comment_data = self.get_json(comment_url,
+                                              params={
+                                                  **shared_params,
+                                                  'min_id':
+                                                      comment_data['next_min_id'],
+                                              },
+                                              cast_to=Comments)
+             except HTTPError:
+                 log.exception('Failed to get comments.')
+                 break
+             top_comment_data['comments'] = (list(top_comment_data['comments']) +
+                                             list(comment_data['comments']))
+         comments_json = f'{edge["node"]["id"]}-comments.json'
+         with Path(comments_json).open('w+', encoding='utf-8') as f:
+             json.dump(top_comment_data, f, sort_keys=True, indent=2)
+
+     def save_media(self, edge: Edge) -> None:
+         """Save media for an edge node."""
+         log.info('Saving media at URL: https://www.instagram.com/p/%s', edge['node']['code'])
+         media_info_url = f'https://www.instagram.com/p/{edge["node"]["code"]}/'
+         if self.is_saved(media_info_url):
+             return
+         r = self.session.get(media_info_url, headers=PAGE_FETCH_HEADERS)
+         if r.status_code != HTTPStatus.OK:
+             log.warning('GET request failed with status code %s.', r.status_code)
+             return
+         if 'image_versions2' not in r.text or 'taken_at' not in r.text:
+             log.warning('Invalid response. image_versions2 dict not found.')
+             return
+         soup = Soup(r.text, 'html5lib')
+         media_info_embedded = next(
+             json.loads(s) for s in (''.join(
+                 getattr(c, 'text', '') for c in getattr(script, 'contents', ''))
+                 for script in soup.select('script[type="application/json"]'))
+             if 'image_versions2' in s and 'taken_at' in s)
+         media_info: MediaInfo = (
+             media_info_embedded['require'][0][3][0]['__bbox']['require'][0][3][1]['__bbox']
+             ['result']['data']['xdt_api__v1__media__shortcode__web_info'])
+         timestamp = media_info['items'][0]['taken_at']
+         id_json_file = f'{edge["node"]["id"]}.json'
+         media_info_json_file = f'{edge["node"]["id"]}-media-info-0000.json'
+         write_if_new(id_json_file, str(json_dumps_formatted(edge['node'])))
+         write_if_new(media_info_json_file, str(json_dumps_formatted(media_info)))
+         for file in (id_json_file, media_info_json_file):
+             utime(file, (timestamp, timestamp))
+         self.save_to_log(media_info_url)
+         for item in media_info['items']:
+             timestamp = item['taken_at']
+             if (carousel_media := item.get('carousel_media')):
+                 for sub_item in carousel_media:
+                     self.save_image_versions2(sub_item, timestamp)
+             elif 'image_versions2' in item:
+                 self.save_image_versions2(item, timestamp)
+
+     def save_edges(self, edges: Iterable[Edge], parent_edge: Edge | None = None) -> None:
+         """Save edge node media."""
+         for edge in edges:
+             if edge['node']['__typename'] == 'XDTMediaDict':
+                 try:
+                     shortcode = edge['node']['code']
+                 except KeyError:
+                     if parent_edge:
+                         try:
+                             shortcode = parent_edge['node']['code']
+                         except KeyError:
+                             log.exception('Unknown shortcode.')
+                             return
+                     else:
+                         log.exception('Unknown shortcode.')
+                 if edge['node'].get('video_dash_manifest'):
+                     self.add_video_url(f'https://www.instagram.com/p/{shortcode}')
+                 else:
+                     try:
+                         self.save_comments(edge)
+                         self.save_media(edge)
+                     except requests.exceptions.RetryError:
+                         log.exception('Retries exhausted.')
+                         return
+             else:
+                 log.warning(  # type: ignore[unreachable]
+                     'Unknown type: `%s`. Item %s will not be processed.',
+                     edge['node']['__typename'], edge['node']['id'])
+                 shortcode = edge['node']['code']
+                 self.failed_urls.add(f'https://www.instagram.com/p/{shortcode}')
+
+     def get_json(self, url: str, *, cast_to: type[T], params: Mapping[str, str] | None = None) -> T:
+         """Get JSON data from a URL."""
+         with self.session.get(url, params=params, headers=API_HEADERS) as r:
+             r.raise_for_status()
+             return cast('T', r.json())
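`InstagramClient` above is written to be used as a context manager on top of a cookie-backed `requests` session. A minimal usage sketch restricted to the methods visible in this hunk; the user id is a placeholder, since resolving ids from usernames is handled by the scrapers, which are not part of this diff:

```python
# Usage sketch based only on the InstagramClient API shown above.
from instagram_archiver import InstagramClient

with InstagramClient(browser='chrome', browser_profile='Default') as client:
    client.add_csrf_token_header()            # raises CSRFTokenNotFound if the cookie is absent
    tray = client.highlights_tray(123456789)  # placeholder user id
    print(tray)
    print('Queued video URLs:', client.video_urls)
    print('Failed URLs:', client.failed_urls)
```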
@@ -0,0 +1,63 @@
+ """Constants."""
+ from __future__ import annotations
+
+ __all__ = ('API_HEADERS', 'BROWSER_CHOICES', 'PAGE_FETCH_HEADERS', 'SHARED_HEADERS', 'USER_AGENT')
+
+ USER_AGENT = ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
+               'Chrome/137.0.0.0 Safari/537.36')
+ """
+ User agent.
+
+ :meta hide-value:
+ """
+ SHARED_HEADERS = {
+     'accept': '*/*',
+     'authority': 'www.instagram.com',
+     'cache-control': 'no-cache',
+     'dnt': '1',
+     'pragma': 'no-cache',
+     'user-agent': USER_AGENT,
+     # 'x-asbd-id': '359341',
+     # 'x-ig-app-id': '936619743392459',
+ }
+ """
+ Headers to use for requests.
+
+ :meta hide-value:
+ """
+ API_HEADERS = {
+     'x-asbd-id': '359341',
+     'x-ig-app-id': '936619743392459',
+ }
+ """
+ Headers to use for API requests.
+
+ :meta hide-value:
+ """
+ PAGE_FETCH_HEADERS = {
+     'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,'
+               'image/apng,*/*;q=0.8',
+     'dpr': '1.5',
+     'sec-fetch-mode': 'navigate',  # Definitely required.
+     'viewport-width': '3840',
+ }
+ """
+ Headers to use for fetching HTML pages.
+
+ :meta hide-value:
+ """
+ LOG_SCHEMA = """CREATE TABLE log (
+     url TEXT PRIMARY KEY NOT NULL,
+     date TEXT DEFAULT CURRENT_TIMESTAMP NOT NULL
+ );"""
+ """
+ Schema for log database.
+
+ :meta hide-value:
+ """
+ BROWSER_CHOICES = ('brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'firefox', 'safari')
+ """
+ Possible browser choices to get cookies from.
+
+ :meta hide-value:
+ """