instapaper-scraper 1.1.0__py3-none-any.whl → 1.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two packages as they appear in their respective public registries. Note that `1.1.0rc1` is the earlier pre-release (PEP 440 orders `1.1.0rc1` before `1.1.0`), so removed lines in this diff show code from the later `1.1.0` release, and added lines show the older release-candidate code.
- instapaper_scraper/api.py +14 -34
- instapaper_scraper/auth.py +4 -5
- instapaper_scraper/cli.py +6 -6
- instapaper_scraper/output.py +4 -4
- {instapaper_scraper-1.1.0.dist-info → instapaper_scraper-1.1.0rc1.dist-info}/METADATA +47 -106
- instapaper_scraper-1.1.0rc1.dist-info/RECORD +13 -0
- instapaper_scraper-1.1.0.dist-info/RECORD +0 -13
- {instapaper_scraper-1.1.0.dist-info → instapaper_scraper-1.1.0rc1.dist-info}/WHEEL +0 -0
- {instapaper_scraper-1.1.0.dist-info → instapaper_scraper-1.1.0rc1.dist-info}/entry_points.txt +0 -0
- {instapaper_scraper-1.1.0.dist-info → instapaper_scraper-1.1.0rc1.dist-info}/licenses/LICENSE +0 -0
- {instapaper_scraper-1.1.0.dist-info → instapaper_scraper-1.1.0rc1.dist-info}/top_level.txt +0 -0
instapaper_scraper/api.py
CHANGED
```diff
@@ -1,11 +1,10 @@
 import os
 import logging
 import time
-from typing import List, Dict, Tuple, Optional
+from typing import List, Dict, Tuple, Optional
 
 import requests
 from bs4 import BeautifulSoup
-from bs4.element import Tag
 
 from .exceptions import ScraperStructureChanged
 from .constants import INSTAPAPER_BASE_URL, KEY_ID, KEY_TITLE, KEY_URL
@@ -124,28 +123,14 @@ class InstapaperClient:
         soup = BeautifulSoup(response.text, self.HTML_PARSER)
 
         article_list = soup.find(id=self.ARTICLE_LIST_ID)
-        if not isinstance(article_list, Tag):
+        if not article_list:
             raise ScraperStructureChanged(self.MSG_ARTICLE_LIST_NOT_FOUND)
 
         articles = article_list.find_all(self.ARTICLE_TAG)
-        article_ids = []
-
-        for article in articles:
-
-            article_id_val = article.get(KEY_ID)
-
-            # Ensure article_id_val is a string before calling replace
-            # If it's a list, take the first element. This is a pragmatic
-            # approach since 'id' attributes should ideally be unique strings.
-            if isinstance(article_id_val, list):
-                article_id_val = article_id_val[0] if article_id_val else None
-
-            if isinstance(article_id_val, str) and article_id_val.startswith(
-                self.ARTICLE_ID_PREFIX
-            ):
-                article_ids.append(
-                    article_id_val.replace(self.ARTICLE_ID_PREFIX, "")
-                )
+        article_ids = [
+            article[KEY_ID].replace(self.ARTICLE_ID_PREFIX, "")
+            for article in articles
+        ]
 
         data = self._parse_article_data(soup, article_ids, page)
         has_more = soup.find(class_=self.PAGINATE_OLDER_CLASS) is not None
@@ -218,14 +203,14 @@ class InstapaperClient:
 
     def _parse_article_data(
         self, soup: BeautifulSoup, article_ids: List[str], page: int
-    ) -> List[Dict[str,
+    ) -> List[Dict[str, str]]:
         """Parses the raw HTML to extract structured data for each article."""
         data = []
         for article_id in article_ids:
             article_id_full = f"{self.ARTICLE_ID_PREFIX}{article_id}"
             article_element = soup.find(id=article_id_full)
             try:
-                if not isinstance(article_element, Tag):
+                if not article_element:
                     raise AttributeError(
                         self.MSG_ARTICLE_ELEMENT_NOT_FOUND.format(
                             article_id_full=article_id_full
@@ -233,19 +218,14 @@ class InstapaperClient:
                     )
 
                 title_element = article_element.find(class_=self.ARTICLE_TITLE_CLASS)
-                if not isinstance(title_element, Tag):
+                if not title_element:
                     raise AttributeError(self.MSG_TITLE_ELEMENT_NOT_FOUND)
                 title = title_element.get_text().strip()
 
-                meta_element = article_element.find(class_=self.TITLE_META_CLASS)
-                if not isinstance(meta_element, Tag):
-                    raise AttributeError(self.MSG_LINK_ELEMENT_NOT_FOUND)
-
-                link_element = meta_element.find("a")
-                if (
-                    not isinstance(link_element, Tag)
-                    or "href" not in link_element.attrs
-                ):
+                link_element = article_element.find(class_=self.TITLE_META_CLASS).find(
+                    "a"
+                )
+                if not link_element or "href" not in link_element.attrs:
                     raise AttributeError(self.MSG_LINK_ELEMENT_NOT_FOUND)
                 link = link_element["href"]
 
@@ -301,7 +281,7 @@ class InstapaperClient:
             )
             return False
 
-    def _wait_for_retry(self, attempt: int, reason: str) -> None:
+    def _wait_for_retry(self, attempt: int, reason: str):
         """Calculates and waits for an exponential backoff period."""
         sleep_time = self.backoff_factor * (2**attempt)
         logging.warning(
```
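Since `1.1.0rc1` is the earlier build, the removed lines above are the hardened ID extraction that `1.1.0` later shipped. The sketch below is illustrative and not taken from the package — the HTML snippet, the `article_` prefix, and the variable names are assumptions — but it shows why the release-side loop guards the `id` attribute where the rc-side comprehension subscripts it directly:

```python
from bs4 import BeautifulSoup

html = (
    '<div id="article_list">'
    '<article id="article_123"></article>'
    "<article></article>"  # note: no id attribute
    "</div>"
)
soup = BeautifulSoup(html, "html.parser")
articles = soup.find(id="article_list").find_all("article")

# rc1-style extraction: a bare subscript raises KeyError as soon as an
# <article> turns up without an id attribute.
try:
    ids = [a["id"].replace("article_", "") for a in articles]
except KeyError as err:
    print("rc1-style lookup failed on attribute:", err)

# 1.1.0-style extraction: normalize and filter instead of assuming.
ids = []
for a in articles:
    val = a.get("id")  # may be None; bs4 can also return lists for some attrs
    if isinstance(val, list):
        val = val[0] if val else None
    if isinstance(val, str) and val.startswith("article_"):
        ids.append(val.replace("article_", ""))
print(ids)  # ['123']
```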
instapaper_scraper/auth.py
CHANGED
```diff
@@ -3,7 +3,7 @@ import getpass
 import logging
 import stat
 from pathlib import Path
-from typing import Union
+from typing import Union
 
 from cryptography.fernet import Fernet
 import requests
@@ -67,12 +67,11 @@ class InstapaperAuthenticator:
         session: requests.Session,
         session_file: Union[str, Path],
         key_file: Union[str, Path],
-        username: Union[str, None] = None,
-        password: Union[str, None] = None,
+        username: str = None,
+        password: str = None,
     ):
         self.session = session
         self.session_file = Path(session_file)
-        self.key_file = Path(key_file)
         self.key = get_encryption_key(key_file)
         self.fernet = Fernet(self.key)
         self.username = username
@@ -176,7 +175,7 @@ class InstapaperAuthenticator:
         logging.error(self.LOG_LOGIN_FAILED)
         return False
 
-    def _save_session(self) -> None:
+    def _save_session(self):
         """Saves the current session cookies to an encrypted file."""
         required_cookies = self.REQUIRED_COOKIES
         cookies_to_save = [
```
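Besides dropping the unused `self.key_file` attribute, the signature hunk is a typing regression on the rc side: `username: str = None` is an implicit-Optional annotation that mypy rejects under its default `no-implicit-optional` behavior. A minimal illustration — these free functions are not in the package, they just reproduce the two annotation styles from the diff:

```python
from typing import Optional, Union


def login_rc1(username: str = None, password: str = None):
    """rc1 style: mypy flags `None` as an invalid default for plain `str`."""


def login_release(
    username: Union[str, None] = None,
    password: Optional[str] = None,  # Optional[str] == Union[str, None]
):
    """1.1.0 style: the Optional-ness is spelled out, so type checkers pass."""
```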
instapaper_scraper/cli.py
CHANGED
```diff
@@ -3,7 +3,7 @@ import logging
 import argparse
 import requests
 from pathlib import Path
-from typing import Union
+from typing import Union
 
 if sys.version_info >= (3, 11):
     import tomllib
@@ -39,7 +39,7 @@ def _resolve_path(
     return user_dir_filename
 
 
-def load_config(config_path_str: Union[str, None] = None) -> Optional[Dict[str, Any]]:
+def load_config(config_path_str: Union[str, None] = None) -> Union[dict, None]:
     """
     Loads configuration from a TOML file.
     It checks the provided path, then config.toml in the project root,
@@ -50,7 +50,7 @@ def load_config(config_path_str: Union[str, None] = None) -> Optional[Dict[str, Any]]:
         CONFIG_DIR / CONFIG_FILENAME,
     ]
 
-    paths_to_check: List[Path] = []
+    paths_to_check = []
     if config_path_str:
         paths_to_check.insert(0, Path(config_path_str).expanduser())
     paths_to_check.extend(default_paths)
@@ -60,7 +60,7 @@ def load_config(config_path_str: Union[str, None] = None) -> Optional[Dict[str, Any]]:
         try:
             with open(path, "rb") as f:
                 logging.info(f"Loading configuration from {path}")
-                return tomllib.load(f)
+                return tomllib.load(f)
         except tomllib.TOMLDecodeError as e:
             logging.error(f"Error decoding TOML file at {path}: {e}")
             return None
@@ -68,7 +68,7 @@ def load_config(config_path_str: Union[str, None] = None) -> Optional[Dict[str, Any]]:
     return None
 
 
-def main() -> None:
+def main():
     """
     Main entry point for the Instapaper scraper CLI.
     """
@@ -144,7 +144,7 @@ def main() -> None:
     print(" 0: none (non-folder mode)")
     for i, folder in enumerate(folders):
         display_name = folder.get("key") or folder.get("slug") or folder.get("id")
-        print(f" {i + 1}: {display_name}")
+        print(f" {i+1}: {display_name}")
 
     try:
         choice = int(input("Select a folder (enter a number): "))
```
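The `load_config` hunks only touch annotations and formatting; the lookup order itself is documented in the README portion of the METADATA diff below. A hedged re-sketch of that order under a hypothetical function name (`load_first_config` is not the package's name for it), using the `tomli` fallback the wheel pins for Python < 3.11:

```python
import logging
import sys
from pathlib import Path
from typing import Optional

if sys.version_info >= (3, 11):
    import tomllib
else:
    import tomli as tomllib  # the wheel depends on tomli~=2.0.1 here


def load_first_config(config_path_str: Optional[str] = None) -> Optional[dict]:
    # Explicit --config-path wins, then ./config.toml, then the user config dir.
    candidates = []
    if config_path_str:
        candidates.append(Path(config_path_str).expanduser())
    candidates.append(Path("config.toml"))
    candidates.append(Path("~/.config/instapaper-scraper/config.toml").expanduser())

    for path in candidates:
        if not path.is_file():
            continue
        try:
            with open(path, "rb") as f:
                logging.info(f"Loading configuration from {path}")
                return tomllib.load(f)
        except tomllib.TOMLDecodeError as e:
            logging.error(f"Error decoding TOML file at {path}: {e}")
            return None
    return None
```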
instapaper_scraper/output.py
CHANGED
```diff
@@ -53,7 +53,7 @@ def get_sqlite_insert_sql(add_instapaper_url_manually: bool = False) -> str:
 
 def save_to_csv(
     data: List[Dict[str, Any]], filename: str, add_instapaper_url: bool = False
-) -> None:
+):
     """Saves a list of articles to a CSV file."""
     os.makedirs(os.path.dirname(filename), exist_ok=True)
     with open(filename, "w", newline="", encoding="utf-8") as f:
@@ -69,7 +69,7 @@ def save_to_csv(
     logging.info(LOG_SAVED_ARTICLES.format(count=len(data), filename=filename))
 
 
-def save_to_json(data: List[Dict[str, Any]], filename: str) -> None:
+def save_to_json(data: List[Dict[str, Any]], filename: str):
     """Saves a list of articles to a JSON file."""
     os.makedirs(os.path.dirname(filename), exist_ok=True)
     with open(filename, "w", encoding="utf-8") as f:
@@ -79,7 +79,7 @@ def save_to_json(data: List[Dict[str, Any]], filename: str) -> None:
 
 def save_to_sqlite(
     data: List[Dict[str, Any]], db_name: str, add_instapaper_url: bool = False
-) -> None:
+):
     """Saves a list of articles to a SQLite database."""
     os.makedirs(os.path.dirname(db_name), exist_ok=True)
     conn = sqlite3.connect(db_name)
@@ -131,7 +131,7 @@ def save_articles(
     format: str,
     filename: str,
     add_instapaper_url: bool = False,
-) -> None:
+):
     """
     Dispatches to the correct save function based on the format.
     """
```
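Every hunk in output.py is the same change: `1.1.0` adds an explicit `-> None` return annotation that the rc omits. A small illustration (function names hypothetical) of what the annotation buys under mypy:

```python
from typing import Any, Dict, List


def save_rc1(data: List[Dict[str, Any]], filename: str):
    """Untyped def: mypy skips checking callers of this function."""
    print(f"saving {len(data)} rows to {filename}")


def save_release(data: List[Dict[str, Any]], filename: str) -> None:
    """With -> None, `result = save_release(...)` is reported by mypy
    ('does not return a value')."""
    print(f"saving {len(data)} rows to {filename}")


result = save_rc1([], "out.csv")  # silently None; the annotated form is flagged
```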
{instapaper_scraper-1.1.0.dist-info → instapaper_scraper-1.1.0rc1.dist-info}/METADATA
RENAMED
````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: instapaper-scraper
-Version: 1.1.0
+Version: 1.1.0rc1
 Summary: A tool to scrape articles from Instapaper.
 Project-URL: Homepage, https://github.com/chriskyfung/InstapaperScraper
 Project-URL: Source, https://github.com/chriskyfung/InstapaperScraper
@@ -35,52 +35,30 @@ Requires-Dist: tomli~=2.0.1; python_version < "3.11"
 Provides-Extra: dev
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
+Requires-Dist: black; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: types-requests; extra == "dev"
 Requires-Dist: types-beautifulsoup4; extra == "dev"
 Requires-Dist: requests-mock; extra == "dev"
 Requires-Dist: build; extra == "dev"
 Requires-Dist: twine; extra == "dev"
-Requires-Dist: mypy; extra == "dev"
-Requires-Dist: pre-commit; extra == "dev"
 Dynamic: license-file
 
 # Instapaper Scraper
 
-<p align="center">
-
-
-
-
-
-
-
-
-
-
-
-
-  </a>
-  <a href="https://pypi.org/project/instapaper-scraper/">
-    <img src="https://img.shields.io/pypi/v/instapaper-scraper.svg" alt="PyPI version">
-  </a>
-  <a href="https://pepy.tech/projects/instapaper-scraper">
-    <img src="https://static.pepy.tech/personalized-badge/instapaper-scraper?period=total&left_text=downloads" alt="PyPI Downloads">
-  </a>
-  <a href="https://www.gnu.org/licenses/gpl-3.0.en.html">
-    <img src="https://img.shields.io/github/license/chriskyfung/InstapaperScraper" alt="GitHub License">
-  </a>
-  <a href="https://github.com/sponsors/chriskyfung" title="Sponsor on GitHub">
-    <img src="https://img.shields.io/badge/Sponsor-GitHub-purple?logo=github" alt="GitHub Sponsors Default">
-  </a>
-  <a href="https://www.buymeacoffee.com/chriskyfung" title="Buy Me A Coffee">
-    <img src="https://img.shields.io/badge/Support%20Me-Coffee-ffdd00?logo=buy-me-a-coffee&logoColor=white" alt="Buy Me A Coffee">
-  </a>
-</p>
-
-A powerful and reliable Python tool to automate the export of all your saved Instapaper bookmarks into various formats, giving you full ownership of your data.
-
-## ✨ Features
+
+[](https://github.com/chriskyfung/InstapaperScraper/actions/workflows/ci.yml)
+[](https://pypi.org/project/instapaper-scraper/)
+[](https://pepy.tech/projects/instapaper-scraper)
+[](https://github.com/psf/black)
+[](https://github.com/astral-sh/ruff)
+[
+](https://www.gnu.org/licenses/gpl-3.0.en.html)
+[](https://codecov.io/gh/chriskyfung/InstapaperScraper)
+
+A Python tool to scrape all your saved Instapaper bookmarks and export them to various formats.
+
+## Features
 
 - Scrapes all bookmarks from your Instapaper account.
 - Supports scraping from specific folders.
@@ -88,13 +66,13 @@ A powerful and reliable Python tool to automate the export of all your saved Ins
 - Securely stores your session for future runs.
 - Modern, modular, and tested architecture.
 
-##
+## Getting Started
 
-###
+### 1. Requirements
 
 - Python 3.9+
 
-###
+### 2. Installation
 
 This package is available on PyPI and can be installed with pip:
 
@@ -102,7 +80,7 @@ This package is available on PyPI and can be installed with pip:
 pip install instapaper-scraper
 ```
 
-###
+### 3. Usage
 
 Run the tool from the command line, specifying your desired output format:
 
@@ -117,35 +95,35 @@ instapaper-scraper --format json
 instapaper-scraper --format sqlite --output my_articles.db
 ```
 
-##
+## Configuration
 
-###
+### Authentication
 
 The script authenticates using one of the following methods, in order of priority:
 
-1.
+1. **Command-line Arguments**: Provide your username and password directly when running the script:
 
    ```sh
   instapaper-scraper --username your_username --password your_password
   ```
 
-2.
+2. **Session Files (`.session_key`, `.instapaper_session`)**: The script attempts to load these files in the following order:
    a. Path specified by `--session-file` or `--key-file` arguments.
    b. Files in the current working directory (e.g., `./.session_key`).
    c. Files in the user's configuration directory (`~/.config/instapaper-scraper/`).
    After the first successful login, the script creates an encrypted `.instapaper_session` file and a `.session_key` file to reuse your session securely.
 
-3.
+3. **Interactive Prompt**: If no other method is available, the script will prompt you for your username and password.
 
 > **Note on Security:** Your session file (`.instapaper_session`) and the encryption key (`.session_key`) are stored with secure permissions (read/write for the owner only) to protect your credentials.
 
-###
+### Folder Configuration
 
 You can define and quickly access your Instapaper folders using a `config.toml` file. The scraper will look for this file in the following locations (in order of precedence):
 
-1.
-2.
-3.
+1. The path specified by the `--config-path` argument.
+2. `config.toml` in the current working directory.
+3. `~/.config/instapaper-scraper/config.toml`
 
 Here is an example of `config.toml`:
 
@@ -174,7 +152,7 @@ output_filename = "python-articles.db"
 
 When a `config.toml` file is present and no `--folder` argument is provided, the scraper will prompt you to select a folder. You can also specify a folder directly using the `--folder` argument with its key, ID, or slug. Use `--folder=none` to explicitly disable folder mode and scrape all articles.
 
-###
+### Command-line Arguments
 
 | Argument | Description |
 | --- | --- |
@@ -186,7 +164,7 @@ When a `config.toml` file is present and no `--folder` argument is provided, the
 | `--password <pass>` | Your Instapaper account password. |
 | `--add-instapaper-url` | Adds a `instapaper_url` column to the output, containing a full, clickable URL for each article. |
 
-###
+### Output Formats
 
 You can control the output format using the `--format` argument. The supported formats are:
 
@@ -198,7 +176,7 @@ If the `--format` flag is omitted, the script will default to `csv`.
 
 When using `--output <filename>`, the file extension is automatically corrected to match the chosen format. For example, `instapaper-scraper --format json --output my_articles.txt` will create `my_articles.json`.
 
-####
+#### Opening Articles in Instapaper
 
 The output data includes a unique `id` for each article. You can use this ID to construct a URL to the article's reader view: `https://www.instapaper.com/read/<article_id>`.
 
@@ -210,7 +188,7 @@ instapaper-scraper --add-instapaper-url
 
 This adds a `instapaper_url` field to each article in the JSON output and a `instapaper_url` column in the CSV and SQLite outputs. The original `id` field is preserved.
 
-##
+## How It Works
 
 The tool is designed with a modular architecture for reliability and maintainability.
 
@@ -219,9 +197,9 @@ The tool is designed with a modular architecture for reliability and maintainabi
 3. **Data Collection**: All fetched articles are aggregated into a single list.
 4. **Export**: Finally, the collected data is written to a file in your chosen format (`.csv`, `.json`, or `.db`).
 
-##
+## Example Output
 
-###
+### CSV (`output/bookmarks.csv`) (with --add-instapaper-url)
 
 ```csv
 "id","instapaper_url","title","url"
@@ -229,7 +207,7 @@ The tool is designed with a modular architecture for reliability and maintainabi
 "999002345","https://www.instapaper.com/read/999002345","Article 2","https://www.example.com/page-2/"
 ```
 
-###
+### JSON (`output/bookmarks.json`) (with --add-instapaper-url)
 
 ```json
 [
@@ -248,33 +226,15 @@ The tool is designed with a modular architecture for reliability and maintainabi
 ]
 ```
 
-###
+### SQLite (`output/bookmarks.db`)
 
 A SQLite database file is created with an `articles` table. The table includes `id`, `title`, and `url` columns. If the `--add-instapaper-url` flag is used, a `instapaper_url` column is also included. This feature is fully backward-compatible and will automatically adapt to the user's installed SQLite version, using an efficient generated column on modern versions (3.31.0+) and a fallback for older versions.
 
-##
-
-- **🐛 Bug Reports:** For any bugs or unexpected behavior, please [open an issue on GitHub](https://github.com/chriskyfung/InstapaperScraper/issues).
-- **💬 Questions & General Discussion:** For questions, feature requests, or general discussion, please use our [GitHub Discussions](https://github.com/chriskyfung/InstapaperScraper/discussions).
-
-## 🙏 Support the Project
-
-`Instapaper Scraper` is a free and open-source project that requires significant time and effort to maintain and improve. If you find this tool useful, please consider supporting its development. Your contribution helps ensure the project stays healthy, active, and continuously updated.
-
-- **[Sponsor on GitHub](https://github.com/sponsors/chriskyfung):** The best way to support the project with recurring monthly donations. Tiers with special rewards like priority support are available!
-- **[Buy Me a Coffee](https://www.buymeacoffee.com/chriskyfung):** Perfect for a one-time thank you.
+## Development & Testing
 
-
+This project uses `pytest` for testing, `black` for code formatting, and `ruff` for linting.
 
-
-
-Please read the **[Contribution Guidelines](CONTRIBUTING.md)** before you start.
-
-## 🧑‍💻 Development & Testing
-
-This project uses `pytest` for testing, `ruff` for code formatting and linting, and `mypy` for static type checking.
-
-### 🔧 Setup
+### Setup
 
 To install the development dependencies:
 
@@ -282,13 +242,7 @@ To install the development dependencies:
 pip install -e .[dev]
 ```
 
-To set up the pre-commit hooks:
-
-```sh
-pre-commit install
-```
-
-### ▶️ Running the Scraper
+### Running the Scraper
 
 To run the scraper directly without installing the package:
 
@@ -296,7 +250,7 @@ To run the scraper directly without installing the package:
 python -m src.instapaper_scraper.cli
 ```
 
-###
+### Testing
 
 To run the tests, execute the following command from the project root:
 
@@ -310,12 +264,12 @@ To check test coverage:
 pytest --cov=src/instapaper_scraper --cov-report=term-missing
 ```
 
-###
+### Code Quality
 
-To format the code with `ruff`:
+To format the code with `black`:
 
 ```sh
-ruff format .
+black .
 ```
 
 To check for linting errors with `ruff`:
@@ -330,23 +284,10 @@ To automatically fix linting errors:
 ruff check . --fix
 ```
 
-To run static type checks with `mypy`:
-
-```sh
-mypy src
-```
-
-## 📜 Disclaimer
+## Disclaimer
 
 This script requires valid Instapaper credentials. Use it responsibly and in accordance with Instapaper’s Terms of Service.
 
-##
-
-This project is licensed under the terms of the **GNU General Public License v3.0**. See the [LICENSE](LICENSE) file for the full license text.
-
-## 🙏 Support the Project
-
-`Instapaper Scraper` is a free and open-source project that requires significant time and effort to maintain and improve. If you find this tool useful, please consider supporting its development. Your contribution helps ensure the project stays healthy, active, and continuously updated.
+## License
 
-
-- **[Buy Me a Coffee](https://www.buymeacoffee.com/chriskyfung):** Perfect for a one-time thank you.
+This project is licensed under the terms of the GNU General Public License v3.0. See the [LICENSE](LICENSE) file for the full license text.
````
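The SQLite paragraph in the README above describes a version-adaptive schema. Below is a hedged sketch of how that adaptation can be implemented; the exact SQL and helper names in the package (the output.py hunk headers mention a `get_sqlite_insert_sql`) may differ from what is shown here:

```python
import sqlite3

# Generated columns need SQLite 3.31.0+; older builds get a plain column.
SUPPORTS_GENERATED = sqlite3.sqlite_version_info >= (3, 31, 0)


def create_articles_table(conn: sqlite3.Connection) -> None:
    if SUPPORTS_GENERATED:
        # The URL is derived from id at query time; nothing extra is stored.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS articles ("
            " id TEXT PRIMARY KEY, title TEXT, url TEXT,"
            " instapaper_url TEXT GENERATED ALWAYS AS"
            " ('https://www.instapaper.com/read/' || id) VIRTUAL)"
        )
    else:
        # Fallback: a normal column, populated by the INSERT statement instead.
        conn.execute(
            "CREATE TABLE IF NOT EXISTS articles ("
            " id TEXT PRIMARY KEY, title TEXT, url TEXT, instapaper_url TEXT)"
        )


conn = sqlite3.connect(":memory:")
create_articles_table(conn)
conn.execute(
    "INSERT INTO articles (id, title, url) VALUES "
    "('999001234', 'Article 1', 'https://www.example.com/page-1/')"
)
# On SQLite >= 3.31 this prints ('https://www.instapaper.com/read/999001234',)
print(conn.execute("SELECT instapaper_url FROM articles").fetchone())
```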
instapaper_scraper-1.1.0rc1.dist-info/RECORD
ADDED
```diff
@@ -0,0 +1,13 @@
+instapaper_scraper/__init__.py,sha256=qdcT3tp4KLufWH1u6tOuPVUQaXwakQD0gdjkwY4ljfg,206
+instapaper_scraper/api.py,sha256=-Dq5fOAGSGopb-qonIbETd9ZlxWdULKRgl1DCOuVemY,11618
+instapaper_scraper/auth.py,sha256=VTBE9KhGGJm0KbMT5DCTMCbh-N3HiJuJ9wMDb8CyZT4,7015
+instapaper_scraper/cli.py,sha256=wsQxTVFIyJq3EQiAtz7dCjg1vI2_Y9quZv4ifuEPDU8,7495
+instapaper_scraper/constants.py,sha256=ubFWa47985lIz58qokMC0xQzTmCB6NOa17KFgWLn65E,403
+instapaper_scraper/exceptions.py,sha256=CptHoZe4NOhdjOoyXkZEMFgQC6oKtzjRljywwDEtsTg,134
+instapaper_scraper/output.py,sha256=lxJgW71-m1YuMYJHeK6nu479pk_3bQGc0axzNCvxtZQ,5338
+instapaper_scraper-1.1.0rc1.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
+instapaper_scraper-1.1.0rc1.dist-info/METADATA,sha256=O-VJZg1yN3cuPRfBCevmD9_IrOR07NGpzrgZXI2-6hk,11637
+instapaper_scraper-1.1.0rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+instapaper_scraper-1.1.0rc1.dist-info/entry_points.txt,sha256=7AvRgN5fvtas_Duxdz-JPbDN6A1Lq2GaTfTSv54afxA,67
+instapaper_scraper-1.1.0rc1.dist-info/top_level.txt,sha256=kiU9nLkqPOVPLsP4QMHuBFjAmoIKfftYmGV05daLrcc,19
+instapaper_scraper-1.1.0rc1.dist-info/RECORD,,
```
instapaper_scraper-1.1.0.dist-info/RECORD
DELETED
```diff
@@ -1,13 +0,0 @@
-instapaper_scraper/__init__.py,sha256=qdcT3tp4KLufWH1u6tOuPVUQaXwakQD0gdjkwY4ljfg,206
-instapaper_scraper/api.py,sha256=67ZeiVjsZpGspB8S3ni8FS6LBAOHXBc_oz3vEDWDNms,12672
-instapaper_scraper/auth.py,sha256=OpgjbdI697FitumiyznWjey5-R2ZuxAEATaMz9NNnTc,7092
-instapaper_scraper/cli.py,sha256=YL9c7kksmj5iGKRvVqG0KO4rBbhTg5c9Lgvsf_brRPA,7579
-instapaper_scraper/constants.py,sha256=ubFWa47985lIz58qokMC0xQzTmCB6NOa17KFgWLn65E,403
-instapaper_scraper/exceptions.py,sha256=CptHoZe4NOhdjOoyXkZEMFgQC6oKtzjRljywwDEtsTg,134
-instapaper_scraper/output.py,sha256=DdwVNZ6dVK95rEGjIO0vR6h34sg6GGJjEe6ZFZc0LtE,5370
-instapaper_scraper-1.1.0.dist-info/licenses/LICENSE,sha256=IwGE9guuL-ryRPEKi6wFPI_zOhg7zDZbTYuHbSt_SAk,35823
-instapaper_scraper-1.1.0.dist-info/METADATA,sha256=joCv87uWUarw_1Re2_2WxNm1ypPnMXqSVY1lYdmpNzI,14554
-instapaper_scraper-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-instapaper_scraper-1.1.0.dist-info/entry_points.txt,sha256=7AvRgN5fvtas_Duxdz-JPbDN6A1Lq2GaTfTSv54afxA,67
-instapaper_scraper-1.1.0.dist-info/top_level.txt,sha256=kiU9nLkqPOVPLsP4QMHuBFjAmoIKfftYmGV05daLrcc,19
-instapaper_scraper-1.1.0.dist-info/RECORD,,
```
{instapaper_scraper-1.1.0.dist-info → instapaper_scraper-1.1.0rc1.dist-info}/WHEEL
RENAMED
File without changes
{instapaper_scraper-1.1.0.dist-info → instapaper_scraper-1.1.0rc1.dist-info}/entry_points.txt
RENAMED
File without changes
{instapaper_scraper-1.1.0.dist-info → instapaper_scraper-1.1.0rc1.dist-info}/licenses/LICENSE
RENAMED
File without changes
{instapaper_scraper-1.1.0.dist-info → instapaper_scraper-1.1.0rc1.dist-info}/top_level.txt
RENAMED
File without changes