instapaper-scraper 1.1.0rc1__tar.gz → 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {instapaper_scraper-1.1.0rc1/src/instapaper_scraper.egg-info → instapaper_scraper-1.1.1}/PKG-INFO +114 -47
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/README.md +110 -45
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/pyproject.toml +23 -7
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/src/instapaper_scraper/api.py +34 -14
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/src/instapaper_scraper/auth.py +5 -4
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/src/instapaper_scraper/cli.py +6 -6
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/src/instapaper_scraper/output.py +20 -8
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1/src/instapaper_scraper.egg-info}/PKG-INFO +114 -47
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/src/instapaper_scraper.egg-info/requires.txt +3 -1
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/tests/test_api.py +57 -0
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/tests/test_auth.py +44 -0
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/tests/test_cli.py +5 -10
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/tests/test_output.py +43 -13
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/LICENSE +0 -0
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/setup.cfg +0 -0
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/src/instapaper_scraper/__init__.py +0 -0
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/src/instapaper_scraper/constants.py +0 -0
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/src/instapaper_scraper/exceptions.py +0 -0
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/src/instapaper_scraper.egg-info/SOURCES.txt +0 -0
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/src/instapaper_scraper.egg-info/dependency_links.txt +0 -0
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/src/instapaper_scraper.egg-info/entry_points.txt +0 -0
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/src/instapaper_scraper.egg-info/top_level.txt +0 -0
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/tests/test_cli_priority.py +0 -0
- {instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/tests/test_init.py +0 -0
{instapaper_scraper-1.1.0rc1/src/instapaper_scraper.egg-info → instapaper_scraper-1.1.1}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: instapaper-scraper
-Version: 1.1.0rc1
+Version: 1.1.1
 Summary: A tool to scrape articles from Instapaper.
 Project-URL: Homepage, https://github.com/chriskyfung/InstapaperScraper
 Project-URL: Source, https://github.com/chriskyfung/InstapaperScraper
@@ -35,30 +35,54 @@ Requires-Dist: tomli~=2.0.1; python_version < "3.11"
 Provides-Extra: dev
 Requires-Dist: pytest; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
-Requires-Dist: black; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: types-requests; extra == "dev"
 Requires-Dist: types-beautifulsoup4; extra == "dev"
 Requires-Dist: requests-mock; extra == "dev"
 Requires-Dist: build; extra == "dev"
 Requires-Dist: twine; extra == "dev"
+Requires-Dist: mypy; extra == "dev"
+Requires-Dist: pre-commit; extra == "dev"
+Requires-Dist: licensecheck; extra == "dev"
 Dynamic: license-file

 # Instapaper Scraper

-
-
-
-
-
-
-
-
-
-
-
-
-
+<!-- Badges -->
+<p align="center">
+<a href="https://pypi.org/project/instapaper-scraper/">
+<img src="https://img.shields.io/pypi/v/instapaper-scraper.svg" alt="PyPI version">
+</a>
+<a href="https://pepy.tech/projects/instapaper-scraper">
+<img src="https://static.pepy.tech/personalized-badge/instapaper-scraper?period=total&left_text=downloads" alt="PyPI Downloads">
+</a>
+<a href="https://github.com/chriskyfung/InstapaperScraper">
+<img src="https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fchriskyfung%2FInstapaperScraper%2Frefs%2Fheads%2Fmaster%2Fpyproject.toml" alt="Python Version from PEP 621 TOML">
+</a>
+<a href="https://github.com/astral-sh/ruff">
+<img src="https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fastral-sh%2Fruff%2Fmain%2Fassets%2Fbadge%2Fv2.json" alt="Ruff">
+</a>
+<a href="https://codecov.io/gh/chriskyfung/InstapaperScraper">
+<img src="https://codecov.io/gh/chriskyfung/InstapaperScraper/graph/badge.svg" alt="Code Coverage">
+</a>
+<wbr />
+<a href="https://github.com/chriskyfung/InstapaperScraper/actions/workflows/ci.yml">
+<img src="https://github.com/chriskyfung/InstapaperScraper/actions/workflows/ci.yml/badge.svg" alt="CI Status">
+</a>
+<a href="https://www.gnu.org/licenses/gpl-3.0.en.html">
+<img src="https://img.shields.io/github/license/chriskyfung/InstapaperScraper" alt="GitHub License">
+</a>
+<a href="https://github.com/sponsors/chriskyfung" title="Sponsor on GitHub">
+<img src="https://img.shields.io/badge/Sponsor-GitHub-blue?logo=github-sponsors&colorA=263238&colorB=EC407A" alt="GitHub Sponsors Default">
+</a>
+<a href="https://www.buymeacoffee.com/chriskyfung" title="Support Coffee">
+<img src="https://img.shields.io/badge/Support-Coffee-ffdd00?logo=buy-me-a-coffee&logoColor=ffdd00&colorA=263238" alt="Buy Me A Coffee">
+</a>
+</p>
+
+A powerful and reliable Python tool to automate the export of all your saved Instapaper bookmarks into various formats, giving you full ownership of your data.
+
+## ✨ Features

 - Scrapes all bookmarks from your Instapaper account.
 - Supports scraping from specific folders.
@@ -66,13 +90,13 @@ A Python tool to scrape all your saved Instapaper bookmarks and export them to v
 - Securely stores your session for future runs.
 - Modern, modular, and tested architecture.

-## Getting Started
+## 🚀 Getting Started

-### 1. Requirements
+### 📋 1. Requirements

 - Python 3.9+

-### 2. Installation
+### 📦 2. Installation

 This package is available on PyPI and can be installed with pip:

@@ -80,7 +104,7 @@ This package is available on PyPI and can be installed with pip:
 pip install instapaper-scraper
 ```

-### 3. Usage
+### 💻 3. Usage

 Run the tool from the command line, specifying your desired output format:

@@ -95,35 +119,35 @@ instapaper-scraper --format json
 instapaper-scraper --format sqlite --output my_articles.db
 ```

-## Configuration
+## ⚙️ Configuration

-### Authentication
+### 🔐 Authentication

 The script authenticates using one of the following methods, in order of priority:

-1.
+1. **Command-line Arguments**: Provide your username and password directly when running the script:

 ```sh
 instapaper-scraper --username your_username --password your_password
 ```

-2.
+2. **Session Files (`.session_key`, `.instapaper_session`)**: The script attempts to load these files in the following order:
 a. Path specified by `--session-file` or `--key-file` arguments.
 b. Files in the current working directory (e.g., `./.session_key`).
 c. Files in the user's configuration directory (`~/.config/instapaper-scraper/`).
 After the first successful login, the script creates an encrypted `.instapaper_session` file and a `.session_key` file to reuse your session securely.

-3.
+3. **Interactive Prompt**: If no other method is available, the script will prompt you for your username and password.

 > **Note on Security:** Your session file (`.instapaper_session`) and the encryption key (`.session_key`) are stored with secure permissions (read/write for the owner only) to protect your credentials.

-### Folder Configuration
+### 📁 Folder Configuration

 You can define and quickly access your Instapaper folders using a `config.toml` file. The scraper will look for this file in the following locations (in order of precedence):

-1.
-2.
-3.
+1. The path specified by the `--config-path` argument.
+2. `config.toml` in the current working directory.
+3. `~/.config/instapaper-scraper/config.toml`

 Here is an example of `config.toml`:

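The folder-configuration lookup order described in this hunk pairs with the `tomli~=2.0.1; python_version < '3.11'` requirement kept in this release. Below is a minimal sketch of reading such a `config.toml` with `tomllib`/`tomli`; the `folders` table shape is an assumption for illustration and is not taken from the package.

```python
# Sketch only (not code from the package): load a config.toml following the
# documented lookup order. tomllib is stdlib on Python 3.11+, while the
# tomli~=2.0.1 dependency listed in this release covers older interpreters.
# The "folders" table layout used below is an assumption for illustration.
import sys
from pathlib import Path
from typing import Optional

if sys.version_info >= (3, 11):
    import tomllib
else:
    import tomli as tomllib

def load_config(explicit_path: Optional[str] = None) -> dict:
    """Return the first config.toml found, mirroring the documented precedence."""
    candidates = [
        Path("config.toml"),                                             # current working directory
        Path.home() / ".config" / "instapaper-scraper" / "config.toml",  # user config directory
    ]
    if explicit_path:                      # --config-path takes precedence
        candidates.insert(0, Path(explicit_path))
    for path in candidates:
        if path.is_file():
            with path.open("rb") as fh:    # tomllib/tomli require binary mode
                return tomllib.load(fh)
    return {}

config = load_config()
for key, folder in config.get("folders", {}).items():
    print(key, folder.get("id"), folder.get("output_filename"))
```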
@@ -152,7 +176,7 @@ output_filename = "python-articles.db"

 When a `config.toml` file is present and no `--folder` argument is provided, the scraper will prompt you to select a folder. You can also specify a folder directly using the `--folder` argument with its key, ID, or slug. Use `--folder=none` to explicitly disable folder mode and scrape all articles.

-### Command-line Arguments
+### 💻 Command-line Arguments

 | Argument | Description |
 | --- | --- |
@@ -164,7 +188,7 @@ When a `config.toml` file is present and no `--folder` argument is provided, the
 | `--password <pass>` | Your Instapaper account password. |
 | `--add-instapaper-url` | Adds a `instapaper_url` column to the output, containing a full, clickable URL for each article. |

-### Output Formats
+### 📄 Output Formats

 You can control the output format using the `--format` argument. The supported formats are:

@@ -176,7 +200,7 @@ If the `--format` flag is omitted, the script will default to `csv`.

 When using `--output <filename>`, the file extension is automatically corrected to match the chosen format. For example, `instapaper-scraper --format json --output my_articles.txt` will create `my_articles.json`.

-#### Opening Articles in Instapaper
+#### 📖 Opening Articles in Instapaper

 The output data includes a unique `id` for each article. You can use this ID to construct a URL to the article's reader view: `https://www.instapaper.com/read/<article_id>`.

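The reader-view URL pattern documented in the hunk above is straightforward to reproduce when post-processing an export. A minimal sketch (not part of the package; the sample id is the one shown in the CSV example later in the README):

```python
# Sketch: rebuild the reader-view links that --add-instapaper-url would emit,
# using the URL pattern documented above (https://www.instapaper.com/read/<article_id>).

def reader_url(article_id: str) -> str:
    """Return the Instapaper reader-view URL for an exported article id."""
    return f"https://www.instapaper.com/read/{article_id}"

print(reader_url("999002345"))  # -> https://www.instapaper.com/read/999002345
```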
@@ -188,7 +212,7 @@ instapaper-scraper --add-instapaper-url

 This adds a `instapaper_url` field to each article in the JSON output and a `instapaper_url` column in the CSV and SQLite outputs. The original `id` field is preserved.

-## How It Works
+## 🛠️ How It Works

 The tool is designed with a modular architecture for reliability and maintainability.

@@ -197,9 +221,9 @@ The tool is designed with a modular architecture for reliability and maintainabi
 3. **Data Collection**: All fetched articles are aggregated into a single list.
 4. **Export**: Finally, the collected data is written to a file in your chosen format (`.csv`, `.json`, or `.db`).

-## Example Output
+## 📊 Example Output

-### CSV (`output/bookmarks.csv`) (with --add-instapaper-url)
+### 📄 CSV (`output/bookmarks.csv`) (with --add-instapaper-url)

 ```csv
 "id","instapaper_url","title","url"
@@ -207,7 +231,7 @@ The tool is designed with a modular architecture for reliability and maintainabi
 "999002345","https://www.instapaper.com/read/999002345","Article 2","https://www.example.com/page-2/"
 ```

-### JSON (`output/bookmarks.json`) (with --add-instapaper-url)
+### 📄 JSON (`output/bookmarks.json`) (with --add-instapaper-url)

 ```json
 [
@@ -226,15 +250,33 @@ The tool is designed with a modular architecture for reliability and maintainabi
 ]
 ```

-### SQLite (`output/bookmarks.db`)
+### 🗄️ SQLite (`output/bookmarks.db`)

 A SQLite database file is created with an `articles` table. The table includes `id`, `title`, and `url` columns. If the `--add-instapaper-url` flag is used, a `instapaper_url` column is also included. This feature is fully backward-compatible and will automatically adapt to the user's installed SQLite version, using an efficient generated column on modern versions (3.31.0+) and a fallback for older versions.

-##
+## 🤗 Support and Community
+
+- **🐛 Bug Reports:** For any bugs or unexpected behavior, please [open an issue on GitHub](https://github.com/chriskyfung/InstapaperScraper/issues).
+- **💬 Questions & General Discussion:** For questions, feature requests, or general discussion, please use our [GitHub Discussions](https://github.com/chriskyfung/InstapaperScraper/discussions).
+
+## 🙏 Support the Project
+
+`Instapaper Scraper` is a free and open-source project that requires significant time and effort to maintain and improve. If you find this tool useful, please consider supporting its development. Your contribution helps ensure the project stays healthy, active, and continuously updated.
+
+- **[Sponsor on GitHub](https://github.com/sponsors/chriskyfung):** The best way to support the project with recurring monthly donations. Tiers with special rewards like priority support are available!
+- **[Buy Me a Coffee](https://www.buymeacoffee.com/chriskyfung):** Perfect for a one-time thank you.
+
+## 🤝 Contributing

-
+Contributions are welcome! Whether it's a bug fix, a new feature, or documentation improvements, please feel free to open a pull request.

-
+Please read the **[Contribution Guidelines](CONTRIBUTING.md)** before you start.
+
+## 🧑💻 Development & Testing
+
+This project uses `pytest` for testing, `ruff` for code formatting and linting, and `mypy` for static type checking.
+
+### 🔧 Setup

 To install the development dependencies:

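The SQLite paragraph in the hunk above describes adapting to the installed SQLite version: an efficient generated column on 3.31.0+ and a fallback otherwise. A rough sketch of that idea using the standard `sqlite3` module follows; the table and column names match the README's description, but the code is an illustration, not the package's actual implementation.

```python
# Sketch of the version-gated strategy described above: a generated
# instapaper_url column on SQLite 3.31.0+, a plain column otherwise.
# Table and column names follow the documented output schema; the code is
# illustrative and not taken from instapaper_scraper.
import sqlite3

def create_articles_table(conn: sqlite3.Connection, add_instapaper_url: bool = True) -> None:
    base = "id TEXT PRIMARY KEY, title TEXT, url TEXT"
    if add_instapaper_url and sqlite3.sqlite_version_info >= (3, 31, 0):
        # Modern SQLite: derive the URL from id with a virtual generated column.
        conn.execute(
            f"CREATE TABLE IF NOT EXISTS articles ({base}, instapaper_url TEXT "
            "GENERATED ALWAYS AS ('https://www.instapaper.com/read/' || id) VIRTUAL)"
        )
    elif add_instapaper_url:
        # Fallback for older SQLite: an ordinary column the exporter fills itself.
        conn.execute(f"CREATE TABLE IF NOT EXISTS articles ({base}, instapaper_url TEXT)")
    else:
        conn.execute(f"CREATE TABLE IF NOT EXISTS articles ({base})")

conn = sqlite3.connect(":memory:")
create_articles_table(conn)
conn.execute(
    "INSERT INTO articles (id, title, url) VALUES (?, ?, ?)",
    ("999002345", "Article 2", "https://www.example.com/page-2/"),
)
print(conn.execute("SELECT instapaper_url FROM articles").fetchone())
```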
@@ -242,7 +284,13 @@ To install the development dependencies:
 pip install -e .[dev]
 ```

-
+To set up the pre-commit hooks:
+
+```sh
+pre-commit install
+```
+
+### ▶️ Running the Scraper

 To run the scraper directly without installing the package:

@@ -250,7 +298,7 @@ To run the scraper directly without installing the package:
 python -m src.instapaper_scraper.cli
 ```

-### Testing
+### ✅ Testing

 To run the tests, execute the following command from the project root:

@@ -264,12 +312,12 @@ To check test coverage:
 pytest --cov=src/instapaper_scraper --cov-report=term-missing
 ```

-### Code Quality
+### ✨ Code Quality

-To format the code with `
+To format the code with `ruff`:

 ```sh
-
+ruff format .
 ```

 To check for linting errors with `ruff`:
@@ -284,10 +332,29 @@ To automatically fix linting errors:
 ruff check . --fix
 ```

-
+To run static type checking with `mypy`:
+
+```sh
+mypy src
+```
+
+To run license checks:
+
+```sh
+licensecheck --show-only-failing
+```
+
+
+## 📜 Disclaimer

 This script requires valid Instapaper credentials. Use it responsibly and in accordance with Instapaper’s Terms of Service.

-## License
+## 📄 License
+
+This project is licensed under the terms of the **GNU General Public License v3.0**. See the [LICENSE](LICENSE) file for the full license text.
+
+## Contributors
+
+[](https://github.com/chriskyfung/InstapaperScraper/graphs/contributors)

-
+Made with [contrib.rocks](https://contrib.rocks).
{instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/README.md

@@ -1,18 +1,40 @@
 # Instapaper Scraper

-
-
-
-
-
-
-
-
-
-
-
-
-
+<!-- Badges -->
+<p align="center">
+<a href="https://pypi.org/project/instapaper-scraper/">
+<img src="https://img.shields.io/pypi/v/instapaper-scraper.svg" alt="PyPI version">
+</a>
+<a href="https://pepy.tech/projects/instapaper-scraper">
+<img src="https://static.pepy.tech/personalized-badge/instapaper-scraper?period=total&left_text=downloads" alt="PyPI Downloads">
+</a>
+<a href="https://github.com/chriskyfung/InstapaperScraper">
+<img src="https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2Fchriskyfung%2FInstapaperScraper%2Frefs%2Fheads%2Fmaster%2Fpyproject.toml" alt="Python Version from PEP 621 TOML">
+</a>
+<a href="https://github.com/astral-sh/ruff">
+<img src="https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fastral-sh%2Fruff%2Fmain%2Fassets%2Fbadge%2Fv2.json" alt="Ruff">
+</a>
+<a href="https://codecov.io/gh/chriskyfung/InstapaperScraper">
+<img src="https://codecov.io/gh/chriskyfung/InstapaperScraper/graph/badge.svg" alt="Code Coverage">
+</a>
+<wbr />
+<a href="https://github.com/chriskyfung/InstapaperScraper/actions/workflows/ci.yml">
+<img src="https://github.com/chriskyfung/InstapaperScraper/actions/workflows/ci.yml/badge.svg" alt="CI Status">
+</a>
+<a href="https://www.gnu.org/licenses/gpl-3.0.en.html">
+<img src="https://img.shields.io/github/license/chriskyfung/InstapaperScraper" alt="GitHub License">
+</a>
+<a href="https://github.com/sponsors/chriskyfung" title="Sponsor on GitHub">
+<img src="https://img.shields.io/badge/Sponsor-GitHub-blue?logo=github-sponsors&colorA=263238&colorB=EC407A" alt="GitHub Sponsors Default">
+</a>
+<a href="https://www.buymeacoffee.com/chriskyfung" title="Support Coffee">
+<img src="https://img.shields.io/badge/Support-Coffee-ffdd00?logo=buy-me-a-coffee&logoColor=ffdd00&colorA=263238" alt="Buy Me A Coffee">
+</a>
+</p>
+
+A powerful and reliable Python tool to automate the export of all your saved Instapaper bookmarks into various formats, giving you full ownership of your data.
+
+## ✨ Features

 - Scrapes all bookmarks from your Instapaper account.
 - Supports scraping from specific folders.
@@ -20,13 +42,13 @@ A Python tool to scrape all your saved Instapaper bookmarks and export them to v
 - Securely stores your session for future runs.
 - Modern, modular, and tested architecture.

-## Getting Started
+## 🚀 Getting Started

-### 1. Requirements
+### 📋 1. Requirements

 - Python 3.9+

-### 2. Installation
+### 📦 2. Installation

 This package is available on PyPI and can be installed with pip:

@@ -34,7 +56,7 @@ This package is available on PyPI and can be installed with pip:
 pip install instapaper-scraper
 ```

-### 3. Usage
+### 💻 3. Usage

 Run the tool from the command line, specifying your desired output format:

@@ -49,35 +71,35 @@ instapaper-scraper --format json
 instapaper-scraper --format sqlite --output my_articles.db
 ```

-## Configuration
+## ⚙️ Configuration

-### Authentication
+### 🔐 Authentication

 The script authenticates using one of the following methods, in order of priority:

-1.
+1. **Command-line Arguments**: Provide your username and password directly when running the script:

 ```sh
 instapaper-scraper --username your_username --password your_password
 ```

-2.
+2. **Session Files (`.session_key`, `.instapaper_session`)**: The script attempts to load these files in the following order:
 a. Path specified by `--session-file` or `--key-file` arguments.
 b. Files in the current working directory (e.g., `./.session_key`).
 c. Files in the user's configuration directory (`~/.config/instapaper-scraper/`).
 After the first successful login, the script creates an encrypted `.instapaper_session` file and a `.session_key` file to reuse your session securely.

-3.
+3. **Interactive Prompt**: If no other method is available, the script will prompt you for your username and password.

 > **Note on Security:** Your session file (`.instapaper_session`) and the encryption key (`.session_key`) are stored with secure permissions (read/write for the owner only) to protect your credentials.

-### Folder Configuration
+### 📁 Folder Configuration

 You can define and quickly access your Instapaper folders using a `config.toml` file. The scraper will look for this file in the following locations (in order of precedence):

-1.
-2.
-3.
+1. The path specified by the `--config-path` argument.
+2. `config.toml` in the current working directory.
+3. `~/.config/instapaper-scraper/config.toml`

 Here is an example of `config.toml`:

@@ -106,7 +128,7 @@ output_filename = "python-articles.db"

 When a `config.toml` file is present and no `--folder` argument is provided, the scraper will prompt you to select a folder. You can also specify a folder directly using the `--folder` argument with its key, ID, or slug. Use `--folder=none` to explicitly disable folder mode and scrape all articles.

-### Command-line Arguments
+### 💻 Command-line Arguments

 | Argument | Description |
 | --- | --- |
@@ -118,7 +140,7 @@ When a `config.toml` file is present and no `--folder` argument is provided, the
 | `--password <pass>` | Your Instapaper account password. |
 | `--add-instapaper-url` | Adds a `instapaper_url` column to the output, containing a full, clickable URL for each article. |

-### Output Formats
+### 📄 Output Formats

 You can control the output format using the `--format` argument. The supported formats are:

@@ -130,7 +152,7 @@ If the `--format` flag is omitted, the script will default to `csv`.

 When using `--output <filename>`, the file extension is automatically corrected to match the chosen format. For example, `instapaper-scraper --format json --output my_articles.txt` will create `my_articles.json`.

-#### Opening Articles in Instapaper
+#### 📖 Opening Articles in Instapaper

 The output data includes a unique `id` for each article. You can use this ID to construct a URL to the article's reader view: `https://www.instapaper.com/read/<article_id>`.

@@ -142,7 +164,7 @@ instapaper-scraper --add-instapaper-url

 This adds a `instapaper_url` field to each article in the JSON output and a `instapaper_url` column in the CSV and SQLite outputs. The original `id` field is preserved.

-## How It Works
+## 🛠️ How It Works

 The tool is designed with a modular architecture for reliability and maintainability.

@@ -151,9 +173,9 @@ The tool is designed with a modular architecture for reliability and maintainabi
 3. **Data Collection**: All fetched articles are aggregated into a single list.
 4. **Export**: Finally, the collected data is written to a file in your chosen format (`.csv`, `.json`, or `.db`).

-## Example Output
+## 📊 Example Output

-### CSV (`output/bookmarks.csv`) (with --add-instapaper-url)
+### 📄 CSV (`output/bookmarks.csv`) (with --add-instapaper-url)

 ```csv
 "id","instapaper_url","title","url"
@@ -161,7 +183,7 @@ The tool is designed with a modular architecture for reliability and maintainabi
 "999002345","https://www.instapaper.com/read/999002345","Article 2","https://www.example.com/page-2/"
 ```

-### JSON (`output/bookmarks.json`) (with --add-instapaper-url)
+### 📄 JSON (`output/bookmarks.json`) (with --add-instapaper-url)

 ```json
 [
@@ -180,15 +202,33 @@ The tool is designed with a modular architecture for reliability and maintainabi
 ]
 ```

-### SQLite (`output/bookmarks.db`)
+### 🗄️ SQLite (`output/bookmarks.db`)

 A SQLite database file is created with an `articles` table. The table includes `id`, `title`, and `url` columns. If the `--add-instapaper-url` flag is used, a `instapaper_url` column is also included. This feature is fully backward-compatible and will automatically adapt to the user's installed SQLite version, using an efficient generated column on modern versions (3.31.0+) and a fallback for older versions.

-##
+## 🤗 Support and Community
+
+- **🐛 Bug Reports:** For any bugs or unexpected behavior, please [open an issue on GitHub](https://github.com/chriskyfung/InstapaperScraper/issues).
+- **💬 Questions & General Discussion:** For questions, feature requests, or general discussion, please use our [GitHub Discussions](https://github.com/chriskyfung/InstapaperScraper/discussions).
+
+## 🙏 Support the Project
+
+`Instapaper Scraper` is a free and open-source project that requires significant time and effort to maintain and improve. If you find this tool useful, please consider supporting its development. Your contribution helps ensure the project stays healthy, active, and continuously updated.
+
+- **[Sponsor on GitHub](https://github.com/sponsors/chriskyfung):** The best way to support the project with recurring monthly donations. Tiers with special rewards like priority support are available!
+- **[Buy Me a Coffee](https://www.buymeacoffee.com/chriskyfung):** Perfect for a one-time thank you.
+
+## 🤝 Contributing

-
+Contributions are welcome! Whether it's a bug fix, a new feature, or documentation improvements, please feel free to open a pull request.

-
+Please read the **[Contribution Guidelines](CONTRIBUTING.md)** before you start.
+
+## 🧑💻 Development & Testing
+
+This project uses `pytest` for testing, `ruff` for code formatting and linting, and `mypy` for static type checking.
+
+### 🔧 Setup

 To install the development dependencies:

@@ -196,7 +236,13 @@ To install the development dependencies:
 pip install -e .[dev]
 ```

-
+To set up the pre-commit hooks:
+
+```sh
+pre-commit install
+```
+
+### ▶️ Running the Scraper

 To run the scraper directly without installing the package:

@@ -204,7 +250,7 @@ To run the scraper directly without installing the package:
 python -m src.instapaper_scraper.cli
 ```

-### Testing
+### ✅ Testing

 To run the tests, execute the following command from the project root:

@@ -218,12 +264,12 @@ To check test coverage:
 pytest --cov=src/instapaper_scraper --cov-report=term-missing
 ```

-### Code Quality
+### ✨ Code Quality

-To format the code with `
+To format the code with `ruff`:

 ```sh
-
+ruff format .
 ```

 To check for linting errors with `ruff`:
@@ -238,10 +284,29 @@ To automatically fix linting errors:
 ruff check . --fix
 ```

-
+To run static type checking with `mypy`:
+
+```sh
+mypy src
+```
+
+To run license checks:
+
+```sh
+licensecheck --show-only-failing
+```
+
+
+## 📜 Disclaimer

 This script requires valid Instapaper credentials. Use it responsibly and in accordance with Instapaper’s Terms of Service.

-## License
+## 📄 License
+
+This project is licensed under the terms of the **GNU General Public License v3.0**. See the [LICENSE](LICENSE) file for the full license text.
+
+## Contributors
+
+[](https://github.com/chriskyfung/InstapaperScraper/graphs/contributors)

-
+Made with [contrib.rocks](https://contrib.rocks).
{instapaper_scraper-1.1.0rc1 → instapaper_scraper-1.1.1}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "instapaper-scraper"
-version = "1.1.0rc1"
+version = "1.1.1"
 description = "A tool to scrape articles from Instapaper."
 readme = "README.md"
 requires-python = ">=3.9"
@@ -35,7 +35,7 @@ dependencies = [
 "soupsieve~=2.8",
 "typing_extensions~=4.15.0",
 "urllib3>=2.5,<2.7",
-"tomli~=2.0.1; python_version < '3.11'"
+"tomli~=2.0.1; python_version < '3.11'"
 ]

 [project.urls]
@@ -46,24 +46,40 @@ Issues = "https://github.com/chriskyfung/InstapaperScraper/issues"
 [project.scripts]
 instapaper-scraper = "instapaper_scraper.cli:main"

+[tool.licensecheck]
+license = "GPL-3.0"
+
 [tool.pytest.ini_options]
 pythonpath = "src"

-[tool.black]
-line-length = 88
-
 [tool.ruff]
 line-length = 88

+[tool.ruff.format]
+quote-style = "double"
+
+[tool.mypy]
+python_version = "3.9"
+warn_return_any = true
+warn_unused_configs = true
+ignore_missing_imports = true
+disallow_untyped_defs = true
+
+[[tool.mypy.overrides]]
+module = "tests.*"
+disallow_untyped_defs = false
+
 [project.optional-dependencies]
 dev = [
 "pytest",
 "pytest-cov",
-"black",
 "ruff",
 "types-requests",
 "types-beautifulsoup4",
 "requests-mock",
 "build",
-"twine"
+"twine",
+"mypy",
+"pre-commit",
+"licensecheck"
 ]
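The `[tool.mypy]` settings added in this hunk enforce `disallow_untyped_defs = true` for the package while relaxing it for `tests.*`. A tiny illustration (hypothetical code, not from the repository) of what that flag accepts and rejects:

```python
# Illustrative only: with disallow_untyped_defs = true, mypy accepts the first
# function and reports the second ("Function is missing a type annotation").
# Under the tests.* override, untyped helpers like the second one remain allowed.

def reader_url(article_id: str) -> str:  # fully annotated: passes
    return f"https://www.instapaper.com/read/{article_id}"

def reader_url_untyped(article_id):  # flagged wherever the strict setting applies
    return f"https://www.instapaper.com/read/{article_id}"
```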