instapaper-scraper 1.1.1__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {instapaper_scraper-1.1.1/src/instapaper_scraper.egg-info → instapaper_scraper-1.2.0}/PKG-INFO +56 -32
  2. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/README.md +54 -30
  3. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/pyproject.toml +2 -2
  4. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper/api.py +41 -6
  5. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper/cli.py +30 -5
  6. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper/constants.py +1 -0
  7. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper/output.py +50 -10
  8. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0/src/instapaper_scraper.egg-info}/PKG-INFO +56 -32
  9. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper.egg-info/SOURCES.txt +1 -0
  10. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper.egg-info/requires.txt +1 -1
  11. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/tests/test_api.py +108 -9
  12. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/tests/test_cli.py +69 -20
  13. instapaper_scraper-1.2.0/tests/test_cli_config_flags.py +367 -0
  14. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/tests/test_output.py +37 -3
  15. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/LICENSE +0 -0
  16. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/setup.cfg +0 -0
  17. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper/__init__.py +0 -0
  18. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper/auth.py +0 -0
  19. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper/exceptions.py +0 -0
  20. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper.egg-info/dependency_links.txt +0 -0
  21. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper.egg-info/entry_points.txt +0 -0
  22. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper.egg-info/top_level.txt +0 -0
  23. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/tests/test_auth.py +0 -0
  24. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/tests/test_cli_priority.py +0 -0
  25. {instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/tests/test_init.py +0 -0
{instapaper_scraper-1.1.1/src/instapaper_scraper.egg-info → instapaper_scraper-1.2.0}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: instapaper-scraper
- Version: 1.1.1
+ Version: 1.2.0
  Summary: A tool to scrape articles from Instapaper.
  Project-URL: Homepage, https://github.com/chriskyfung/InstapaperScraper
  Project-URL: Source, https://github.com/chriskyfung/InstapaperScraper
@@ -21,7 +21,7 @@ Requires-Python: >=3.9
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: beautifulsoup4~=4.14.2
- Requires-Dist: certifi~=2025.11.12
+ Requires-Dist: certifi<2026.2.0,>=2025.11.12
  Requires-Dist: charset-normalizer~=3.4.3
  Requires-Dist: cryptography~=46.0.3
  Requires-Dist: guara~=0.0.14
@@ -72,16 +72,20 @@ Dynamic: license-file
  <a href="https://www.gnu.org/licenses/gpl-3.0.en.html">
  <img src="https://img.shields.io/github/license/chriskyfung/InstapaperScraper" alt="GitHub License">
  </a>
+ </p>
+
+ A powerful and reliable Python tool to automate the export of all your saved Instapaper bookmarks into various formats, giving you full ownership of your data.
+
+ <!-- Sponsors -->
+ <p align="center">
  <a href="https://github.com/sponsors/chriskyfung" title="Sponsor on GitHub">
- <img src="https://img.shields.io/badge/Sponsor-GitHub-blue?logo=github-sponsors&colorA=263238&colorB=EC407A" alt="GitHub Sponsors Default">
+ <img src="https://img.shields.io/badge/Sponsor-GitHub-blue?style=for-the-badge&logo=github-sponsors&colorA=263238&colorB=EC407A" alt="GitHub Sponsors Default">
  </a>
  <a href="https://www.buymeacoffee.com/chriskyfung" title="Support Coffee">
- <img src="https://img.shields.io/badge/Support-Coffee-ffdd00?logo=buy-me-a-coffee&logoColor=ffdd00&colorA=263238" alt="Buy Me A Coffee">
+ <img src="https://img.shields.io/badge/Support-Coffee-ffdd00?style=for-the-badge&logo=buy-me-a-coffee&logoColor=ffdd00&colorA=263238" alt="Buy Me A Coffee">
  </a>
  </p>

- A powerful and reliable Python tool to automate the export of all your saved Instapaper bookmarks into various formats, giving you full ownership of your data.
-
  ## ✨ Features

  - Scrapes all bookmarks from your Instapaper account.
@@ -141,9 +145,9 @@ The script authenticates using one of the following methods, in order of priorit

  > **Note on Security:** Your session file (`.instapaper_session`) and the encryption key (`.session_key`) are stored with secure permissions (read/write for the owner only) to protect your credentials.

- ### 📁 Folder Configuration
+ ### 📁 Folder and Field Configuration

- You can define and quickly access your Instapaper folders using a `config.toml` file. The scraper will look for this file in the following locations (in order of precedence):
+ You can define and quickly access your Instapaper folders and set default output fields using a `config.toml` file. The scraper will look for this file in the following locations (in order of precedence):

  1. The path specified by the `--config-path` argument.
  2. `config.toml` in the current working directory.
@@ -155,6 +159,12 @@ Here is an example of `config.toml`:
  # Default output filename for non-folder mode
  output_filename = "home-articles.csv"

+ # Optional fields to include in the output.
+ # These can be overridden by command-line flags.
+ [fields]
+ read_url = false
+ article_preview = false
+
  [[folders]]
  key = "ml"
  id = "1234567"
@@ -169,10 +179,14 @@ output_filename = "python-articles.db"
  ```

  - **output_filename (top-level)**: The default output filename to use when not in folder mode.
- - **key**: A short alias for the folder.
- - **id**: The folder ID from the Instapaper URL.
- - **slug**: The human-readable part of the folder URL.
- - **output_filename (folder-specific)**: A preset output filename for scraped articles from this specific folder.
+ - **[fields]**: A section to control which optional data fields are included in the output.
+   - `read_url`: Set to `true` to include the Instapaper read URL for each article.
+   - `article_preview`: Set to `true` to include the article's text preview.
+ - **[[folders]]**: Each `[[folders]]` block defines a specific folder.
+   - **key**: A short alias for the folder.
+   - **id**: The folder ID from the Instapaper URL.
+   - **slug**: The human-readable part of the folder URL.
+   - **output_filename (folder-specific)**: A preset output filename for scraped articles from this specific folder.

  When a `config.toml` file is present and no `--folder` argument is provided, the scraper will prompt you to select a folder. You can also specify a folder directly using the `--folder` argument with its key, ID, or slug. Use `--folder=none` to explicitly disable folder mode and scrape all articles.

@@ -186,7 +200,8 @@ When a `config.toml` file is present and no `--folder` argument is provided, the
  | `--output <filename>` | Specify a custom output filename. The file extension will be automatically corrected to match the selected format. |
  | `--username <user>` | Your Instapaper account username. |
  | `--password <pass>` | Your Instapaper account password. |
- | `--add-instapaper-url` | Adds a `instapaper_url` column to the output, containing a full, clickable URL for each article. |
+ | `--[no-]read-url` | Includes the Instapaper read URL. (Old flag `--add-instapaper-url` is deprecated but supported). Can be set in `config.toml`. Overrides config. |
+ | `--[no-]article-preview` | Includes the article preview text. (Old flag `--add-article-preview` is deprecated but supported). Can be set in `config.toml`. Overrides config. |

  ### 📄 Output Formats

@@ -204,10 +219,10 @@ When using `--output <filename>`, the file extension is automatically corrected

  The output data includes a unique `id` for each article. You can use this ID to construct a URL to the article's reader view: `https://www.instapaper.com/read/<article_id>`.

- For convenience, you can use the `--add-instapaper-url` flag to have the script include a full, clickable URL in the output.
+ For convenience, you can use the `--read-url` flag to have the script include a full, clickable URL in the output.

  ```sh
- instapaper-scraper --add-instapaper-url
+ instapaper-scraper --read-url
  ```

  This adds a `instapaper_url` field to each article in the JSON output and a `instapaper_url` column in the CSV and SQLite outputs. The original `id` field is preserved.
@@ -223,15 +238,15 @@ The tool is designed with a modular architecture for reliability and maintainabi

  ## 📊 Example Output

- ### 📄 CSV (`output/bookmarks.csv`) (with --add-instapaper-url)
+ ### 📄 CSV (`output/bookmarks.csv`) (with --add-instapaper-url and --add-article-preview)

  ```csv
- "id","instapaper_url","title","url"
- "999901234","https://www.instapaper.com/read/999901234","Article 1","https://www.example.com/page-1/"
- "999002345","https://www.instapaper.com/read/999002345","Article 2","https://www.example.com/page-2/"
+ "id","instapaper_url","title","url","article_preview"
+ "999901234","https://www.instapaper.com/read/999901234","Article 1","https://www.example.com/page-1/","This is a preview of article 1."
+ "999002345","https://www.instapaper.com/read/999002345","Article 2","https://www.example.com/page-2/","This is a preview of article 2."
  ```

- ### 📄 JSON (`output/bookmarks.json`) (with --add-instapaper-url)
+ ### 📄 JSON (`output/bookmarks.json`) (with --add-instapaper-url and --add-article-preview)

  ```json
  [
@@ -239,13 +254,15 @@ The tool is designed with a modular architecture for reliability and maintainabi
  "id": "999901234",
  "title": "Article 1",
  "url": "https://www.example.com/page-1/",
- "instapaper_url": "https://www.instapaper.com/read/999901234"
+ "instapaper_url": "https://www.instapaper.com/read/999901234",
+ "article_preview": "This is a preview of article 1."
  },
  {
  "id": "999002345",
  "title": "Article 2",
  "url": "https://www.example.com/page-2/",
- "instapaper_url": "https://www.instapaper.com/read/999002345"
+ "instapaper_url": "https://www.instapaper.com/read/999002345",
+ "article_preview": "This is a preview of article 2."
  }
  ]
  ```
@@ -274,7 +291,18 @@ Please read the **[Contribution Guidelines](CONTRIBUTING.md)** before you start.

  ## 🧑‍💻 Development & Testing

- This project uses `pytest` for testing, `ruff` for code formatting and linting, and `mypy` for static type checking.
+ This project uses `pytest` for testing, `ruff` for code formatting and linting, and `mypy` for static type checking. A `Makefile` is provided to simplify common development tasks.
+
+ ### 🚀 Using the Makefile
+
+ The most common commands are:
+ - `make install`: Installs development dependencies.
+ - `make format`: Formats the entire codebase.
+ - `make check`: Runs the linter, type checker, and test suite.
+ - `make test`: Runs the test suite.
+ - `make build`: Builds the distributable packages.
+
+ Run `make help` to see all available commands.

  ### 🔧 Setup

@@ -300,13 +328,13 @@ python -m src.instapaper_scraper.cli

  ### ✅ Testing

- To run the tests, execute the following command from the project root:
+ To run the tests, execute the following command from the project root (or use `make test`):

  ```sh
  pytest
  ```

- To check test coverage:
+ To check test coverage (or use `make test-cov`):

  ```sh
  pytest --cov=src/instapaper_scraper --cov-report=term-missing
@@ -314,6 +342,8 @@ pytest --cov=src/instapaper_scraper --cov-report=term-missing

  ### ✨ Code Quality

+ You can use the `Makefile` for convenience (e.g., `make format`, `make lint`).
+
  To format the code with `ruff`:

  ```sh
@@ -326,12 +356,6 @@ To check for linting errors with `ruff`:
  ruff check .
  ```

- To automatically fix linting errors:
-
- ```sh
- ruff check . --fix
- ```
-
  To run static type checking with `mypy`:

  ```sh
@@ -341,7 +365,7 @@ mypy src
  To run license checks:

  ```sh
- licensecheck --show-only-failing
+ licensecheck --zero
  ```

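A note on the `--read-url` behaviour documented above: the flag only changes what the exporter writes at export time, and the same URL can be derived afterwards from the exported `id` field. The sketch below is not part of the package; it simply applies the documented `https://www.instapaper.com/read/<article_id>` scheme to a JSON export (the `output/bookmarks.json` path is taken from the README example).

```python
import json

# Standalone post-processing sketch (not part of instapaper-scraper):
# rebuild the read URL from the exported `id`, mirroring what the
# --read-url flag writes into the `instapaper_url` field.
READ_URL_TEMPLATE = "https://www.instapaper.com/read/{article_id}"

with open("output/bookmarks.json", encoding="utf-8") as f:
    articles = json.load(f)

for article in articles:
    # Leaves existing instapaper_url values untouched.
    article.setdefault("instapaper_url", READ_URL_TEMPLATE.format(article_id=article["id"]))

print(json.dumps(articles, indent=2))
```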
{instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/README.md

@@ -24,16 +24,20 @@
  <a href="https://www.gnu.org/licenses/gpl-3.0.en.html">
  <img src="https://img.shields.io/github/license/chriskyfung/InstapaperScraper" alt="GitHub License">
  </a>
+ </p>
+
+ A powerful and reliable Python tool to automate the export of all your saved Instapaper bookmarks into various formats, giving you full ownership of your data.
+
+ <!-- Sponsors -->
+ <p align="center">
  <a href="https://github.com/sponsors/chriskyfung" title="Sponsor on GitHub">
- <img src="https://img.shields.io/badge/Sponsor-GitHub-blue?logo=github-sponsors&colorA=263238&colorB=EC407A" alt="GitHub Sponsors Default">
+ <img src="https://img.shields.io/badge/Sponsor-GitHub-blue?style=for-the-badge&logo=github-sponsors&colorA=263238&colorB=EC407A" alt="GitHub Sponsors Default">
  </a>
  <a href="https://www.buymeacoffee.com/chriskyfung" title="Support Coffee">
- <img src="https://img.shields.io/badge/Support-Coffee-ffdd00?logo=buy-me-a-coffee&logoColor=ffdd00&colorA=263238" alt="Buy Me A Coffee">
+ <img src="https://img.shields.io/badge/Support-Coffee-ffdd00?style=for-the-badge&logo=buy-me-a-coffee&logoColor=ffdd00&colorA=263238" alt="Buy Me A Coffee">
  </a>
  </p>

- A powerful and reliable Python tool to automate the export of all your saved Instapaper bookmarks into various formats, giving you full ownership of your data.
-
  ## ✨ Features

  - Scrapes all bookmarks from your Instapaper account.
@@ -93,9 +97,9 @@ The script authenticates using one of the following methods, in order of priorit

  > **Note on Security:** Your session file (`.instapaper_session`) and the encryption key (`.session_key`) are stored with secure permissions (read/write for the owner only) to protect your credentials.

- ### 📁 Folder Configuration
+ ### 📁 Folder and Field Configuration

- You can define and quickly access your Instapaper folders using a `config.toml` file. The scraper will look for this file in the following locations (in order of precedence):
+ You can define and quickly access your Instapaper folders and set default output fields using a `config.toml` file. The scraper will look for this file in the following locations (in order of precedence):

  1. The path specified by the `--config-path` argument.
  2. `config.toml` in the current working directory.
@@ -107,6 +111,12 @@ Here is an example of `config.toml`:
  # Default output filename for non-folder mode
  output_filename = "home-articles.csv"

+ # Optional fields to include in the output.
+ # These can be overridden by command-line flags.
+ [fields]
+ read_url = false
+ article_preview = false
+
  [[folders]]
  key = "ml"
  id = "1234567"
@@ -121,10 +131,14 @@ output_filename = "python-articles.db"
  ```

  - **output_filename (top-level)**: The default output filename to use when not in folder mode.
- - **key**: A short alias for the folder.
- - **id**: The folder ID from the Instapaper URL.
- - **slug**: The human-readable part of the folder URL.
- - **output_filename (folder-specific)**: A preset output filename for scraped articles from this specific folder.
+ - **[fields]**: A section to control which optional data fields are included in the output.
+   - `read_url`: Set to `true` to include the Instapaper read URL for each article.
+   - `article_preview`: Set to `true` to include the article's text preview.
+ - **[[folders]]**: Each `[[folders]]` block defines a specific folder.
+   - **key**: A short alias for the folder.
+   - **id**: The folder ID from the Instapaper URL.
+   - **slug**: The human-readable part of the folder URL.
+   - **output_filename (folder-specific)**: A preset output filename for scraped articles from this specific folder.

  When a `config.toml` file is present and no `--folder` argument is provided, the scraper will prompt you to select a folder. You can also specify a folder directly using the `--folder` argument with its key, ID, or slug. Use `--folder=none` to explicitly disable folder mode and scrape all articles.

@@ -138,7 +152,8 @@ When a `config.toml` file is present and no `--folder` argument is provided, the
  | `--output <filename>` | Specify a custom output filename. The file extension will be automatically corrected to match the selected format. |
  | `--username <user>` | Your Instapaper account username. |
  | `--password <pass>` | Your Instapaper account password. |
- | `--add-instapaper-url` | Adds a `instapaper_url` column to the output, containing a full, clickable URL for each article. |
+ | `--[no-]read-url` | Includes the Instapaper read URL. (Old flag `--add-instapaper-url` is deprecated but supported). Can be set in `config.toml`. Overrides config. |
+ | `--[no-]article-preview` | Includes the article preview text. (Old flag `--add-article-preview` is deprecated but supported). Can be set in `config.toml`. Overrides config. |

  ### 📄 Output Formats

@@ -156,10 +171,10 @@ When using `--output <filename>`, the file extension is automatically corrected

  The output data includes a unique `id` for each article. You can use this ID to construct a URL to the article's reader view: `https://www.instapaper.com/read/<article_id>`.

- For convenience, you can use the `--add-instapaper-url` flag to have the script include a full, clickable URL in the output.
+ For convenience, you can use the `--read-url` flag to have the script include a full, clickable URL in the output.

  ```sh
- instapaper-scraper --add-instapaper-url
+ instapaper-scraper --read-url
  ```

  This adds a `instapaper_url` field to each article in the JSON output and a `instapaper_url` column in the CSV and SQLite outputs. The original `id` field is preserved.
@@ -175,15 +190,15 @@ The tool is designed with a modular architecture for reliability and maintainabi

  ## 📊 Example Output

- ### 📄 CSV (`output/bookmarks.csv`) (with --add-instapaper-url)
+ ### 📄 CSV (`output/bookmarks.csv`) (with --add-instapaper-url and --add-article-preview)

  ```csv
- "id","instapaper_url","title","url"
- "999901234","https://www.instapaper.com/read/999901234","Article 1","https://www.example.com/page-1/"
- "999002345","https://www.instapaper.com/read/999002345","Article 2","https://www.example.com/page-2/"
+ "id","instapaper_url","title","url","article_preview"
+ "999901234","https://www.instapaper.com/read/999901234","Article 1","https://www.example.com/page-1/","This is a preview of article 1."
+ "999002345","https://www.instapaper.com/read/999002345","Article 2","https://www.example.com/page-2/","This is a preview of article 2."
  ```

- ### 📄 JSON (`output/bookmarks.json`) (with --add-instapaper-url)
+ ### 📄 JSON (`output/bookmarks.json`) (with --add-instapaper-url and --add-article-preview)

  ```json
  [
@@ -191,13 +206,15 @@ The tool is designed with a modular architecture for reliability and maintainabi
  "id": "999901234",
  "title": "Article 1",
  "url": "https://www.example.com/page-1/",
- "instapaper_url": "https://www.instapaper.com/read/999901234"
+ "instapaper_url": "https://www.instapaper.com/read/999901234",
+ "article_preview": "This is a preview of article 1."
  },
  {
  "id": "999002345",
  "title": "Article 2",
  "url": "https://www.example.com/page-2/",
- "instapaper_url": "https://www.instapaper.com/read/999002345"
+ "instapaper_url": "https://www.instapaper.com/read/999002345",
+ "article_preview": "This is a preview of article 2."
  }
  ]
  ```
@@ -226,7 +243,18 @@ Please read the **[Contribution Guidelines](CONTRIBUTING.md)** before you start.

  ## 🧑‍💻 Development & Testing

- This project uses `pytest` for testing, `ruff` for code formatting and linting, and `mypy` for static type checking.
+ This project uses `pytest` for testing, `ruff` for code formatting and linting, and `mypy` for static type checking. A `Makefile` is provided to simplify common development tasks.
+
+ ### 🚀 Using the Makefile
+
+ The most common commands are:
+ - `make install`: Installs development dependencies.
+ - `make format`: Formats the entire codebase.
+ - `make check`: Runs the linter, type checker, and test suite.
+ - `make test`: Runs the test suite.
+ - `make build`: Builds the distributable packages.
+
+ Run `make help` to see all available commands.

  ### 🔧 Setup

@@ -252,13 +280,13 @@ python -m src.instapaper_scraper.cli

  ### ✅ Testing

- To run the tests, execute the following command from the project root:
+ To run the tests, execute the following command from the project root (or use `make test`):

  ```sh
  pytest
  ```

- To check test coverage:
+ To check test coverage (or use `make test-cov`):

  ```sh
  pytest --cov=src/instapaper_scraper --cov-report=term-missing
@@ -266,6 +294,8 @@ pytest --cov=src/instapaper_scraper --cov-report=term-missing

  ### ✨ Code Quality

+ You can use the `Makefile` for convenience (e.g., `make format`, `make lint`).
+
  To format the code with `ruff`:

  ```sh
@@ -278,12 +308,6 @@ To check for linting errors with `ruff`:
  ruff check .
  ```

- To automatically fix linting errors:
-
- ```sh
- ruff check . --fix
- ```
-
  To run static type checking with `mypy`:

  ```sh
@@ -293,7 +317,7 @@ mypy src
  To run license checks:

  ```sh
- licensecheck --show-only-failing
+ licensecheck --zero
  ```

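The dependency change in the pyproject.toml diff that follows loosens the `certifi` pin. As a rough check of what the new range admits, here is a sketch using the third-party `packaging` library (not one of this project's dependencies):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

old = SpecifierSet("~=2025.11.12")            # compatible release: >=2025.11.12, ==2025.11.*
new = SpecifierSet(">=2025.11.12,<2026.2.0")  # the 1.2.0 range

# Print whether each candidate certifi release satisfies the old and new pins.
for candidate in ("2025.11.12", "2025.12.1", "2026.1.5", "2026.2.0"):
    print(candidate, Version(candidate) in old, Version(candidate) in new)
```

The old compatible-release pin only admits 2025.11.x patch releases, while the new range also accepts certifi's later calendar releases up to, but not including, 2026.2.0.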
{instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "instapaper-scraper"
- version = "1.1.1"
+ version = "1.2.0"
  description = "A tool to scrape articles from Instapaper."
  readme = "README.md"
  requires-python = ">=3.9"
@@ -25,7 +25,7 @@ classifiers = [
  license-files = ["LICEN[CS]E*"]
  dependencies = [
  "beautifulsoup4~=4.14.2",
- "certifi~=2025.11.12",
+ "certifi>=2025.11.12,<2026.2.0",
  "charset-normalizer~=3.4.3",
  "cryptography~=46.0.3",
  "guara~=0.0.14",
{instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper/api.py

@@ -8,7 +8,13 @@ from bs4 import BeautifulSoup
  from bs4.element import Tag

  from .exceptions import ScraperStructureChanged
- from .constants import INSTAPAPER_BASE_URL, KEY_ID, KEY_TITLE, KEY_URL
+ from .constants import (
+ INSTAPAPER_BASE_URL,
+ KEY_ID,
+ KEY_TITLE,
+ KEY_URL,
+ KEY_ARTICLE_PREVIEW,
+ )


  class InstapaperClient:
@@ -34,6 +40,7 @@ class InstapaperClient:
  PAGINATE_OLDER_CLASS = "paginate_older"
  ARTICLE_TITLE_CLASS = "article_title"
  TITLE_META_CLASS = "title_meta"
+ ARTICLE_PREVIEW_CLASS = "article_preview"

  # URL paths
  URL_PATH_USER = "/u/"
@@ -102,12 +109,14 @@ class InstapaperClient:
  self,
  page: int = DEFAULT_PAGE_START,
  folder_info: Optional[Dict[str, str]] = None,
+ add_article_preview: bool = False,
  ) -> Tuple[List[Dict[str, str]], bool]:
  """
  Fetches a single page of articles and determines if there are more pages.
  Args:
  page: The page number to fetch.
  folder_info: A dictionary containing 'id' and 'slug' of the folder to fetch articles from.
+ add_article_preview: Whether to include the article preview.
  Returns:
  A tuple containing:
  - A list of article data (dictionaries with id, title, url).
@@ -147,7 +156,9 @@ class InstapaperClient:
  article_id_val.replace(self.ARTICLE_ID_PREFIX, "")
  )

- data = self._parse_article_data(soup, article_ids, page)
+ data = self._parse_article_data(
+ soup, article_ids, page, add_article_preview
+ )
  has_more = soup.find(class_=self.PAGINATE_OLDER_CLASS) is not None

  return data, has_more
@@ -185,13 +196,17 @@ class InstapaperClient:
  raise Exception(self.MSG_SCRAPING_FAILED_UNKNOWN)

  def get_all_articles(
- self, limit: Optional[int] = None, folder_info: Optional[Dict[str, str]] = None
+ self,
+ limit: Optional[int] = None,
+ folder_info: Optional[Dict[str, str]] = None,
+ add_article_preview: bool = False,
  ) -> List[Dict[str, str]]:
  """
  Iterates through pages and fetches articles up to a specified limit.
  Args:
  limit: The maximum number of pages to scrape. If None, scrapes all pages.
  folder_info: A dictionary containing 'id' and 'slug' of the folder to fetch articles from.
+ add_article_preview: Whether to include the article preview.
  """
  all_articles = []
  page = self.DEFAULT_PAGE_START
@@ -202,7 +217,11 @@ class InstapaperClient:
  break

  logging.info(self.MSG_SCRAPING_PAGE.format(page=page))
- data, has_more = self.get_articles(page=page, folder_info=folder_info)
+ data, has_more = self.get_articles(
+ page=page,
+ folder_info=folder_info,
+ add_article_preview=add_article_preview,
+ )
  if data:
  all_articles.extend(data)
  page += 1
@@ -217,7 +236,11 @@ class InstapaperClient:
  return f"{INSTAPAPER_BASE_URL}{self.URL_PATH_USER}{page}"

  def _parse_article_data(
- self, soup: BeautifulSoup, article_ids: List[str], page: int
+ self,
+ soup: BeautifulSoup,
+ article_ids: List[str],
+ page: int,
+ add_article_preview: bool = False,
  ) -> List[Dict[str, Any]]:
  """Parses the raw HTML to extract structured data for each article."""
  data = []
@@ -249,7 +272,19 @@ class InstapaperClient:
  raise AttributeError(self.MSG_LINK_ELEMENT_NOT_FOUND)
  link = link_element["href"]

- data.append({KEY_ID: article_id, KEY_TITLE: title, KEY_URL: link})
+ article_data = {KEY_ID: article_id, KEY_TITLE: title, KEY_URL: link}
+
+ if add_article_preview:
+ preview_element = article_element.find(
+ class_=self.ARTICLE_PREVIEW_CLASS
+ )
+ article_data[KEY_ARTICLE_PREVIEW] = (
+ preview_element.get_text().strip()
+ if isinstance(preview_element, Tag)
+ else ""
+ )
+
+ data.append(article_data)
  except AttributeError as e:
  logging.warning(
  self.MSG_PARSE_ARTICLE_WARNING.format(
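The core of the new preview support in `_parse_article_data` is an optional `find()` on the article element with an empty-string fallback when no preview element exists. Below is a self-contained sketch of that pattern; the HTML fragment and its `article_item` wrapper are invented for the example, and only the `article_preview` class name comes from the code above.

```python
from bs4 import BeautifulSoup
from bs4.element import Tag

# Hypothetical HTML fragment; only the "article_preview" class matches the
# new ARTICLE_PREVIEW_CLASS constant introduced in this release.
html = """
<div id="article_999901234" class="article_item">
  <a class="article_title" href="https://www.example.com/page-1/">Article 1</a>
  <div class="article_preview">This is a preview of article 1.</div>
</div>
"""

soup = BeautifulSoup(html, "html.parser")
article_element = soup.find(class_="article_item")

# Same shape as the diff: missing previews fall back to an empty string.
preview_element = article_element.find(class_="article_preview")
preview = preview_element.get_text().strip() if isinstance(preview_element, Tag) else ""
print(preview)  # -> This is a preview of article 1.
```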
{instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper/cli.py

@@ -102,9 +102,18 @@ def main() -> None:
  parser.add_argument("--username", help="Instapaper username.")
  parser.add_argument("--password", help="Instapaper password.")
  parser.add_argument(
- "--add-instapaper-url",
- action="store_true",
- help="Add an 'instapaper_url' column to the output with the full Instapaper read URL.",
+ "--read-url", # New, preferred flag
+ "--add-instapaper-url", # Old, for backward compatibility
+ dest="add_instapaper_url",
+ action=argparse.BooleanOptionalAction,
+ help="Include the Instapaper read URL. Overrides config.",
+ )
+ parser.add_argument(
+ "--article-preview", # New, preferred flag
+ "--add-article-preview", # Old, for backward compatibility
+ dest="add_article_preview",
+ action=argparse.BooleanOptionalAction,
+ help="Include the article preview text. Overrides config.",
  )
  parser.add_argument(
  "--limit",
@@ -120,8 +129,21 @@ def main() -> None:

  config = load_config(args.config_path)
  folders = config.get("folders", []) if config else []
+ fields_config = config.get("fields", {}) if config else {}
  selected_folder = None

+ # Resolve boolean flags, giving CLI priority over config
+ final_add_instapaper_url = (
+ args.add_instapaper_url
+ if args.add_instapaper_url is not None
+ else fields_config.get("read_url", False)
+ )
+ final_add_article_preview = (
+ args.add_article_preview
+ if args.add_article_preview is not None
+ else fields_config.get("article_preview", False)
+ )
+
  if args.folder:
  if args.folder.lower() == "none":
  selected_folder = None
@@ -196,7 +218,9 @@ def main() -> None:
  try:
  folder_info = selected_folder if selected_folder else None
  all_articles = client.get_all_articles(
- limit=args.limit, folder_info=folder_info
+ limit=args.limit,
+ folder_info=folder_info,
+ add_article_preview=final_add_article_preview,
  )
  except ScraperStructureChanged as e:
  logging.error(f"Stopping scraper due to an unrecoverable error: {e}")
@@ -214,7 +238,8 @@ def main() -> None:
  all_articles,
  args.format,
  output_filename,
- add_instapaper_url=args.add_instapaper_url,
+ add_instapaper_url=final_add_instapaper_url,
+ add_article_preview=final_add_article_preview,
  )
  logging.info("Articles scraped and saved successfully.")
  except Exception as e:
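The flag handling above leans on `argparse.BooleanOptionalAction`, which generates matching `--no-` variants and leaves the destination at `None` when neither form is passed, letting the config fallback apply. Here is a standalone sketch of that resolution order, with a hypothetical in-memory `fields_config` standing in for the parsed `config.toml` `[fields]` table:

```python
import argparse

# Sketch of the 1.2.0 resolution order: an explicit CLI flag (including the
# auto-generated --no- variant) wins; otherwise the config.toml [fields]
# value is used; otherwise the field is off.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--read-url",
    "--add-instapaper-url",  # deprecated spelling, still accepted
    dest="add_instapaper_url",
    action=argparse.BooleanOptionalAction,
)

fields_config = {"read_url": True}  # hypothetical [fields] content

for argv in ([], ["--read-url"], ["--no-read-url"], ["--add-instapaper-url"]):
    args = parser.parse_args(argv)
    resolved = (
        args.add_instapaper_url
        if args.add_instapaper_url is not None
        else fields_config.get("read_url", False)
    )
    print(argv, "->", resolved)
# [] -> True (config fallback); ['--no-read-url'] -> False (CLI wins);
# ['--read-url'] and ['--add-instapaper-url'] -> True.
```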
{instapaper_scraper-1.1.1 → instapaper_scraper-1.2.0}/src/instapaper_scraper/constants.py

@@ -15,3 +15,4 @@ CONFIG_DIR = Path.home() / ".config" / APP_NAME
  KEY_ID = "id"
  KEY_TITLE = "title"
  KEY_URL = "url"
+ KEY_ARTICLE_PREVIEW = "article_preview"