ilpost-api-wrapper 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ilpost_api_wrapper-0.1.0/LICENSE +21 -0
- ilpost_api_wrapper-0.1.0/PKG-INFO +202 -0
- ilpost_api_wrapper-0.1.0/README.md +176 -0
- ilpost_api_wrapper-0.1.0/ilpost/__init__.py +13 -0
- ilpost_api_wrapper-0.1.0/ilpost/client.py +204 -0
- ilpost_api_wrapper-0.1.0/ilpost/models.py +145 -0
- ilpost_api_wrapper-0.1.0/ilpost/py.typed +0 -0
- ilpost_api_wrapper-0.1.0/ilpost_api_wrapper.egg-info/PKG-INFO +202 -0
- ilpost_api_wrapper-0.1.0/ilpost_api_wrapper.egg-info/SOURCES.txt +11 -0
- ilpost_api_wrapper-0.1.0/ilpost_api_wrapper.egg-info/dependency_links.txt +1 -0
- ilpost_api_wrapper-0.1.0/ilpost_api_wrapper.egg-info/top_level.txt +2 -0
- ilpost_api_wrapper-0.1.0/pyproject.toml +37 -0
- ilpost_api_wrapper-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Antonio Girasella
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ilpost-api-wrapper
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python wrapper for Il Post newspaper API
|
|
5
|
+
Author: Antonio Girasella
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/girasella/ilpost-api-wrapper
|
|
8
|
+
Project-URL: Repository, https://github.com/girasella/ilpost-api-wrapper
|
|
9
|
+
Project-URL: Issues, https://github.com/girasella/ilpost-api-wrapper/issues
|
|
10
|
+
Keywords: ilpost,api,news,italy,search
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
|
|
27
|
+
# ilpost-api-wrapper
|
|
28
|
+
|
|
29
|
+
A Python wrapper for the [Il Post](https://www.ilpost.it) public search API.
|
|
30
|
+
Searches articles, podcast episodes, and newsletters — no authentication required.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install ilpost-api-wrapper
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Requires Python 3.9+. No third-party dependencies.
|
|
39
|
+
|
|
40
|
+
## Quick start
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from ilpost import IlPostClient, SortOrder, ContentType, DateRange
|
|
44
|
+
|
|
45
|
+
client = IlPostClient()
|
|
46
|
+
|
|
47
|
+
result = client.search("berlusconi")
|
|
48
|
+
for doc in result.docs:
|
|
49
|
+
print(doc.title, doc.link)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## API reference
|
|
53
|
+
|
|
54
|
+
### `IlPostClient(timeout=10)`
|
|
55
|
+
|
|
56
|
+
| Method | Description |
|
|
57
|
+
|--------|-------------|
|
|
58
|
+
| `search(query, ...)` | General search across all content types |
|
|
59
|
+
| `search_articles(query, ...)` | Articles only |
|
|
60
|
+
| `search_podcasts(query, ...)` | Podcast episodes only |
|
|
61
|
+
| `search_newsletters(query, ...)` | Newsletter issues only |
|
|
62
|
+
| `paginate(query, ...)` | Generator that yields one `SearchResult` per page |
|
|
63
|
+
|
|
64
|
+
#### Common parameters
|
|
65
|
+
|
|
66
|
+
| Parameter | Type | Default | Description |
|
|
67
|
+
|-----------|------|---------|-------------|
|
|
68
|
+
| `query` | `str` | — | Search term |
|
|
69
|
+
| `page` | `int` | `1` | Page number (1-based) |
|
|
70
|
+
| `hits` | `int` | `10` | Results per page |
|
|
71
|
+
| `sort` | `SortOrder` | `RELEVANCE` | Sort order |
|
|
72
|
+
| `content_type` | `ContentType` | `None` | Filter by content type |
|
|
73
|
+
| `category` | `str` | `None` | Editorial category (articles only) |
|
|
74
|
+
| `date_range` | `DateRange` | `None` | Publication date filter |
|
|
75
|
+
|
|
76
|
+
#### Enums
|
|
77
|
+
|
|
78
|
+
**`SortOrder`**
|
|
79
|
+
| Value | Description |
|
|
80
|
+
|-------|-------------|
|
|
81
|
+
| `RELEVANCE` | Sort by relevance score (default) |
|
|
82
|
+
| `NEWEST` | Most recent first |
|
|
83
|
+
| `OLDEST` | Oldest first |
|
|
84
|
+
|
|
85
|
+
**`ContentType`**
|
|
86
|
+
| Value | Description |
|
|
87
|
+
|-------|-------------|
|
|
88
|
+
| `ARTICLES` | Articles and news posts |
|
|
89
|
+
| `PODCASTS` | Podcast episodes |
|
|
90
|
+
| `NEWSLETTERS` | Newsletter issues |
|
|
91
|
+
|
|
92
|
+
**`DateRange`**
|
|
93
|
+
| Value | Description |
|
|
94
|
+
|-------|-------------|
|
|
95
|
+
| `ALL_TIME` | Entire archive (default) |
|
|
96
|
+
| `PAST_YEAR` | Past 12 months |
|
|
97
|
+
| `PAST_30_DAYS` | Past 30 days |
|
|
98
|
+
|
|
99
|
+
### `SearchResult`
|
|
100
|
+
|
|
101
|
+
| Attribute | Type | Description |
|
|
102
|
+
|-----------|------|-------------|
|
|
103
|
+
| `total` | `int` | Total number of matching results |
|
|
104
|
+
| `docs` | `list[Document]` | Results for this page |
|
|
105
|
+
| `filters` | `list[FilterGroup]` | Available filters with counts |
|
|
106
|
+
| `sort` | `str` | Active sort value |
|
|
107
|
+
| `hits` | `int` | Page size |
|
|
108
|
+
| `page` | `int` | Current page number |
|
|
109
|
+
| `total_pages` | `int` | Total number of pages |
|
|
110
|
+
| `has_next_page` | `bool` | Whether a next page exists |
|
|
111
|
+
| `has_prev_page` | `bool` | Whether a previous page exists |
|
|
112
|
+
|
|
113
|
+
### `Document`
|
|
114
|
+
|
|
115
|
+
| Attribute | Type | Description |
|
|
116
|
+
|-----------|------|-------------|
|
|
117
|
+
| `id` | `int` | Unique content identifier |
|
|
118
|
+
| `type` | `str` | `"post"`, `"episodes"`, or `"newsletter"` |
|
|
119
|
+
| `title` | `str` | Content title |
|
|
120
|
+
| `link` | `str` | URL to the content page |
|
|
121
|
+
| `timestamp` | `str` | Publication date (ISO 8601, Italian local time) |
|
|
122
|
+
| `summary` | `str` | Short excerpt |
|
|
123
|
+
| `image` | `str` | Cover image URL |
|
|
124
|
+
| `score` | `float` | Relevance score (`0.0` when sorting by date) |
|
|
125
|
+
| `subscriber` | `bool` | `True` if content is paywalled |
|
|
126
|
+
| `highlight` | `str \| None` | Snippet with matched term in `<span>` tags |
|
|
127
|
+
| `category` | `str \| None` | Editorial category (articles only) |
|
|
128
|
+
| `post_tag_text` | `list[str]` | Tags (articles only) |
|
|
129
|
+
| `derived_info` | `dict` | Extra data: episode or newsletter metadata |
|
|
130
|
+
| `is_article` | `bool` | Convenience property |
|
|
131
|
+
| `is_podcast` | `bool` | Convenience property |
|
|
132
|
+
| `is_newsletter` | `bool` | Convenience property |
|
|
133
|
+
| `is_paywalled` | `bool` | Alias for `subscriber` |
|
|
134
|
+
|
|
135
|
+
## Examples
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from ilpost import IlPostClient, SortOrder, ContentType, DateRange
|
|
139
|
+
|
|
140
|
+
client = IlPostClient()
|
|
141
|
+
|
|
142
|
+
# Most recent articles in politics
|
|
143
|
+
result = client.search_articles(
|
|
144
|
+
"renzi",
|
|
145
|
+
sort=SortOrder.NEWEST,
|
|
146
|
+
category="politica",
|
|
147
|
+
date_range=DateRange.PAST_30_DAYS,
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
# Podcast search
|
|
151
|
+
result = client.search_podcasts("cacao", sort=SortOrder.NEWEST)
|
|
152
|
+
|
|
153
|
+
# Paginate through all results, 5 per page
|
|
154
|
+
for page in client.paginate("sicilia", hits=5, max_pages=10):
|
|
155
|
+
print(f"Page {page.page}/{page.total_pages}")
|
|
156
|
+
for doc in page.docs:
|
|
157
|
+
print(f" [{doc.type}] {doc.title}")
|
|
158
|
+
|
|
159
|
+
# Access filter counts from a response
|
|
160
|
+
result = client.search("europa")
|
|
161
|
+
for group in result.filters:
|
|
162
|
+
print(f"{group.label}:")
|
|
163
|
+
for opt in group.options:
|
|
164
|
+
print(f" {opt.label}: {opt.doc_count}")
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## CLI
|
|
168
|
+
|
|
169
|
+
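A command-line interface is provided via the `main.py` script (it is not among the files shipped in this distribution, so run it from a checkout of the repository):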
|
|
170
|
+
|
|
171
|
+
```
|
|
172
|
+
usage: ilpost-search [-h] [--type {articles,podcasts,newsletters}]
|
|
173
|
+
[--sort {relevance,newest,oldest}]
|
|
174
|
+
[--date {all,year,month}] [--category CATEGORY]
|
|
175
|
+
[--page PAGE] [--hits HITS] [--all-pages]
|
|
176
|
+
[--max-pages N]
|
|
177
|
+
query
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
# Basic search
|
|
182
|
+
python main.py berlusconi
|
|
183
|
+
|
|
184
|
+
# Most recent articles in politics
|
|
185
|
+
python main.py renzi --type articles --sort newest --category politica
|
|
186
|
+
|
|
187
|
+
# Podcast search, past 30 days
|
|
188
|
+
python main.py cacao --type podcasts --date month
|
|
189
|
+
|
|
190
|
+
# Page 2, 5 results per page, oldest first
|
|
191
|
+
python main.py sicilia --sort oldest --hits 5 --page 2
|
|
192
|
+
|
|
193
|
+
# Fetch all pages of newsletter results (up to 3 pages)
|
|
194
|
+
python main.py economia --type newsletters --all-pages --max-pages 3
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Notes
|
|
198
|
+
|
|
199
|
+
- Paywalled content (`subscriber: true`) is included in search results — title, summary, and highlight are visible, but the full article requires an active ilpost.it subscription.
|
|
200
|
+
- When sorting by date (`NEWEST` or `OLDEST`), `score` is always `0.0`.
|
|
201
|
+
- The `category` filter only applies to articles. It is ignored by the server when `content_type=PODCASTS`.
|
|
202
|
+
- Timestamps are in Italian local time (CET/CEST) with no UTC offset.
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# ilpost-api-wrapper
|
|
2
|
+
|
|
3
|
+
A Python wrapper for the [Il Post](https://www.ilpost.it) public search API.
|
|
4
|
+
Searches articles, podcast episodes, and newsletters — no authentication required.
|
|
5
|
+
|
|
6
|
+
## Installation
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
pip install ilpost-api-wrapper
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
Requires Python 3.9+. No third-party dependencies.
|
|
13
|
+
|
|
14
|
+
## Quick start
|
|
15
|
+
|
|
16
|
+
```python
|
|
17
|
+
from ilpost import IlPostClient, SortOrder, ContentType, DateRange
|
|
18
|
+
|
|
19
|
+
client = IlPostClient()
|
|
20
|
+
|
|
21
|
+
result = client.search("berlusconi")
|
|
22
|
+
for doc in result.docs:
|
|
23
|
+
print(doc.title, doc.link)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## API reference
|
|
27
|
+
|
|
28
|
+
### `IlPostClient(timeout=10)`
|
|
29
|
+
|
|
30
|
+
| Method | Description |
|
|
31
|
+
|--------|-------------|
|
|
32
|
+
| `search(query, ...)` | General search across all content types |
|
|
33
|
+
| `search_articles(query, ...)` | Articles only |
|
|
34
|
+
| `search_podcasts(query, ...)` | Podcast episodes only |
|
|
35
|
+
| `search_newsletters(query, ...)` | Newsletter issues only |
|
|
36
|
+
| `paginate(query, ...)` | Generator that yields one `SearchResult` per page |
|
|
37
|
+
|
|
38
|
+
#### Common parameters
|
|
39
|
+
|
|
40
|
+
| Parameter | Type | Default | Description |
|
|
41
|
+
|-----------|------|---------|-------------|
|
|
42
|
+
| `query` | `str` | — | Search term |
|
|
43
|
+
| `page` | `int` | `1` | Page number (1-based) |
|
|
44
|
+
| `hits` | `int` | `10` | Results per page |
|
|
45
|
+
| `sort` | `SortOrder` | `RELEVANCE` | Sort order |
|
|
46
|
+
| `content_type` | `ContentType` | `None` | Filter by content type |
|
|
47
|
+
| `category` | `str` | `None` | Editorial category (articles only) |
|
|
48
|
+
| `date_range` | `DateRange` | `None` | Publication date filter |
|
|
49
|
+
|
|
50
|
+
#### Enums
|
|
51
|
+
|
|
52
|
+
**`SortOrder`**
|
|
53
|
+
| Value | Description |
|
|
54
|
+
|-------|-------------|
|
|
55
|
+
| `RELEVANCE` | Sort by relevance score (default) |
|
|
56
|
+
| `NEWEST` | Most recent first |
|
|
57
|
+
| `OLDEST` | Oldest first |
|
|
58
|
+
|
|
59
|
+
**`ContentType`**
|
|
60
|
+
| Value | Description |
|
|
61
|
+
|-------|-------------|
|
|
62
|
+
| `ARTICLES` | Articles and news posts |
|
|
63
|
+
| `PODCASTS` | Podcast episodes |
|
|
64
|
+
| `NEWSLETTERS` | Newsletter issues |
|
|
65
|
+
|
|
66
|
+
**`DateRange`**
|
|
67
|
+
| Value | Description |
|
|
68
|
+
|-------|-------------|
|
|
69
|
+
| `ALL_TIME` | Entire archive (default) |
|
|
70
|
+
| `PAST_YEAR` | Past 12 months |
|
|
71
|
+
| `PAST_30_DAYS` | Past 30 days |
|
|
72
|
+
|
|
73
|
+
### `SearchResult`
|
|
74
|
+
|
|
75
|
+
| Attribute | Type | Description |
|
|
76
|
+
|-----------|------|-------------|
|
|
77
|
+
| `total` | `int` | Total number of matching results |
|
|
78
|
+
| `docs` | `list[Document]` | Results for this page |
|
|
79
|
+
| `filters` | `list[FilterGroup]` | Available filters with counts |
|
|
80
|
+
| `sort` | `str` | Active sort value |
|
|
81
|
+
| `hits` | `int` | Page size |
|
|
82
|
+
| `page` | `int` | Current page number |
|
|
83
|
+
| `total_pages` | `int` | Total number of pages |
|
|
84
|
+
| `has_next_page` | `bool` | Whether a next page exists |
|
|
85
|
+
| `has_prev_page` | `bool` | Whether a previous page exists |
|
|
86
|
+
|
|
87
|
+
### `Document`
|
|
88
|
+
|
|
89
|
+
| Attribute | Type | Description |
|
|
90
|
+
|-----------|------|-------------|
|
|
91
|
+
| `id` | `int` | Unique content identifier |
|
|
92
|
+
| `type` | `str` | `"post"`, `"episodes"`, or `"newsletter"` |
|
|
93
|
+
| `title` | `str` | Content title |
|
|
94
|
+
| `link` | `str` | URL to the content page |
|
|
95
|
+
| `timestamp` | `str` | Publication date (ISO 8601, Italian local time) |
|
|
96
|
+
| `summary` | `str` | Short excerpt |
|
|
97
|
+
| `image` | `str` | Cover image URL |
|
|
98
|
+
| `score` | `float` | Relevance score (`0.0` when sorting by date) |
|
|
99
|
+
| `subscriber` | `bool` | `True` if content is paywalled |
|
|
100
|
+
| `highlight` | `str \| None` | Snippet with matched term in `<span>` tags |
|
|
101
|
+
| `category` | `str \| None` | Editorial category (articles only) |
|
|
102
|
+
| `post_tag_text` | `list[str]` | Tags (articles only) |
|
|
103
|
+
| `derived_info` | `dict` | Extra data: episode or newsletter metadata |
|
|
104
|
+
| `is_article` | `bool` | Convenience property |
|
|
105
|
+
| `is_podcast` | `bool` | Convenience property |
|
|
106
|
+
| `is_newsletter` | `bool` | Convenience property |
|
|
107
|
+
| `is_paywalled` | `bool` | Alias for `subscriber` |
|
|
108
|
+
|
|
109
|
+
## Examples
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from ilpost import IlPostClient, SortOrder, ContentType, DateRange
|
|
113
|
+
|
|
114
|
+
client = IlPostClient()
|
|
115
|
+
|
|
116
|
+
# Most recent articles in politics
|
|
117
|
+
result = client.search_articles(
|
|
118
|
+
"renzi",
|
|
119
|
+
sort=SortOrder.NEWEST,
|
|
120
|
+
category="politica",
|
|
121
|
+
date_range=DateRange.PAST_30_DAYS,
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
# Podcast search
|
|
125
|
+
result = client.search_podcasts("cacao", sort=SortOrder.NEWEST)
|
|
126
|
+
|
|
127
|
+
# Paginate through all results, 5 per page
|
|
128
|
+
for page in client.paginate("sicilia", hits=5, max_pages=10):
|
|
129
|
+
print(f"Page {page.page}/{page.total_pages}")
|
|
130
|
+
for doc in page.docs:
|
|
131
|
+
print(f" [{doc.type}] {doc.title}")
|
|
132
|
+
|
|
133
|
+
# Access filter counts from a response
|
|
134
|
+
result = client.search("europa")
|
|
135
|
+
for group in result.filters:
|
|
136
|
+
print(f"{group.label}:")
|
|
137
|
+
for opt in group.options:
|
|
138
|
+
print(f" {opt.label}: {opt.doc_count}")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## CLI
|
|
142
|
+
|
|
143
|
+
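A command-line interface is provided via the `main.py` script (it is not among the files shipped in this distribution, so run it from a checkout of the repository):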
|
|
144
|
+
|
|
145
|
+
```
|
|
146
|
+
usage: ilpost-search [-h] [--type {articles,podcasts,newsletters}]
|
|
147
|
+
[--sort {relevance,newest,oldest}]
|
|
148
|
+
[--date {all,year,month}] [--category CATEGORY]
|
|
149
|
+
[--page PAGE] [--hits HITS] [--all-pages]
|
|
150
|
+
[--max-pages N]
|
|
151
|
+
query
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
# Basic search
|
|
156
|
+
python main.py berlusconi
|
|
157
|
+
|
|
158
|
+
# Most recent articles in politics
|
|
159
|
+
python main.py renzi --type articles --sort newest --category politica
|
|
160
|
+
|
|
161
|
+
# Podcast search, past 30 days
|
|
162
|
+
python main.py cacao --type podcasts --date month
|
|
163
|
+
|
|
164
|
+
# Page 2, 5 results per page, oldest first
|
|
165
|
+
python main.py sicilia --sort oldest --hits 5 --page 2
|
|
166
|
+
|
|
167
|
+
# Fetch all pages of newsletter results (up to 3 pages)
|
|
168
|
+
python main.py economia --type newsletters --all-pages --max-pages 3
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Notes
|
|
172
|
+
|
|
173
|
+
- Paywalled content (`subscriber: true`) is included in search results — title, summary, and highlight are visible, but the full article requires an active ilpost.it subscription.
|
|
174
|
+
- When sorting by date (`NEWEST` or `OLDEST`), `score` is always `0.0`.
|
|
175
|
+
- The `category` filter only applies to articles. It is ignored by the server when `content_type=PODCASTS`.
|
|
176
|
+
- Timestamps are in Italian local time (CET/CEST) with no UTC offset.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from .client import IlPostClient
|
|
2
|
+
from .models import SearchResult, Document, FilterGroup, FilterOption, SortOrder, ContentType, DateRange
|
|
3
|
+
|
|
4
|
+
# Public API of the ``ilpost`` package: the names exposed by
# ``from ilpost import *``. The order is kept as originally declared.
__all__ = [
    # HTTP client
    "IlPostClient",
    # Response models
    "SearchResult",
    "Document",
    "FilterGroup",
    "FilterOption",
    # Query enums
    "SortOrder",
    "ContentType",
    "DateRange",
]
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional, Union
|
|
4
|
+
from urllib.parse import urlencode, quote
|
|
5
|
+
|
|
6
|
+
import urllib.request
|
|
7
|
+
import json
|
|
8
|
+
|
|
9
|
+
from .models import SearchResult, SortOrder, ContentType, DateRange
|
|
10
|
+
|
|
11
|
+
# Endpoint requested by IlPostClient._get; the urlencoded query string is
# appended after a "?".
_BASE_URL = "https://api.ilpost.org/search/api/site_search/"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _build_filters(
    content_type: Union[ContentType, str, None] = None,
    category: Optional[str] = None,
    date_range: Union[DateRange, str, None] = None,
) -> str:
    """Build the comma-separated string sent as the ``filters`` query parameter.

    Every argument that is not ``None`` contributes one ``name:value`` pair,
    always in the order ``ctype``, ``category``, ``pub_date``.

    Parameters
    ----------
    content_type:
        A ``ContentType`` member, or the raw API token as a plain string
        (e.g. ``"articoli"``).
    category:
        Editorial category, inserted verbatim (no escaping is applied).
    date_range:
        A ``DateRange`` member, or the raw API token as a plain string
        (e.g. ``"ultimo_anno"``).

    Returns
    -------
    str
        For example ``"ctype:articoli,pub_date:ultimo_anno"``; the empty
        string when every argument is ``None``.
    """
    parts: list[str] = []
    if content_type is not None:
        # Enum members carry the API token in ``.value``; plain strings are
        # used as-is, mirroring how ``search`` treats its ``sort`` argument.
        parts.append(f"ctype:{getattr(content_type, 'value', content_type)}")
    if category is not None:
        parts.append(f"category:{category}")
    if date_range is not None:
        parts.append(f"pub_date:{getattr(date_range, 'value', date_range)}")
    return ",".join(parts)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class IlPostClient:
    """Thin wrapper around the Il Post public search API.

    Each search call performs one HTTP GET against ``_BASE_URL`` and converts
    the JSON reply into a :class:`SearchResult`.

    Parameters
    ----------
    timeout:
        HTTP request timeout in seconds (default: 10).
    """

    def __init__(self, timeout: float = 10) -> None:
        self.timeout = timeout

    def _get(self, params: dict) -> dict:
        """Issue a GET request with ``params`` as query string and decode the JSON body.

        Exceptions raised by ``urllib.request.urlopen`` or ``json.loads`` are
        not caught here and propagate to the caller.
        """
        # quote_via=quote percent-encodes spaces as %20 instead of "+".
        qs = urlencode(params, quote_via=quote)
        url = f"{_BASE_URL}?{qs}"
        with urllib.request.urlopen(url, timeout=self.timeout) as response:
            return json.loads(response.read().decode("utf-8"))

    def search(
        self,
        query: str,
        *,
        page: int = 1,
        hits: int = 10,
        sort: Union[SortOrder, str] = SortOrder.RELEVANCE,
        content_type: Optional[ContentType] = None,
        category: Optional[str] = None,
        date_range: Optional[DateRange] = None,
        filters: Optional[str] = None,
    ) -> SearchResult:
        """Search Il Post content.

        Parameters
        ----------
        query:
            Search term.
        page:
            1-based page number (default: 1).
        hits:
            Results per page (default: 10).
        sort:
            Sort order. ``SortOrder.RELEVANCE`` (default), ``SortOrder.NEWEST``,
            or ``SortOrder.OLDEST``. A plain string is sent unchanged.
        content_type:
            Filter by content type: ``ContentType.ARTICLES``, ``ContentType.PODCASTS``,
            or ``ContentType.NEWSLETTERS``.
        category:
            Filter articles by editorial category (e.g. ``"politica"``, ``"cultura"``).
            Only meaningful when ``content_type=ContentType.ARTICLES`` or no content
            type filter is set.
        date_range:
            Filter by publication date: ``DateRange.ALL_TIME``, ``DateRange.PAST_YEAR``,
            or ``DateRange.PAST_30_DAYS``.
        filters:
            Raw pre-encoded filter string (e.g. ``"ctype:articoli,pub_date:ultimo_anno"``).
            When provided, overrides ``content_type``, ``category``, and ``date_range``.

        Returns
        -------
        SearchResult
            Parsed reply; ``page`` and ``query`` are copied from the arguments.
        """
        if filters is None:
            filters = _build_filters(content_type, category, date_range)

        sort_value = sort.value if isinstance(sort, SortOrder) else sort

        # The "filters" key is always sent, as an empty string when no filter applies.
        params = {
            "qs": query,
            "pg": page,
            "sort": sort_value,
            "filters": filters,
            "hits": hits,
        }

        data = self._get(params)
        return SearchResult.from_dict(data, page=page, query=query)

    def search_articles(
        self,
        query: str,
        *,
        page: int = 1,
        hits: int = 10,
        sort: Union[SortOrder, str] = SortOrder.RELEVANCE,
        category: Optional[str] = None,
        date_range: Optional[DateRange] = None,
    ) -> SearchResult:
        """Search articles only. Convenience wrapper around :meth:`search`."""
        return self.search(
            query,
            page=page,
            hits=hits,
            sort=sort,
            content_type=ContentType.ARTICLES,
            category=category,
            date_range=date_range,
        )

    def search_podcasts(
        self,
        query: str,
        *,
        page: int = 1,
        hits: int = 10,
        sort: Union[SortOrder, str] = SortOrder.RELEVANCE,
        date_range: Optional[DateRange] = None,
    ) -> SearchResult:
        """Search podcast episodes only. Convenience wrapper around :meth:`search`."""
        return self.search(
            query,
            page=page,
            hits=hits,
            sort=sort,
            content_type=ContentType.PODCASTS,
            date_range=date_range,
        )

    def search_newsletters(
        self,
        query: str,
        *,
        page: int = 1,
        hits: int = 10,
        sort: Union[SortOrder, str] = SortOrder.RELEVANCE,
        date_range: Optional[DateRange] = None,
    ) -> SearchResult:
        """Search newsletter issues only. Convenience wrapper around :meth:`search`."""
        return self.search(
            query,
            page=page,
            hits=hits,
            sort=sort,
            content_type=ContentType.NEWSLETTERS,
            date_range=date_range,
        )

    def paginate(
        self,
        query: str,
        *,
        hits: int = 10,
        sort: Union[SortOrder, str] = SortOrder.RELEVANCE,
        content_type: Optional[ContentType] = None,
        category: Optional[str] = None,
        date_range: Optional[DateRange] = None,
        max_pages: Optional[int] = None,
    ):
        """Iterate over all pages of search results, yielding one :class:`SearchResult`
        per page.

        Pages are requested lazily, starting at page 1; one HTTP request is made
        each time the generator is advanced.

        Parameters
        ----------
        query, hits, sort, content_type, category, date_range:
            Forwarded unchanged to :meth:`search`.
        max_pages:
            Stop after this many pages. ``None`` means iterate until exhausted.
            A value below 1 yields nothing and performs no request.

        Yields
        ------
        SearchResult
        """
        page = 1
        # Checking the limit *before* each request makes max_pages <= 0 a no-op
        # instead of fetching (and yielding) one page anyway.
        while max_pages is None or page <= max_pages:
            result = self.search(
                query,
                page=page,
                hits=hits,
                sort=sort,
                content_type=content_type,
                category=category,
                date_range=date_range,
            )
            yield result
            if not result.has_next_page:
                break
            page += 1
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SortOrder(str, Enum):
    """Tokens sent as the ``sort`` query parameter of a search request."""

    RELEVANCE = "default"  # sort by relevance score
    NEWEST = "date_d"  # most recent first
    OLDEST = "date_a"  # oldest first
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ContentType(str, Enum):
    """Tokens used for the ``ctype`` entry of the search filter string."""

    ARTICLES = "articoli"  # articles and news posts
    PODCASTS = "podcast"  # podcast episodes
    NEWSLETTERS = "newsletter"  # newsletter issues
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DateRange(str, Enum):
    """Tokens used for the ``pub_date`` entry of the search filter string."""

    ALL_TIME = "da_sempre"  # entire archive
    PAST_YEAR = "ultimo_anno"  # past 12 months
    PAST_30_DAYS = "ultimi_30_giorni"  # past 30 days
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class FilterOption:
    """A single entry of a filter group as returned in a search reply."""

    key: str
    label: str
    doc_count: int  # count reported by the API for this option
    selected: bool

    @classmethod
    def from_dict(cls, data: dict) -> FilterOption:
        """Create an option from its JSON object.

        All four keys (``key``, ``label``, ``doc_count``, ``selected``) are
        required; a missing one raises ``KeyError``.
        """
        required = ("key", "label", "doc_count", "selected")
        return cls(**{name: data[name] for name in required})
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class FilterGroup:
    """A named set of filter options as returned in a search reply."""

    name: str
    label: str
    multi: bool
    options: list[FilterOption]

    @classmethod
    def from_dict(cls, data: dict) -> FilterGroup:
        """Create a group from its JSON object.

        ``name``, ``label`` and ``multi`` are required (``KeyError`` if absent).
        The options are read from the ``contents`` key, which may be missing.
        """
        # Required keys are read first so a malformed group fails on them
        # before any option is parsed.
        group_name = data["name"]
        group_label = data["label"]
        is_multi = data["multi"]
        parsed_options = [
            FilterOption.from_dict(entry) for entry in data.get("contents", [])
        ]
        return cls(
            name=group_name,
            label=group_label,
            multi=is_multi,
            options=parsed_options,
        )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class Document:
    """One search hit (article, podcast episode, or newsletter issue)."""

    id: int
    type: str  # "post", "episodes" or "newsletter" (see the is_* properties)
    title: str
    link: str
    timestamp: str
    summary: str
    image: str
    score: float
    subscriber: bool
    highlight: Optional[str] = None
    category: Optional[str] = None
    post_tag_text: list[str] = field(default_factory=list)
    derived_info: dict = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict) -> Document:
        """Create a document from its JSON object.

        ``id``, ``type``, ``title``, ``link`` and ``timestamp`` are required
        (``KeyError`` if absent). Every other key is optional and falls back to
        an empty/neutral default.

        ``highlight``, ``post_tag_text`` and ``derived_info`` also tolerate an
        explicit JSON ``null``: it is treated like a missing key instead of
        raising ``AttributeError`` or leaking ``None`` into a collection field.
        """
        # "highlight" is an object whose "content" entry holds the snippet.
        highlight = data.get("highlight") or {}
        return cls(
            id=data["id"],
            type=data["type"],
            title=data["title"],
            link=data["link"],
            timestamp=data["timestamp"],
            summary=data.get("summary", ""),
            image=data.get("image", ""),
            score=data.get("score", 0.0),
            subscriber=data.get("subscriber", False),
            highlight=highlight.get("content"),
            category=data.get("category"),
            post_tag_text=data.get("post_tag_text") or [],
            derived_info=data.get("derived_info") or {},
        )

    @property
    def is_article(self) -> bool:
        """``True`` when ``type`` is ``"post"``."""
        return self.type == "post"

    @property
    def is_podcast(self) -> bool:
        """``True`` when ``type`` is ``"episodes"``."""
        return self.type == "episodes"

    @property
    def is_newsletter(self) -> bool:
        """``True`` when ``type`` is ``"newsletter"``."""
        return self.type == "newsletter"

    @property
    def is_paywalled(self) -> bool:
        """Alias for ``subscriber``."""
        return self.subscriber
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@dataclass
class SearchResult:
    """One page of search results plus the paging information derived from it."""

    total: int
    docs: list[Document]
    filters: list[FilterGroup]
    sort: str
    hits: int
    page: int
    query: str

    @classmethod
    def from_dict(cls, data: dict, page: int, query: str) -> SearchResult:
        """Create a result from the decoded JSON reply.

        ``total`` is required (``KeyError`` if absent). ``docs`` and ``filters``
        default to empty lists, ``sort`` to ``"default"`` and ``hits`` to ``10``
        when the reply omits them. ``page`` and ``query`` are not read from
        ``data``; they are stored exactly as passed in.
        """
        total_count = data["total"]
        documents = [Document.from_dict(item) for item in data.get("docs", [])]
        groups = [FilterGroup.from_dict(item) for item in data.get("filters", [])]
        active_sort = data.get("sort", "default")
        page_size = data.get("hits", 10)
        return cls(
            total=total_count,
            docs=documents,
            filters=groups,
            sort=active_sort,
            hits=page_size,
            page=page,
            query=query,
        )

    @property
    def total_pages(self) -> int:
        """Number of pages needed for ``total`` results at ``hits`` per page.

        Returns ``0`` when ``hits`` is ``0``.
        """
        if self.hits == 0:
            return 0
        # Ceiling division: a non-zero remainder means one extra, partial page.
        full_pages, remainder = divmod(self.total, self.hits)
        return full_pages + 1 if remainder else full_pages

    @property
    def has_next_page(self) -> bool:
        """``True`` when ``page`` is below ``total_pages``."""
        return self.total_pages > self.page

    @property
    def has_prev_page(self) -> bool:
        """``True`` when ``page`` is greater than 1."""
        return 1 < self.page
|
|
File without changes
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ilpost-api-wrapper
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python wrapper for Il Post newspaper API
|
|
5
|
+
Author: Antonio Girasella
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/girasella/ilpost-api-wrapper
|
|
8
|
+
Project-URL: Repository, https://github.com/girasella/ilpost-api-wrapper
|
|
9
|
+
Project-URL: Issues, https://github.com/girasella/ilpost-api-wrapper/issues
|
|
10
|
+
Keywords: ilpost,api,news,italy,search
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
|
|
27
|
+
# ilpost-api-wrapper
|
|
28
|
+
|
|
29
|
+
A Python wrapper for the [Il Post](https://www.ilpost.it) public search API.
|
|
30
|
+
Searches articles, podcast episodes, and newsletters — no authentication required.
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install ilpost-api-wrapper
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Requires Python 3.9+. No third-party dependencies.
|
|
39
|
+
|
|
40
|
+
## Quick start
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from ilpost import IlPostClient, SortOrder, ContentType, DateRange
|
|
44
|
+
|
|
45
|
+
client = IlPostClient()
|
|
46
|
+
|
|
47
|
+
result = client.search("berlusconi")
|
|
48
|
+
for doc in result.docs:
|
|
49
|
+
print(doc.title, doc.link)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## API reference
|
|
53
|
+
|
|
54
|
+
### `IlPostClient(timeout=10)`
|
|
55
|
+
|
|
56
|
+
| Method | Description |
|
|
57
|
+
|--------|-------------|
|
|
58
|
+
| `search(query, ...)` | General search across all content types |
|
|
59
|
+
| `search_articles(query, ...)` | Articles only |
|
|
60
|
+
| `search_podcasts(query, ...)` | Podcast episodes only |
|
|
61
|
+
| `search_newsletters(query, ...)` | Newsletter issues only |
|
|
62
|
+
| `paginate(query, ...)` | Generator that yields one `SearchResult` per page |
|
|
63
|
+
|
|
64
|
+
#### Common parameters
|
|
65
|
+
|
|
66
|
+
| Parameter | Type | Default | Description |
|
|
67
|
+
|-----------|------|---------|-------------|
|
|
68
|
+
| `query` | `str` | — | Search term |
|
|
69
|
+
| `page` | `int` | `1` | Page number (1-based) |
|
|
70
|
+
| `hits` | `int` | `10` | Results per page |
|
|
71
|
+
| `sort` | `SortOrder` | `RELEVANCE` | Sort order |
|
|
72
|
+
| `content_type` | `ContentType` | `None` | Filter by content type |
|
|
73
|
+
| `category` | `str` | `None` | Editorial category (articles only) |
|
|
74
|
+
| `date_range` | `DateRange` | `None` | Publication date filter |
|
|
75
|
+
|
|
76
|
+
#### Enums
|
|
77
|
+
|
|
78
|
+
**`SortOrder`**
|
|
79
|
+
| Value | Description |
|
|
80
|
+
|-------|-------------|
|
|
81
|
+
| `RELEVANCE` | Sort by relevance score (default) |
|
|
82
|
+
| `NEWEST` | Most recent first |
|
|
83
|
+
| `OLDEST` | Oldest first |
|
|
84
|
+
|
|
85
|
+
**`ContentType`**
|
|
86
|
+
| Value | Description |
|
|
87
|
+
|-------|-------------|
|
|
88
|
+
| `ARTICLES` | Articles and news posts |
|
|
89
|
+
| `PODCASTS` | Podcast episodes |
|
|
90
|
+
| `NEWSLETTERS` | Newsletter issues |
|
|
91
|
+
|
|
92
|
+
**`DateRange`**
|
|
93
|
+
| Value | Description |
|
|
94
|
+
|-------|-------------|
|
|
95
|
+
| `ALL_TIME` | Entire archive (default) |
|
|
96
|
+
| `PAST_YEAR` | Past 12 months |
|
|
97
|
+
| `PAST_30_DAYS` | Past 30 days |
|
|
98
|
+
|
|
99
|
+
### `SearchResult`
|
|
100
|
+
|
|
101
|
+
| Attribute | Type | Description |
|
|
102
|
+
|-----------|------|-------------|
|
|
103
|
+
| `total` | `int` | Total number of matching results |
|
|
104
|
+
| `docs` | `list[Document]` | Results for this page |
|
|
105
|
+
| `filters` | `list[FilterGroup]` | Available filters with counts |
|
|
106
|
+
| `sort` | `str` | Active sort value |
|
|
107
|
+
| `hits` | `int` | Page size |
|
|
108
|
+
| `page` | `int` | Current page number |
|
|
109
|
+
| `total_pages` | `int` | Total number of pages |
|
|
110
|
+
| `has_next_page` | `bool` | Whether a next page exists |
|
|
111
|
+
| `has_prev_page` | `bool` | Whether a previous page exists |
|
|
112
|
+
|
|
113
|
+
### `Document`
|
|
114
|
+
|
|
115
|
+
| Attribute | Type | Description |
|
|
116
|
+
|-----------|------|-------------|
|
|
117
|
+
| `id` | `int` | Unique content identifier |
|
|
118
|
+
| `type` | `str` | `"post"`, `"episodes"`, or `"newsletter"` |
|
|
119
|
+
| `title` | `str` | Content title |
|
|
120
|
+
| `link` | `str` | URL to the content page |
|
|
121
|
+
| `timestamp` | `str` | Publication date (ISO 8601, Italian local time) |
|
|
122
|
+
| `summary` | `str` | Short excerpt |
|
|
123
|
+
| `image` | `str` | Cover image URL |
|
|
124
|
+
| `score` | `float` | Relevance score (`0.0` when sorting by date) |
|
|
125
|
+
| `subscriber` | `bool` | `True` if content is paywalled |
|
|
126
|
+
| `highlight` | `str \| None` | Snippet with matched term in `<span>` tags |
|
|
127
|
+
| `category` | `str \| None` | Editorial category (articles only) |
|
|
128
|
+
| `post_tag_text` | `list[str]` | Tags (articles only) |
|
|
129
|
+
| `derived_info` | `dict` | Extra data: episode or newsletter metadata |
|
|
130
|
+
| `is_article` | `bool` | Convenience property |
|
|
131
|
+
| `is_podcast` | `bool` | Convenience property |
|
|
132
|
+
| `is_newsletter` | `bool` | Convenience property |
|
|
133
|
+
| `is_paywalled` | `bool` | Alias for `subscriber` |
|
|
134
|
+
|
|
135
|
+
## Examples
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from ilpost import IlPostClient, SortOrder, ContentType, DateRange
|
|
139
|
+
|
|
140
|
+
client = IlPostClient()
|
|
141
|
+
|
|
142
|
+
# Most recent articles in politics
|
|
143
|
+
result = client.search_articles(
|
|
144
|
+
"renzi",
|
|
145
|
+
sort=SortOrder.NEWEST,
|
|
146
|
+
category="politica",
|
|
147
|
+
date_range=DateRange.PAST_30_DAYS,
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
# Podcast search
|
|
151
|
+
result = client.search_podcasts("cacao", sort=SortOrder.NEWEST)
|
|
152
|
+
|
|
153
|
+
# Paginate through results, 5 per page (stops after at most 10 pages)
|
|
154
|
+
for page in client.paginate("sicilia", hits=5, max_pages=10):
|
|
155
|
+
print(f"Page {page.page}/{page.total_pages}")
|
|
156
|
+
for doc in page.docs:
|
|
157
|
+
print(f" [{doc.type}] {doc.title}")
|
|
158
|
+
|
|
159
|
+
# Access filter counts from a response
|
|
160
|
+
result = client.search("europa")
|
|
161
|
+
for group in result.filters:
|
|
162
|
+
print(f"{group.label}:")
|
|
163
|
+
for opt in group.options:
|
|
164
|
+
print(f" {opt.label}: {opt.doc_count}")
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## CLI
|
|
168
|
+
|
|
169
|
+
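A command-line interface is provided via `main.py` in the source repository (the script is not part of the installed `ilpost` package, so run it from a repository checkout):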
|
|
170
|
+
|
|
171
|
+
```
|
|
172
|
+
usage: ilpost-search [-h] [--type {articles,podcasts,newsletters}]
|
|
173
|
+
[--sort {relevance,newest,oldest}]
|
|
174
|
+
[--date {all,year,month}] [--category CATEGORY]
|
|
175
|
+
[--page PAGE] [--hits HITS] [--all-pages]
|
|
176
|
+
[--max-pages N]
|
|
177
|
+
query
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
# Basic search
|
|
182
|
+
python main.py berlusconi
|
|
183
|
+
|
|
184
|
+
# Most recent articles in politics
|
|
185
|
+
python main.py renzi --type articles --sort newest --category politica
|
|
186
|
+
|
|
187
|
+
# Podcast search, past 30 days
|
|
188
|
+
python main.py cacao --type podcasts --date month
|
|
189
|
+
|
|
190
|
+
# Page 2, 5 results per page, oldest first
|
|
191
|
+
python main.py sicilia --sort oldest --hits 5 --page 2
|
|
192
|
+
|
|
193
|
+
# Fetch all pages of newsletter results (up to 3 pages)
|
|
194
|
+
python main.py economia --type newsletters --all-pages --max-pages 3
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Notes
|
|
198
|
+
|
|
199
|
+
- Paywalled content (`subscriber: true`) is included in search results — title, summary, and highlight are visible, but the full article requires an active ilpost.it subscription.
|
|
200
|
+
- When sorting by date (`NEWEST` or `OLDEST`), `score` is always `0.0`.
|
|
201
|
+
- The `category` filter only applies to articles. It is ignored by the server when `content_type=PODCASTS`.
|
|
202
|
+
- Timestamps are in Italian local time (CET/CEST) with no UTC offset.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
ilpost/__init__.py
|
|
5
|
+
ilpost/client.py
|
|
6
|
+
ilpost/models.py
|
|
7
|
+
ilpost/py.typed
|
|
8
|
+
ilpost_api_wrapper.egg-info/PKG-INFO
|
|
9
|
+
ilpost_api_wrapper.egg-info/SOURCES.txt
|
|
10
|
+
ilpost_api_wrapper.egg-info/dependency_links.txt
|
|
11
|
+
ilpost_api_wrapper.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=64"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "ilpost-api-wrapper"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Python wrapper for Il Post newspaper API"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
authors = [
|
|
14
|
+
{ name = "Antonio Girasella" },
|
|
15
|
+
]
|
|
16
|
+
keywords = ["ilpost", "api", "news", "italy", "search"]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 3 - Alpha",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Programming Language :: Python :: 3.13",
|
|
27
|
+
"Topic :: Software Development :: Libraries",
|
|
28
|
+
"Typing :: Typed",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://github.com/girasella/ilpost-api-wrapper"
|
|
33
|
+
Repository = "https://github.com/girasella/ilpost-api-wrapper"
|
|
34
|
+
Issues = "https://github.com/girasella/ilpost-api-wrapper/issues"
|
|
35
|
+
|
|
36
|
+
[tool.setuptools.packages.find]
|
|
37
|
+
where = ["."]
|