ilpost-api-wrapper 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Antonio Girasella
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,202 @@
1
+ Metadata-Version: 2.4
2
+ Name: ilpost-api-wrapper
3
+ Version: 0.1.0
4
+ Summary: Python wrapper for Il Post newspaper API
5
+ Author: Antonio Girasella
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/girasella/ilpost-api-wrapper
8
+ Project-URL: Repository, https://github.com/girasella/ilpost-api-wrapper
9
+ Project-URL: Issues, https://github.com/girasella/ilpost-api-wrapper/issues
10
+ Keywords: ilpost,api,news,italy,search
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Libraries
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Dynamic: license-file
26
+
27
+ # ilpost-api-wrapper
28
+
29
+ A Python wrapper for the [Il Post](https://www.ilpost.it) public search API.
30
+ Searches articles, podcast episodes, and newsletters — no authentication required.
31
+
32
+ ## Installation
33
+
34
+ ```bash
35
+ pip install ilpost-api-wrapper
36
+ ```
37
+
38
+ Requires Python 3.9+. No third-party dependencies.
39
+
40
+ ## Quick start
41
+
42
+ ```python
43
+ from ilpost import IlPostClient, SortOrder, ContentType, DateRange
44
+
45
+ client = IlPostClient()
46
+
47
+ result = client.search("berlusconi")
48
+ for doc in result.docs:
49
+ print(doc.title, doc.link)
50
+ ```
51
+
52
+ ## API reference
53
+
54
+ ### `IlPostClient(timeout=10)`
55
+
56
+ | Method | Description |
57
+ |--------|-------------|
58
+ | `search(query, ...)` | General search across all content types |
59
+ | `search_articles(query, ...)` | Articles only |
60
+ | `search_podcasts(query, ...)` | Podcast episodes only |
61
+ | `search_newsletters(query, ...)` | Newsletter issues only |
62
+ | `paginate(query, ...)` | Generator that yields one `SearchResult` per page |
63
+
64
+ #### Common parameters
65
+
66
+ | Parameter | Type | Default | Description |
67
+ |-----------|------|---------|-------------|
68
+ | `query` | `str` | — | Search term |
69
+ | `page` | `int` | `1` | Page number (1-based) |
70
+ | `hits` | `int` | `10` | Results per page |
71
+ | `sort` | `SortOrder` | `RELEVANCE` | Sort order |
72
+ | `content_type` | `ContentType` | `None` | Filter by content type |
73
+ | `category` | `str` | `None` | Editorial category (articles only) |
74
+ | `date_range` | `DateRange` | `None` | Publication date filter |
75
+
76
+ #### Enums
77
+
78
+ **`SortOrder`**
79
+ | Value | Description |
80
+ |-------|-------------|
81
+ | `RELEVANCE` | Sort by relevance score (default) |
82
+ | `NEWEST` | Most recent first |
83
+ | `OLDEST` | Oldest first |
84
+
85
+ **`ContentType`**
86
+ | Value | Description |
87
+ |-------|-------------|
88
+ | `ARTICLES` | Articles and news posts |
89
+ | `PODCASTS` | Podcast episodes |
90
+ | `NEWSLETTERS` | Newsletter issues |
91
+
92
+ **`DateRange`**
93
+ | Value | Description |
94
+ |-------|-------------|
95
+ | `ALL_TIME` | Entire archive (default) |
96
+ | `PAST_YEAR` | Past 12 months |
97
+ | `PAST_30_DAYS` | Past 30 days |
98
+
99
+ ### `SearchResult`
100
+
101
+ | Attribute | Type | Description |
102
+ |-----------|------|-------------|
103
+ | `total` | `int` | Total number of matching results |
104
+ | `docs` | `list[Document]` | Results for this page |
105
+ | `filters` | `list[FilterGroup]` | Available filters with counts |
106
+ | `sort` | `str` | Active sort value |
107
+ | `hits` | `int` | Page size |
108
+ | `page` | `int` | Current page number |
109
+ | `total_pages` | `int` | Total number of pages |
110
+ | `has_next_page` | `bool` | Whether a next page exists |
111
+ | `has_prev_page` | `bool` | Whether a previous page exists |
112
+
113
+ ### `Document`
114
+
115
+ | Attribute | Type | Description |
116
+ |-----------|------|-------------|
117
+ | `id` | `int` | Unique content identifier |
118
+ | `type` | `str` | `"post"`, `"episodes"`, or `"newsletter"` |
119
+ | `title` | `str` | Content title |
120
+ | `link` | `str` | URL to the content page |
121
+ | `timestamp` | `str` | Publication date (ISO 8601, Italian local time) |
122
+ | `summary` | `str` | Short excerpt |
123
+ | `image` | `str` | Cover image URL |
124
+ | `score` | `float` | Relevance score (`0.0` when sorting by date) |
125
+ | `subscriber` | `bool` | `True` if content is paywalled |
126
+ | `highlight` | `str \| None` | Snippet with matched term in `<span>` tags |
127
+ | `category` | `str \| None` | Editorial category (articles only) |
128
+ | `post_tag_text` | `list[str]` | Tags (articles only) |
129
+ | `derived_info` | `dict` | Extra data: episode or newsletter metadata |
130
+ | `is_article` | `bool` | Convenience property |
131
+ | `is_podcast` | `bool` | Convenience property |
132
+ | `is_newsletter` | `bool` | Convenience property |
133
+ | `is_paywalled` | `bool` | Alias for `subscriber` |
134
+
135
+ ## Examples
136
+
137
+ ```python
138
+ from ilpost import IlPostClient, SortOrder, ContentType, DateRange
139
+
140
+ client = IlPostClient()
141
+
142
+ # Most recent articles in politics
143
+ result = client.search_articles(
144
+ "renzi",
145
+ sort=SortOrder.NEWEST,
146
+ category="politica",
147
+ date_range=DateRange.PAST_30_DAYS,
148
+ )
149
+
150
+ # Podcast search
151
+ result = client.search_podcasts("cacao", sort=SortOrder.NEWEST)
152
+
153
+ # Paginate through results, 5 per page, stopping after at most 10 pages
154
+ for page in client.paginate("sicilia", hits=5, max_pages=10):
155
+ print(f"Page {page.page}/{page.total_pages}")
156
+ for doc in page.docs:
157
+ print(f" [{doc.type}] {doc.title}")
158
+
159
+ # Access filter counts from a response
160
+ result = client.search("europa")
161
+ for group in result.filters:
162
+ print(f"{group.label}:")
163
+ for opt in group.options:
164
+ print(f" {opt.label}: {opt.doc_count}")
165
+ ```
166
+
167
+ ## CLI
168
+
169
+ A command-line interface is provided via `main.py` in the source repository (the script is not shipped in the installed package):
170
+
171
+ ```
172
+ usage: ilpost-search [-h] [--type {articles,podcasts,newsletters}]
173
+ [--sort {relevance,newest,oldest}]
174
+ [--date {all,year,month}] [--category CATEGORY]
175
+ [--page PAGE] [--hits HITS] [--all-pages]
176
+ [--max-pages N]
177
+ query
178
+ ```
179
+
180
+ ```bash
181
+ # Basic search
182
+ python main.py berlusconi
183
+
184
+ # Most recent articles in politics
185
+ python main.py renzi --type articles --sort newest --category politica
186
+
187
+ # Podcast search, past 30 days
188
+ python main.py cacao --type podcasts --date month
189
+
190
+ # Page 2, 5 results per page, oldest first
191
+ python main.py sicilia --sort oldest --hits 5 --page 2
192
+
193
+ # Fetch all pages of newsletter results (up to 3 pages)
194
+ python main.py economia --type newsletters --all-pages --max-pages 3
195
+ ```
196
+
197
+ ## Notes
198
+
199
+ - Paywalled content (`subscriber: true`) is included in search results — title, summary, and highlight are visible, but the full article requires an active ilpost.it subscription.
200
+ - When sorting by date (`NEWEST` or `OLDEST`), `score` is always `0.0`.
201
+ - The `category` filter only applies to articles. It is ignored by the server when `content_type=PODCASTS`.
202
+ - Timestamps are in Italian local time (CET/CEST) with no UTC offset.
@@ -0,0 +1,176 @@
1
+ # ilpost-api-wrapper
2
+
3
+ A Python wrapper for the [Il Post](https://www.ilpost.it) public search API.
4
+ Searches articles, podcast episodes, and newsletters — no authentication required.
5
+
6
+ ## Installation
7
+
8
+ ```bash
9
+ pip install ilpost-api-wrapper
10
+ ```
11
+
12
+ Requires Python 3.9+. No third-party dependencies.
13
+
14
+ ## Quick start
15
+
16
+ ```python
17
+ from ilpost import IlPostClient, SortOrder, ContentType, DateRange
18
+
19
+ client = IlPostClient()
20
+
21
+ result = client.search("berlusconi")
22
+ for doc in result.docs:
23
+ print(doc.title, doc.link)
24
+ ```
25
+
26
+ ## API reference
27
+
28
+ ### `IlPostClient(timeout=10)`
29
+
30
+ | Method | Description |
31
+ |--------|-------------|
32
+ | `search(query, ...)` | General search across all content types |
33
+ | `search_articles(query, ...)` | Articles only |
34
+ | `search_podcasts(query, ...)` | Podcast episodes only |
35
+ | `search_newsletters(query, ...)` | Newsletter issues only |
36
+ | `paginate(query, ...)` | Generator that yields one `SearchResult` per page |
37
+
38
+ #### Common parameters
39
+
40
+ | Parameter | Type | Default | Description |
41
+ |-----------|------|---------|-------------|
42
+ | `query` | `str` | — | Search term |
43
+ | `page` | `int` | `1` | Page number (1-based) |
44
+ | `hits` | `int` | `10` | Results per page |
45
+ | `sort` | `SortOrder` | `RELEVANCE` | Sort order |
46
+ | `content_type` | `ContentType` | `None` | Filter by content type |
47
+ | `category` | `str` | `None` | Editorial category (articles only) |
48
+ | `date_range` | `DateRange` | `None` | Publication date filter |
49
+
50
+ #### Enums
51
+
52
+ **`SortOrder`**
53
+ | Value | Description |
54
+ |-------|-------------|
55
+ | `RELEVANCE` | Sort by relevance score (default) |
56
+ | `NEWEST` | Most recent first |
57
+ | `OLDEST` | Oldest first |
58
+
59
+ **`ContentType`**
60
+ | Value | Description |
61
+ |-------|-------------|
62
+ | `ARTICLES` | Articles and news posts |
63
+ | `PODCASTS` | Podcast episodes |
64
+ | `NEWSLETTERS` | Newsletter issues |
65
+
66
+ **`DateRange`**
67
+ | Value | Description |
68
+ |-------|-------------|
69
+ | `ALL_TIME` | Entire archive (default) |
70
+ | `PAST_YEAR` | Past 12 months |
71
+ | `PAST_30_DAYS` | Past 30 days |
72
+
73
+ ### `SearchResult`
74
+
75
+ | Attribute | Type | Description |
76
+ |-----------|------|-------------|
77
+ | `total` | `int` | Total number of matching results |
78
+ | `docs` | `list[Document]` | Results for this page |
79
+ | `filters` | `list[FilterGroup]` | Available filters with counts |
80
+ | `sort` | `str` | Active sort value |
81
+ | `hits` | `int` | Page size |
82
+ | `page` | `int` | Current page number |
83
+ | `total_pages` | `int` | Total number of pages |
84
+ | `has_next_page` | `bool` | Whether a next page exists |
85
+ | `has_prev_page` | `bool` | Whether a previous page exists |
86
+
87
+ ### `Document`
88
+
89
+ | Attribute | Type | Description |
90
+ |-----------|------|-------------|
91
+ | `id` | `int` | Unique content identifier |
92
+ | `type` | `str` | `"post"`, `"episodes"`, or `"newsletter"` |
93
+ | `title` | `str` | Content title |
94
+ | `link` | `str` | URL to the content page |
95
+ | `timestamp` | `str` | Publication date (ISO 8601, Italian local time) |
96
+ | `summary` | `str` | Short excerpt |
97
+ | `image` | `str` | Cover image URL |
98
+ | `score` | `float` | Relevance score (`0.0` when sorting by date) |
99
+ | `subscriber` | `bool` | `True` if content is paywalled |
100
+ | `highlight` | `str \| None` | Snippet with matched term in `<span>` tags |
101
+ | `category` | `str \| None` | Editorial category (articles only) |
102
+ | `post_tag_text` | `list[str]` | Tags (articles only) |
103
+ | `derived_info` | `dict` | Extra data: episode or newsletter metadata |
104
+ | `is_article` | `bool` | Convenience property |
105
+ | `is_podcast` | `bool` | Convenience property |
106
+ | `is_newsletter` | `bool` | Convenience property |
107
+ | `is_paywalled` | `bool` | Alias for `subscriber` |
108
+
109
+ ## Examples
110
+
111
+ ```python
112
+ from ilpost import IlPostClient, SortOrder, ContentType, DateRange
113
+
114
+ client = IlPostClient()
115
+
116
+ # Most recent articles in politics
117
+ result = client.search_articles(
118
+ "renzi",
119
+ sort=SortOrder.NEWEST,
120
+ category="politica",
121
+ date_range=DateRange.PAST_30_DAYS,
122
+ )
123
+
124
+ # Podcast search
125
+ result = client.search_podcasts("cacao", sort=SortOrder.NEWEST)
126
+
127
+ # Paginate through results, 5 per page, stopping after at most 10 pages
128
+ for page in client.paginate("sicilia", hits=5, max_pages=10):
129
+ print(f"Page {page.page}/{page.total_pages}")
130
+ for doc in page.docs:
131
+ print(f" [{doc.type}] {doc.title}")
132
+
133
+ # Access filter counts from a response
134
+ result = client.search("europa")
135
+ for group in result.filters:
136
+ print(f"{group.label}:")
137
+ for opt in group.options:
138
+ print(f" {opt.label}: {opt.doc_count}")
139
+ ```
140
+
141
+ ## CLI
142
+
143
+ A command-line interface is provided via `main.py` in the source repository (the script is not shipped in the installed package):
144
+
145
+ ```
146
+ usage: ilpost-search [-h] [--type {articles,podcasts,newsletters}]
147
+ [--sort {relevance,newest,oldest}]
148
+ [--date {all,year,month}] [--category CATEGORY]
149
+ [--page PAGE] [--hits HITS] [--all-pages]
150
+ [--max-pages N]
151
+ query
152
+ ```
153
+
154
+ ```bash
155
+ # Basic search
156
+ python main.py berlusconi
157
+
158
+ # Most recent articles in politics
159
+ python main.py renzi --type articles --sort newest --category politica
160
+
161
+ # Podcast search, past 30 days
162
+ python main.py cacao --type podcasts --date month
163
+
164
+ # Page 2, 5 results per page, oldest first
165
+ python main.py sicilia --sort oldest --hits 5 --page 2
166
+
167
+ # Fetch all pages of newsletter results (up to 3 pages)
168
+ python main.py economia --type newsletters --all-pages --max-pages 3
169
+ ```
170
+
171
+ ## Notes
172
+
173
+ - Paywalled content (`subscriber: true`) is included in search results — title, summary, and highlight are visible, but the full article requires an active ilpost.it subscription.
174
+ - When sorting by date (`NEWEST` or `OLDEST`), `score` is always `0.0`.
175
+ - The `category` filter only applies to articles. It is ignored by the server when `content_type=PODCASTS`.
176
+ - Timestamps are in Italian local time (CET/CEST) with no UTC offset.
@@ -0,0 +1,13 @@
1
+ from .client import IlPostClient
2
+ from .models import SearchResult, Document, FilterGroup, FilterOption, SortOrder, ContentType, DateRange
3
+
4
+ __all__ = [
5
+ "IlPostClient",
6
+ "SearchResult",
7
+ "Document",
8
+ "FilterGroup",
9
+ "FilterOption",
10
+ "SortOrder",
11
+ "ContentType",
12
+ "DateRange",
13
+ ]
@@ -0,0 +1,204 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, Union
4
+ from urllib.parse import urlencode, quote
5
+
6
+ import urllib.request
7
+ import json
8
+
9
+ from .models import SearchResult, SortOrder, ContentType, DateRange
10
+
11
+ _BASE_URL = "https://api.ilpost.org/search/api/site_search/"
12
+
13
+
14
+ def _build_filters(
15
+ content_type: Optional[ContentType] = None,
16
+ category: Optional[str] = None,
17
+ date_range: Optional[DateRange] = None,
18
+ ) -> str:
19
+ parts: list[str] = []
20
+ if content_type is not None:
21
+ parts.append(f"ctype:{content_type.value}")
22
+ if category is not None:
23
+ parts.append(f"category:{category}")
24
+ if date_range is not None:
25
+ parts.append(f"pub_date:{date_range.value}")
26
+ return ",".join(parts)
27
+
28
+
29
class IlPostClient:
    """Thin wrapper around the Il Post public search API.

    Parameters
    ----------
    timeout:
        HTTP request timeout in seconds (default: 10).
    """

    def __init__(self, timeout: int = 10) -> None:
        self.timeout = timeout

    def _get(self, params: dict) -> dict:
        """Send a GET request to the search endpoint and return the parsed JSON body.

        ``params`` is percent-encoded with ``quote`` (so a space becomes ``%20``
        rather than ``+``) and appended to ``_BASE_URL`` as the query string.
        The response body is decoded as UTF-8 before being parsed. Errors raised
        by ``urllib.request.urlopen`` or ``json.loads`` propagate to the caller.
        """
        qs = urlencode(params, quote_via=quote)
        url = f"{_BASE_URL}?{qs}"
        with urllib.request.urlopen(url, timeout=self.timeout) as response:
            return json.loads(response.read().decode("utf-8"))

    def search(
        self,
        query: str,
        *,
        page: int = 1,
        hits: int = 10,
        sort: Union[SortOrder, str] = SortOrder.RELEVANCE,
        content_type: Optional[ContentType] = None,
        category: Optional[str] = None,
        date_range: Optional[DateRange] = None,
        filters: Optional[str] = None,
    ) -> SearchResult:
        """Search Il Post content.

        Parameters
        ----------
        query:
            Search term.
        page:
            1-based page number (default: 1).
        hits:
            Results per page (default: 10).
        sort:
            Sort order. ``SortOrder.RELEVANCE`` (default), ``SortOrder.NEWEST``,
            or ``SortOrder.OLDEST``. A plain string is sent to the API unchanged.
        content_type:
            Filter by content type: ``ContentType.ARTICLES``, ``ContentType.PODCASTS``,
            or ``ContentType.NEWSLETTERS``.
        category:
            Filter articles by editorial category (e.g. ``"politica"``, ``"cultura"``).
            Only meaningful when ``content_type=ContentType.ARTICLES`` or no content
            type filter is set.
        date_range:
            Filter by publication date: ``DateRange.ALL_TIME``, ``DateRange.PAST_YEAR``,
            or ``DateRange.PAST_30_DAYS``.
        filters:
            Raw pre-encoded filter string (e.g. ``"ctype:articoli,pub_date:ultimo_anno"``).
            When provided, overrides ``content_type``, ``category``, and ``date_range``.

        Returns
        -------
        SearchResult
        """
        # An explicit raw filter string wins; otherwise build it from the typed arguments.
        if filters is None:
            filters = _build_filters(content_type, category, date_range)

        sort_value = sort.value if isinstance(sort, SortOrder) else sort

        params = {
            "qs": query,
            "pg": page,
            "sort": sort_value,
            "filters": filters,
            "hits": hits,
        }

        data = self._get(params)
        # The requested page and query are passed through explicitly so the
        # result object can expose them.
        return SearchResult.from_dict(data, page=page, query=query)

    def search_articles(
        self,
        query: str,
        *,
        page: int = 1,
        hits: int = 10,
        sort: Union[SortOrder, str] = SortOrder.RELEVANCE,
        category: Optional[str] = None,
        date_range: Optional[DateRange] = None,
    ) -> SearchResult:
        """Search articles only. Convenience wrapper around :meth:`search`."""
        return self.search(
            query,
            page=page,
            hits=hits,
            sort=sort,
            content_type=ContentType.ARTICLES,
            category=category,
            date_range=date_range,
        )

    def search_podcasts(
        self,
        query: str,
        *,
        page: int = 1,
        hits: int = 10,
        sort: Union[SortOrder, str] = SortOrder.RELEVANCE,
        date_range: Optional[DateRange] = None,
    ) -> SearchResult:
        """Search podcast episodes only. Convenience wrapper around :meth:`search`."""
        return self.search(
            query,
            page=page,
            hits=hits,
            sort=sort,
            content_type=ContentType.PODCASTS,
            date_range=date_range,
        )

    def search_newsletters(
        self,
        query: str,
        *,
        page: int = 1,
        hits: int = 10,
        sort: Union[SortOrder, str] = SortOrder.RELEVANCE,
        date_range: Optional[DateRange] = None,
    ) -> SearchResult:
        """Search newsletter issues only. Convenience wrapper around :meth:`search`."""
        return self.search(
            query,
            page=page,
            hits=hits,
            sort=sort,
            content_type=ContentType.NEWSLETTERS,
            date_range=date_range,
        )

    def paginate(
        self,
        query: str,
        *,
        hits: int = 10,
        sort: Union[SortOrder, str] = SortOrder.RELEVANCE,
        content_type: Optional[ContentType] = None,
        category: Optional[str] = None,
        date_range: Optional[DateRange] = None,
        max_pages: Optional[int] = None,
    ):
        """Iterate over all pages of search results, yielding one :class:`SearchResult`
        per page.

        Pages are fetched lazily, one request per yielded result, starting at
        page 1.

        Parameters
        ----------
        query, hits, sort, content_type, category, date_range:
            Forwarded unchanged to :meth:`search` for every page.
        max_pages:
            Stop after this many pages. ``None`` means iterate until exhausted.
            A value below 1 yields nothing and performs no request.

        Yields
        ------
        SearchResult
        """
        # A non-positive limit means "no pages": stop before issuing any request.
        if max_pages is not None and max_pages < 1:
            return

        page = 1
        while True:
            result = self.search(
                query,
                page=page,
                hits=hits,
                sort=sort,
                content_type=content_type,
                category=category,
                date_range=date_range,
            )
            yield result
            if not result.has_next_page:
                break
            if max_pages is not None and page >= max_pages:
                break
            page += 1
@@ -0,0 +1,145 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from enum import Enum
5
+ from typing import Optional
6
+
7
+
8
class SortOrder(str, Enum):
    """Wire values for the ``sort`` query parameter of the search API."""

    RELEVANCE = "default"  # ranked by relevance score
    NEWEST = "date_d"  # most recent first
    OLDEST = "date_a"  # oldest first
12
+
13
+
14
class ContentType(str, Enum):
    """Wire values for the ``ctype`` entry of the ``filters`` query parameter."""

    ARTICLES = "articoli"  # articles and news posts
    PODCASTS = "podcast"  # podcast episodes
    NEWSLETTERS = "newsletter"  # newsletter issues
18
+
19
+
20
class DateRange(str, Enum):
    """Wire values for the ``pub_date`` entry of the ``filters`` query parameter."""

    ALL_TIME = "da_sempre"  # entire archive
    PAST_YEAR = "ultimo_anno"  # past 12 months
    PAST_30_DAYS = "ultimi_30_giorni"  # past 30 days
24
+
25
+
26
@dataclass
class FilterOption:
    """One entry of a filter group's ``contents`` list, with its result count."""

    key: str
    label: str
    doc_count: int
    selected: bool

    @classmethod
    def from_dict(cls, data: dict) -> FilterOption:
        """Build an option from its API dictionary.

        All four keys (``key``, ``label``, ``doc_count``, ``selected``) are
        required; a missing one raises ``KeyError``.
        """
        required = ("key", "label", "doc_count", "selected")
        return cls(**{name: data[name] for name in required})
41
+
42
+
43
@dataclass
class FilterGroup:
    """A named group of filter options as returned in the response's ``filters`` list."""

    name: str
    label: str
    multi: bool
    options: list[FilterOption]

    @classmethod
    def from_dict(cls, data: dict) -> FilterGroup:
        """Build a group from its API dictionary.

        ``name``, ``label`` and ``multi`` are required (``KeyError`` when
        missing). The options are read from the ``contents`` key, which may be
        absent, in which case the group has no options.
        """
        name, label, multi = data["name"], data["label"], data["multi"]
        options = [FilterOption.from_dict(entry) for entry in data.get("contents", [])]
        return cls(name, label, multi, options)
58
+
59
+
60
@dataclass
class Document:
    """A single search hit: an article, a podcast episode, or a newsletter issue."""

    id: int
    type: str  # compared against "post", "episodes" and "newsletter" by the is_* properties
    title: str
    link: str
    timestamp: str
    summary: str
    image: str
    score: float
    subscriber: bool
    highlight: Optional[str] = None
    category: Optional[str] = None
    post_tag_text: list[str] = field(default_factory=list)
    derived_info: dict = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict) -> Document:
        """Build a ``Document`` from one decoded API result dictionary.

        ``id``, ``type``, ``title``, ``link`` and ``timestamp`` are required and
        raise ``KeyError`` when absent. Every other key is optional: a key that
        is missing *or* explicitly ``null`` falls back to an empty default, so
        the attributes always keep their declared types. ``post_tag_text`` and
        ``derived_info`` are copied so the document does not share mutable
        state with ``data``.
        """
        # "highlight" may be absent or JSON null; either way there is no snippet.
        highlight = data.get("highlight") or {}
        return cls(
            id=data["id"],
            type=data["type"],
            title=data["title"],
            link=data["link"],
            timestamp=data["timestamp"],
            summary=data.get("summary") or "",
            image=data.get("image") or "",
            score=data.get("score") or 0.0,
            subscriber=bool(data.get("subscriber", False)),
            highlight=highlight.get("content"),
            category=data.get("category"),
            post_tag_text=list(data.get("post_tag_text") or []),
            derived_info=dict(data.get("derived_info") or {}),
        )

    @property
    def is_article(self) -> bool:
        """``True`` when ``type`` is ``"post"``."""
        return self.type == "post"

    @property
    def is_podcast(self) -> bool:
        """``True`` when ``type`` is ``"episodes"``."""
        return self.type == "episodes"

    @property
    def is_newsletter(self) -> bool:
        """``True`` when ``type`` is ``"newsletter"``."""
        return self.type == "newsletter"

    @property
    def is_paywalled(self) -> bool:
        """Alias for ``subscriber``."""
        return self.subscriber
109
+
110
+
111
@dataclass
class SearchResult:
    """One page of search results together with the data needed to page through them."""

    total: int
    docs: list[Document]
    filters: list[FilterGroup]
    sort: str
    hits: int
    page: int
    query: str

    @classmethod
    def from_dict(cls, data: dict, page: int, query: str) -> SearchResult:
        """Build a result page from a decoded API response.

        ``page`` and ``query`` are stored as given rather than read from
        ``data``. ``total`` is required (``KeyError`` when missing); ``docs``
        and ``filters`` default to empty lists, ``sort`` to ``"default"`` and
        ``hits`` to ``10``.
        """
        total = data["total"]
        documents = [Document.from_dict(item) for item in data.get("docs", [])]
        groups = [FilterGroup.from_dict(item) for item in data.get("filters", [])]
        return cls(
            total,
            documents,
            groups,
            data.get("sort", "default"),
            data.get("hits", 10),
            page,
            query,
        )

    @property
    def total_pages(self) -> int:
        """Pages needed to hold ``total`` results at ``hits`` per page (0 when ``hits`` is 0)."""
        if self.hits == 0:
            return 0
        # Ceiling division: a partially filled last page still counts as a page.
        full_pages, leftover = divmod(self.total, self.hits)
        return full_pages + (1 if leftover else 0)

    @property
    def has_next_page(self) -> bool:
        """Whether the current page is before the last one."""
        return self.total_pages > self.page

    @property
    def has_prev_page(self) -> bool:
        """Whether the current page is after the first one."""
        return 1 < self.page
File without changes
@@ -0,0 +1,202 @@
1
+ Metadata-Version: 2.4
2
+ Name: ilpost-api-wrapper
3
+ Version: 0.1.0
4
+ Summary: Python wrapper for Il Post newspaper API
5
+ Author: Antonio Girasella
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/girasella/ilpost-api-wrapper
8
+ Project-URL: Repository, https://github.com/girasella/ilpost-api-wrapper
9
+ Project-URL: Issues, https://github.com/girasella/ilpost-api-wrapper/issues
10
+ Keywords: ilpost,api,news,italy,search
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Libraries
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Dynamic: license-file
26
+
27
+ # ilpost-api-wrapper
28
+
29
+ A Python wrapper for the [Il Post](https://www.ilpost.it) public search API.
30
+ Searches articles, podcast episodes, and newsletters — no authentication required.
31
+
32
+ ## Installation
33
+
34
+ ```bash
35
+ pip install ilpost-api-wrapper
36
+ ```
37
+
38
+ Requires Python 3.9+. No third-party dependencies.
39
+
40
+ ## Quick start
41
+
42
+ ```python
43
+ from ilpost import IlPostClient, SortOrder, ContentType, DateRange
44
+
45
+ client = IlPostClient()
46
+
47
+ result = client.search("berlusconi")
48
+ for doc in result.docs:
49
+ print(doc.title, doc.link)
50
+ ```
51
+
52
+ ## API reference
53
+
54
+ ### `IlPostClient(timeout=10)`
55
+
56
+ | Method | Description |
57
+ |--------|-------------|
58
+ | `search(query, ...)` | General search across all content types |
59
+ | `search_articles(query, ...)` | Articles only |
60
+ | `search_podcasts(query, ...)` | Podcast episodes only |
61
+ | `search_newsletters(query, ...)` | Newsletter issues only |
62
+ | `paginate(query, ...)` | Generator that yields one `SearchResult` per page |
63
+
64
+ #### Common parameters
65
+
66
+ | Parameter | Type | Default | Description |
67
+ |-----------|------|---------|-------------|
68
+ | `query` | `str` | — | Search term |
69
+ | `page` | `int` | `1` | Page number (1-based) |
70
+ | `hits` | `int` | `10` | Results per page |
71
+ | `sort` | `SortOrder` | `RELEVANCE` | Sort order |
72
+ | `content_type` | `ContentType` | `None` | Filter by content type |
73
+ | `category` | `str` | `None` | Editorial category (articles only) |
74
+ | `date_range` | `DateRange` | `None` | Publication date filter |
75
+
76
+ #### Enums
77
+
78
+ **`SortOrder`**
79
+ | Value | Description |
80
+ |-------|-------------|
81
+ | `RELEVANCE` | Sort by relevance score (default) |
82
+ | `NEWEST` | Most recent first |
83
+ | `OLDEST` | Oldest first |
84
+
85
+ **`ContentType`**
86
+ | Value | Description |
87
+ |-------|-------------|
88
+ | `ARTICLES` | Articles and news posts |
89
+ | `PODCASTS` | Podcast episodes |
90
+ | `NEWSLETTERS` | Newsletter issues |
91
+
92
+ **`DateRange`**
93
+ | Value | Description |
94
+ |-------|-------------|
95
+ | `ALL_TIME` | Entire archive (default) |
96
+ | `PAST_YEAR` | Past 12 months |
97
+ | `PAST_30_DAYS` | Past 30 days |
98
+
99
+ ### `SearchResult`
100
+
101
+ | Attribute | Type | Description |
102
+ |-----------|------|-------------|
103
+ | `total` | `int` | Total number of matching results |
104
+ | `docs` | `list[Document]` | Results for this page |
105
+ | `filters` | `list[FilterGroup]` | Available filters with counts |
106
+ | `sort` | `str` | Active sort value |
107
+ | `hits` | `int` | Page size |
108
+ | `page` | `int` | Current page number |
109
+ | `total_pages` | `int` | Total number of pages |
110
+ | `has_next_page` | `bool` | Whether a next page exists |
111
+ | `has_prev_page` | `bool` | Whether a previous page exists |
112
+
113
+ ### `Document`
114
+
115
+ | Attribute | Type | Description |
116
+ |-----------|------|-------------|
117
+ | `id` | `int` | Unique content identifier |
118
+ | `type` | `str` | `"post"`, `"episodes"`, or `"newsletter"` |
119
+ | `title` | `str` | Content title |
120
+ | `link` | `str` | URL to the content page |
121
+ | `timestamp` | `str` | Publication date (ISO 8601, Italian local time) |
122
+ | `summary` | `str` | Short excerpt |
123
+ | `image` | `str` | Cover image URL |
124
+ | `score` | `float` | Relevance score (`0.0` when sorting by date) |
125
+ | `subscriber` | `bool` | `True` if content is paywalled |
126
+ | `highlight` | `str \| None` | Snippet with matched term in `<span>` tags |
127
+ | `category` | `str \| None` | Editorial category (articles only) |
128
+ | `post_tag_text` | `list[str]` | Tags (articles only) |
129
+ | `derived_info` | `dict` | Extra data: episode or newsletter metadata |
130
+ | `is_article` | `bool` | Convenience property |
131
+ | `is_podcast` | `bool` | Convenience property |
132
+ | `is_newsletter` | `bool` | Convenience property |
133
+ | `is_paywalled` | `bool` | Alias for `subscriber` |
134
+
135
+ ## Examples
136
+
137
+ ```python
138
+ from ilpost import IlPostClient, SortOrder, ContentType, DateRange
139
+
140
+ client = IlPostClient()
141
+
142
+ # Most recent articles in politics
143
+ result = client.search_articles(
144
+ "renzi",
145
+ sort=SortOrder.NEWEST,
146
+ category="politica",
147
+ date_range=DateRange.PAST_30_DAYS,
148
+ )
149
+
150
+ # Podcast search
151
+ result = client.search_podcasts("cacao", sort=SortOrder.NEWEST)
152
+
153
+ # Paginate through results, 5 per page, stopping after at most 10 pages
154
+ for page in client.paginate("sicilia", hits=5, max_pages=10):
155
+ print(f"Page {page.page}/{page.total_pages}")
156
+ for doc in page.docs:
157
+ print(f" [{doc.type}] {doc.title}")
158
+
159
+ # Access filter counts from a response
160
+ result = client.search("europa")
161
+ for group in result.filters:
162
+ print(f"{group.label}:")
163
+ for opt in group.options:
164
+ print(f" {opt.label}: {opt.doc_count}")
165
+ ```
166
+
167
+ ## CLI
168
+
169
+ A command-line interface is provided via `main.py` in the source repository (the script is not shipped in the installed package):
170
+
171
+ ```
172
+ usage: ilpost-search [-h] [--type {articles,podcasts,newsletters}]
173
+ [--sort {relevance,newest,oldest}]
174
+ [--date {all,year,month}] [--category CATEGORY]
175
+ [--page PAGE] [--hits HITS] [--all-pages]
176
+ [--max-pages N]
177
+ query
178
+ ```
179
+
180
+ ```bash
181
+ # Basic search
182
+ python main.py berlusconi
183
+
184
+ # Most recent articles in politics
185
+ python main.py renzi --type articles --sort newest --category politica
186
+
187
+ # Podcast search, past 30 days
188
+ python main.py cacao --type podcasts --date month
189
+
190
+ # Page 2, 5 results per page, oldest first
191
+ python main.py sicilia --sort oldest --hits 5 --page 2
192
+
193
+ # Fetch all pages of newsletter results (up to 3 pages)
194
+ python main.py economia --type newsletters --all-pages --max-pages 3
195
+ ```
196
+
197
+ ## Notes
198
+
199
+ - Paywalled content (`subscriber: true`) is included in search results — title, summary, and highlight are visible, but the full article requires an active ilpost.it subscription.
200
+ - When sorting by date (`NEWEST` or `OLDEST`), `score` is always `0.0`.
201
+ - The `category` filter only applies to articles. It is ignored by the server when `content_type=PODCASTS`.
202
+ - Timestamps are in Italian local time (CET/CEST) with no UTC offset.
@@ -0,0 +1,11 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ ilpost/__init__.py
5
+ ilpost/client.py
6
+ ilpost/models.py
7
+ ilpost/py.typed
8
+ ilpost_api_wrapper.egg-info/PKG-INFO
9
+ ilpost_api_wrapper.egg-info/SOURCES.txt
10
+ ilpost_api_wrapper.egg-info/dependency_links.txt
11
+ ilpost_api_wrapper.egg-info/top_level.txt
@@ -0,0 +1,37 @@
1
[build-system]
# The [project] table uses PEP 639 metadata (an SPDX string for `license`
# together with `license-files`). setuptools only understands that form from
# 77.0.0 on; older releases reject `license = "MIT"` during validation.
requires = ["setuptools>=77.0.0"]
build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ilpost-api-wrapper"
7
+ version = "0.1.0"
8
+ description = "Python wrapper for Il Post newspaper API"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ authors = [
14
+ { name = "Antonio Girasella" },
15
+ ]
16
+ keywords = ["ilpost", "api", "news", "italy", "search"]
17
+ classifiers = [
18
+ "Development Status :: 3 - Alpha",
19
+ "Intended Audience :: Developers",
20
+ "Operating System :: OS Independent",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Programming Language :: Python :: 3.12",
26
+ "Programming Language :: Python :: 3.13",
27
+ "Topic :: Software Development :: Libraries",
28
+ "Typing :: Typed",
29
+ ]
30
+
31
+ [project.urls]
32
+ Homepage = "https://github.com/girasella/ilpost-api-wrapper"
33
+ Repository = "https://github.com/girasella/ilpost-api-wrapper"
34
+ Issues = "https://github.com/girasella/ilpost-api-wrapper/issues"
35
+
36
+ [tool.setuptools.packages.find]
37
+ where = ["."]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+