pageviewapi 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ """Python client for the Wikimedia Pageview API."""
2
+
3
+ from pageviewapi.client import (
4
+ PageviewResponse,
5
+ ThrottlingException,
6
+ ZeroOrDataNotLoadedException,
7
+ __version__,
8
+ aggregate,
9
+ legacy_pagecounts,
10
+ per_article,
11
+ top,
12
+ unique_devices,
13
+ )
14
+
15
# Public names of the package, all imported from ``pageviewapi.client`` above;
# this list is what ``from pageviewapi import *`` exposes.
__all__ = [
    "__version__",
    "aggregate",
    "PageviewResponse",
    "legacy_pagecounts",
    "per_article",
    "ThrottlingException",
    "top",
    "unique_devices",
    "ZeroOrDataNotLoadedException",
]
pageviewapi/client.py ADDED
@@ -0,0 +1,206 @@
1
+ """Client to wikimedia pageview api.
2
+
3
+ API doc: https://wikitech.wikimedia.org/wiki/Analytics/AQS/Pageview_API
4
+ Supported endpoints:
5
+ - per-article
6
+ - top
7
+ - aggregate
8
+ - unique-devices
9
+ - legacy/pagecounts
10
+ """
11
+
12
+ from importlib.metadata import PackageNotFoundError, version
13
+ from typing import Any
14
+ from urllib.parse import quote, unquote
15
+
16
+ import requests
17
+
18
# Resolve the version from the installed distribution's metadata. When the
# distribution is not installed (``PackageNotFoundError``), fall back to the
# version of this release so the User-Agent header does not report a stale one.
try:
    __version__ = version("pageviewapi")
except PackageNotFoundError:
    __version__ = "0.5.0"

# Sent with every request so the API operators can identify this client.
PROJECT_URL = "https://github.com/Commonists/pageview-api"
USER_AGENT = {"User-Agent": f"Python pageview-api client v{__version__} <{PROJECT_URL}>"}

# Root of the metrics API; request URLs are built as
# "<API_BASE_URL>/<*_ENDPOINT>/<formatted *_ARGS>".
API_BASE_URL = "https://wikimedia.org/api/rest_v1/metrics"

# per-article pageviews
PA_ENDPOINT = "pageviews/per-article"
PA_ARGS = "{project}/{access}/{agent}/{page}/{granularity}/{start}/{end}"

# top viewed articles
TOP_ENDPOINT = "pageviews/top"
TOP_ARGS = "{project}/{access}/{year}/{month}/{day}"

# aggregate pageviews for a project
AG_ENDPOINT = "pageviews/aggregate"
AG_ARGS = "{project}/{access}/{agent}/{granularity}/{start}/{end}"

# unique devices
UD_ENDPOINT = "unique-devices"
UD_ARGS = "{project}/{access}/{granularity}/{start}/{end}"

# legacy pagecounts
PC_ENDPOINT = "legacy/pagecounts/aggregate"
PC_ARGS = "{project}/{access_site}/{granularity}/{start}/{end}"
42
+
43
+
44
class PageviewResponse(dict):  # type: ignore[type-arg]
    """A Wikimedia Pageview API response with attribute-style read access.

    Recursively wraps nested dicts and lists so the full response tree is
    navigable via attributes. Dict data keys always take priority over built-in
    dict methods, so ``response.items`` returns the ``items`` data value rather
    than the ``dict.items`` method when that key is present. To call a shadowed
    method, go through the class, e.g. ``dict.items(response)``.

    Names starting with an underscore are never looked up in the data, so
    dunder and private attribute access behaves like a plain ``dict``.

    Use ``from_json`` rather than the constructor directly when the input may
    contain nested dicts or lists — the constructor only wraps the top level.
    """

    def __getattribute__(self, key: str) -> Any:
        """Return the data value stored under ``key``, else the real attribute.

        Raises:
            AttributeError: if ``key`` is neither a data key nor an attribute.
        """
        if not key.startswith("_"):
            try:
                # Call dict.__getitem__ on the class to avoid recursing
                # through this very method.
                return dict.__getitem__(self, key)
            except KeyError:
                pass
        return super().__getattribute__(key)

    @classmethod
    def from_json(cls, obj: Any) -> Any:
        """Recursively convert a JSON value into a ``PageviewResponse`` tree.

        Dicts become ``cls`` instances, lists become lists of converted
        elements, and every other value is returned unchanged.
        """
        if isinstance(obj, dict):
            # On an already-wrapped response, ``obj.items`` resolves to the
            # ``items`` *data value* when that key exists, so calling it would
            # raise TypeError. Use the unbound dict method in that case.
            pairs = dict.items(obj) if isinstance(obj, PageviewResponse) else obj.items()
            return cls({k: cls.from_json(v) for k, v in pairs})
        if isinstance(obj, list):
            return [cls.from_json(item) for item in obj]
        return obj
72
+
73
+
74
class ZeroOrDataNotLoadedException(Exception):
    """The API answered 404: there is no data, or it has not been filled yet.

    See https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI#Gotchas
    """
79
+
80
+
81
class ThrottlingException(Exception):
    """The API answered 429: the client is sending too many requests.

    See https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI#Gotchas
    """
86
+
87
+
88
def per_article(
    project: str,
    page: str,
    start: str,
    end: str,
    access: str = "all-access",
    agent: str = "all-agents",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch pageview counts for a single article.

    The page title is passed through ``_quote_article_name`` before it is
    placed in the request path; all other arguments are inserted as given.

    >>> import pageviewapi
    >>> pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120')
    """
    path_fields = {
        "project": project,
        "access": access,
        "agent": agent,
        "page": _quote_article_name(page),
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    request_path = PA_ARGS.format(**path_fields)
    return _api(PA_ENDPOINT, request_path)
112
+
113
+
114
def top(
    project: str,
    year: int | str,
    month: int | str,
    day: int | str,
    access: str = "all-access",
) -> dict[str, Any]:
    """Top 1000 most visited articles for a project on a given date.

    ``month`` and ``day`` are zero-padded to two digits when they are purely
    numeric, so ``top('fr.wikipedia', 2015, 1, 5)`` requests ``2015/01/05``
    (the API documents these path segments in MM and DD format). Non-numeric
    values such as ``'all-days'`` are passed through unchanged.

    >>> import pageviewapi
    >>> views = pageviewapi.top('fr.wikipedia', 2015, 11, 14)
    >>> views['items'][0]['articles'][0]
    {'article': 'Wikipédia:Accueil_principal', 'rank': 1, 'views': 1600547}
    """
    args = TOP_ARGS.format(
        project=project,
        access=access,
        year=year,
        month=_two_digits(month),
        day=_two_digits(day),
    )
    return _api(TOP_ENDPOINT, args)


def _two_digits(value: int | str) -> str:
    """Zero-pad a purely numeric date component to two digits."""
    text = str(value)
    return text.zfill(2) if text.isdecimal() else text
130
+
131
+
132
def aggregate(
    project: str,
    start: str,
    end: str,
    access: str = "all-access",
    agent: str = "all-agents",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch aggregated pageview counts for a whole project.

    >>> import pageviewapi
    >>> pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100')
    """
    path_fields = {
        "project": project,
        "access": access,
        "agent": agent,
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    request_path = AG_ARGS.format(**path_fields)
    return _api(AG_ENDPOINT, request_path)
154
+
155
+
156
def unique_devices(
    project: str,
    start: str,
    end: str,
    access: str = "all-access",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch the number of unique devices that accessed a project."""
    path_fields = {
        "project": project,
        "access": access,
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    request_path = UD_ARGS.format(**path_fields)
    return _api(UD_ENDPOINT, request_path)
166
+
167
+
168
def legacy_pagecounts(
    project: str,
    start: str,
    end: str,
    access_site: str = "all-sites",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Legacy pagecounts aggregate.

    ``project`` may be given with or without the ``.org`` suffix: the suffix
    is appended when missing, and the special value ``'all-projects'`` is
    passed through unchanged.

    >>> import pageviewapi
    >>> pageviewapi.legacy_pagecounts('fr.wikipedia', '2010010100', '2011010100')
    """
    # Do not append the suffix twice when the caller already passed a full
    # domain such as 'fr.wikipedia.org'.
    if project == "all-projects" or project.endswith(".org"):
        project_arg = project
    else:
        project_arg = f"{project}.org"
    args = PC_ARGS.format(
        project=project_arg,
        start=start,
        end=end,
        access_site=access_site,
        granularity=granularity,
    )
    return _api(PC_ENDPOINT, args)
189
+
190
+
191
def _api(
    end_point: str,
    args: str,
    api_url: str = API_BASE_URL,
    timeout: float | None = 30.0,
) -> dict[str, Any]:
    """Perform a GET request against the API and return the decoded response.

    Args:
        end_point: Endpoint path, e.g. ``"pageviews/top"``.
        args: Already formatted path arguments appended after the endpoint.
        api_url: Base URL of the API.
        timeout: Seconds to wait for the server before giving up; ``None``
            waits indefinitely.

    Returns:
        The JSON body wrapped in a ``PageviewResponse`` on HTTP 200. An empty
        ``PageviewResponse`` for any other status below 400.

    Raises:
        ZeroOrDataNotLoadedException: on HTTP 404.
        ThrottlingException: on HTTP 429.
        requests.HTTPError: on any other 4xx/5xx status.
    """
    url = "/".join([api_url, end_point, args])
    # Without a timeout, requests can block forever on an unresponsive server.
    response = requests.get(url, headers=USER_AGENT, timeout=timeout)
    status = response.status_code
    if status == 200:
        return PageviewResponse.from_json(response.json())
    if status == 404:
        raise ZeroOrDataNotLoadedException(f"No data or data not loaded yet (HTTP 404): {url}")
    if status == 429:
        raise ThrottlingException(f"Too many requests (HTTP 429): {url}")
    # Raises HTTPError for the remaining 4xx/5xx statuses.
    response.raise_for_status()
    # Reached only for a non-200 status below 400, which raise_for_status()
    # does not treat as an error.
    return PageviewResponse()
203
+
204
+
205
+ def _quote_article_name(page: str) -> str:
206
+ return quote(unquote(page), safe="")
pageviewapi/period.py ADDED
@@ -0,0 +1,41 @@
1
+ """Helper functions for aggregating pageviews over a time period."""
2
+
3
+ import datetime
4
+
5
+ import pageviewapi.client
6
+
7
+
8
def sum_last(
    project: str,
    page: str,
    last: int = 30,
    agent: str = "all-agents",
    access: str = "all-access",
) -> int:
    """Return the total pageviews of a page over the last N days.

    The queried range runs from ``last`` days before today through today.
    """
    response = pageviewapi.client.per_article(
        project,
        page,
        _days_ago(last),
        _today(),
        access=access,
        agent=agent,
    )
    total = 0
    for daily in response["items"]:
        total = total + daily["views"]
    return total
18
+
19
+
20
def avg_last(
    project: str,
    page: str,
    last: int = 30,
    agent: str = "all-agents",
    access: str = "all-access",
) -> float:
    """Return the average daily pageviews of a page over the last N days.

    The queried range runs from ``last`` days before today through today; the
    mean is taken over the entries the API returns.
    """
    response = pageviewapi.client.per_article(
        project,
        page,
        _days_ago(last),
        _today(),
        access=access,
        agent=agent,
    )
    daily_counts = [entry["views"] for entry in response["items"]]
    return _avg(daily_counts)
30
+
31
+
32
+ def _today() -> str:
33
+ return datetime.date.today().strftime("%Y%m%d")
34
+
35
+
36
+ def _days_ago(days: int) -> str:
37
+ return (datetime.date.today() - datetime.timedelta(days=days)).strftime("%Y%m%d")
38
+
39
+
40
+ def _avg(values: list[int | float]) -> float:
41
+ return sum(values) / len(values)
@@ -0,0 +1,65 @@
1
+ Metadata-Version: 2.4
2
+ Name: pageviewapi
3
+ Version: 0.5.0
4
+ Summary: Wikimedia Pageview API client
5
+ Project-URL: Homepage, https://github.com/Commonists/pageview-api
6
+ Author-email: Commonists <ps.huard@gmail.com>
7
+ License: MIT
8
+ License-File: LICENSE
9
+ Requires-Python: >=3.10
10
+ Requires-Dist: requests>=2.0
11
+ Description-Content-Type: text/markdown
12
+
13
+ # pageview-api
14
+ [![CI](https://github.com/Commonists/pageview-api/actions/workflows/ci.yml/badge.svg)](https://github.com/Commonists/pageview-api/actions/workflows/ci.yml)
15
+ [![codecov](https://codecov.io/gh/Commonists/pageview-api/branch/master/graph/badge.svg)](https://codecov.io/gh/Commonists/pageview-api)
16
+ [![Pypi](https://img.shields.io/pypi/v/pageviewapi.svg?style=flat)](https://pypi.python.org/pypi/pageviewapi)
17
+ [![License](http://img.shields.io/badge/license-MIT-orange.svg?style=flat)](http://opensource.org/licenses/MIT)
18
+
19
+ Wikimedia Pageview API client for Python 3.10+
20
+
21
+ Installation
22
+ ------------
23
+ To install the package (system-wide if run with `sudo`), use:
24
+ ```sh
25
+ pip install git+https://github.com/Commonists/pageview-api.git
26
+ ```
27
+
28
+ Examples
29
+ --------
30
+
31
+ Number of views of the article Paris on English Wikipedia from November 6th to November 20th, 2015
32
+
33
+ ```python
34
+ import pageviewapi
35
+ pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120',
36
+ access='all-access', agent='all-agents', granularity='daily')
37
+ ```
38
+
39
+ Aggregation: Get a daily pageview count timeseries of French Wikipedia for the month of October 2015
40
+ ```python
41
+ import pageviewapi
42
+ pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100', access='all-access',
43
+ agent='all-agents', granularity='daily')
44
+ ```
45
+
46
+ Most viewed articles on French Wikipedia on November 14th, 2015
47
+ ```python
48
+ import pageviewapi
49
+ pageviewapi.top('fr.wikipedia', 2015, 11, 14, access='all-access')
50
+ ```
51
+
52
+ Sum (resp. average) of views over the last 30 days
53
+ ```python
54
+ import pageviewapi.period
55
+ pageviewapi.period.sum_last('fr.wikipedia', 'Paris', last=30,
56
+ access='all-access', agent='all-agents')
57
+
58
+ pageviewapi.period.avg_last('fr.wikipedia', 'Paris', last=30)
59
+ ```
60
+
61
+ Monthly legacy pagecounts (2008 until end of 2016) on a project
62
+ ```python
63
+ import pageviewapi
64
+ pageviewapi.legacy_pagecounts('fr.wikipedia', '2010010100', '2011010100', granularity='monthly')
65
+ ```
@@ -0,0 +1,7 @@
1
+ pageviewapi/__init__.py,sha256=XQuW8nHWyAG_5DbG1w1xmBEWuciq-_hvYfllPAcSMAg,485
2
+ pageviewapi/client.py,sha256=bDB9jZn2J3zRb9aNcrrlKPuWkOUjxTwvDCq49MC3zVk,5998
3
+ pageviewapi/period.py,sha256=CTlTrUAGV-dS4oRFBzgEbcUxL__9aCN9d1VIMp2_xcQ,1153
4
+ pageviewapi-0.5.0.dist-info/METADATA,sha256=bwXjOIoxJoYcD0yeU_7z1j3ErzhbXtfnxawYpnpuntU,2309
5
+ pageviewapi-0.5.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
6
+ pageviewapi-0.5.0.dist-info/licenses/LICENSE,sha256=K_ASJjZ01tNhhxFx5LTnc8GvQzALj9rsa1KihGESI0c,1078
7
+ pageviewapi-0.5.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Commonists
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+