pageviewapi 0.2.3__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pageviewapi-0.5.0/.claude/settings.local.json +8 -0
- pageviewapi-0.5.0/.editorconfig +10 -0
- pageviewapi-0.5.0/.github/workflows/ci.yml +26 -0
- pageviewapi-0.5.0/.gitignore +54 -0
- pageviewapi-0.5.0/LICENSE +22 -0
- pageviewapi-0.5.0/PKG-INFO +65 -0
- {pageviewapi-0.2.3 → pageviewapi-0.5.0}/README.md +10 -3
- pageviewapi-0.5.0/pageviewapi/__init__.py +25 -0
- pageviewapi-0.5.0/pageviewapi/client.py +206 -0
- pageviewapi-0.5.0/pageviewapi/period.py +41 -0
- pageviewapi-0.5.0/pyproject.toml +31 -0
- pageviewapi-0.5.0/tests/__init__.py +0 -0
- pageviewapi-0.5.0/tests/test_client.py +235 -0
- pageviewapi-0.5.0/tests/test_period.py +69 -0
- pageviewapi-0.2.3/MANIFEST.in +0 -1
- pageviewapi-0.2.3/PKG-INFO +0 -63
- pageviewapi-0.2.3/pageviewapi/__init__.py +0 -9
- pageviewapi-0.2.3/pageviewapi/client.py +0 -132
- pageviewapi-0.2.3/pageviewapi/period.py +0 -39
- pageviewapi-0.2.3/pageviewapi.egg-info/PKG-INFO +0 -63
- pageviewapi-0.2.3/pageviewapi.egg-info/SOURCES.txt +0 -11
- pageviewapi-0.2.3/pageviewapi.egg-info/dependency_links.txt +0 -1
- pageviewapi-0.2.3/pageviewapi.egg-info/requires.txt +0 -2
- pageviewapi-0.2.3/pageviewapi.egg-info/top_level.txt +0 -1
- pageviewapi-0.2.3/setup.cfg +0 -5
- pageviewapi-0.2.3/setup.py +0 -42
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on: [push, pull_request]
|
|
4
|
+
|
|
5
|
+
jobs:
|
|
6
|
+
lint:
|
|
7
|
+
runs-on: ubuntu-latest
|
|
8
|
+
steps:
|
|
9
|
+
- uses: actions/checkout@v4
|
|
10
|
+
- uses: astral-sh/setup-uv@v5
|
|
11
|
+
- run: uv run ruff check .
|
|
12
|
+
- run: uv run ruff format --check .
|
|
13
|
+
|
|
14
|
+
test:
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
strategy:
|
|
17
|
+
matrix:
|
|
18
|
+
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
- uses: astral-sh/setup-uv@v5
|
|
22
|
+
- run: uv run --python ${{ matrix.python-version }} pytest tests/ --cov=pageviewapi --cov-report=term-missing --cov-report=xml
|
|
23
|
+
- uses: codecov/codecov-action@v5
|
|
24
|
+
if: matrix.python-version == '3.14'
|
|
25
|
+
with:
|
|
26
|
+
token: ${{ secrets.CODECOV_TOKEN }}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
*~
|
|
2
|
+
|
|
3
|
+
*.py[cod]
|
|
4
|
+
|
|
5
|
+
# C extensions
|
|
6
|
+
*.so
|
|
7
|
+
|
|
8
|
+
# Packages
|
|
9
|
+
*.egg
|
|
10
|
+
*.egg-info
|
|
11
|
+
dist
|
|
12
|
+
build
|
|
13
|
+
eggs
|
|
14
|
+
parts
|
|
15
|
+
bin
|
|
16
|
+
var
|
|
17
|
+
sdist
|
|
18
|
+
develop-eggs
|
|
19
|
+
.installed.cfg
|
|
20
|
+
lib
|
|
21
|
+
lib64
|
|
22
|
+
|
|
23
|
+
# Installer logs
|
|
24
|
+
pip-log.txt
|
|
25
|
+
|
|
26
|
+
# Unit test / coverage reports
|
|
27
|
+
.coverage
|
|
28
|
+
.tox
|
|
29
|
+
nosetests.xml
|
|
30
|
+
cover/
|
|
31
|
+
|
|
32
|
+
# Translations
|
|
33
|
+
*.mo
|
|
34
|
+
|
|
35
|
+
# Mr Developer
|
|
36
|
+
.mr.developer.cfg
|
|
37
|
+
.project
|
|
38
|
+
.pydevproject
|
|
39
|
+
|
|
40
|
+
# Typical virtual environments
|
|
41
|
+
.venv
|
|
42
|
+
venv
|
|
43
|
+
|
|
44
|
+
# uv
|
|
45
|
+
uv.lock
|
|
46
|
+
.python-version
|
|
47
|
+
|
|
48
|
+
# pytest / coverage
|
|
49
|
+
.coverage
|
|
50
|
+
htmlcov/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
|
|
53
|
+
# ruff
|
|
54
|
+
.ruff_cache/
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2015 Commonists
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pageviewapi
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: Wikimedia Pageview API client
|
|
5
|
+
Project-URL: Homepage, https://github.com/Commonists/pageview-api
|
|
6
|
+
Author-email: Commonists <ps.huard@gmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Requires-Dist: requests>=2.0
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# pageview-api
|
|
14
|
+
[CI status](https://github.com/Commonists/pageview-api/actions/workflows/ci.yml)
|
|
15
|
+
[Code coverage](https://codecov.io/gh/Commonists/pageview-api)
|
|
16
|
+
[PyPI package](https://pypi.python.org/pypi/pageviewapi)
|
|
17
|
+
[MIT license](http://opensource.org/licenses/MIT)
|
|
18
|
+
|
|
19
|
+
Wikimedia Pageview API client for Python 3.10+
|
|
20
|
+
|
|
21
|
+
Installation
|
|
22
|
+
------------
|
|
23
|
+
To install the package system-wide (using sudo where your system requires it), run:
|
|
24
|
+
```sh
|
|
25
|
+
pip install git+https://github.com/Commonists/pageview-api.git
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Examples
|
|
29
|
+
--------
|
|
30
|
+
|
|
31
|
+
Number of views of the article Paris on English Wikipedia from November 6th to November 20th, 2015
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
import pageviewapi
|
|
35
|
+
pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120',
|
|
36
|
+
access='all-access', agent='all-agents', granularity='daily')
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Aggregation: Get a daily pageview count timeseries for French Wikipedia for the month of October 2015
|
|
40
|
+
```python
|
|
41
|
+
import pageviewapi
|
|
42
|
+
pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100', access='all-access',
|
|
43
|
+
agent='all-agents', granularity='daily')
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Most viewed articles on French Wikipedia on November 14th, 2015
|
|
47
|
+
```python
|
|
48
|
+
import pageviewapi
|
|
49
|
+
pageviewapi.top('fr.wikipedia', 2015, 11, 14, access='all-access')
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Sum (respectively average) of views over the last 30 days
|
|
53
|
+
```python
|
|
54
|
+
import pageviewapi.period
|
|
55
|
+
pageviewapi.period.sum_last('fr.wikipedia', 'Paris', last=30,
|
|
56
|
+
access='all-access', agent='all-agents')
|
|
57
|
+
|
|
58
|
+
pageviewapi.period.avg_last('fr.wikipedia', 'Paris', last=30)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Monthly legacy pagecounts (2008 until end of 2016) on a project
|
|
62
|
+
```python
|
|
63
|
+
import pageviewapi
|
|
64
|
+
pageviewapi.legacy_pagecounts('fr.wikipedia', '2010010100', '2011010100', granularity='monthly')
|
|
65
|
+
```
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
# pageview-api
|
|
2
|
-
[](https://github.com/Commonists/pageview-api/actions/workflows/ci.yml)
|
|
3
|
+
[](https://codecov.io/gh/Commonists/pageview-api)
|
|
4
|
+
[](https://pypi.python.org/pypi/pageviewapi)
|
|
4
5
|
[](http://opensource.org/licenses/MIT)
|
|
5
6
|
|
|
6
|
-
Wikimedia Pageview API client
|
|
7
|
+
Wikimedia Pageview API client for Python 3.10+
|
|
7
8
|
|
|
8
9
|
Installation
|
|
9
10
|
------------
|
|
@@ -44,3 +45,9 @@ pageviewapi.period.sum_last('fr.wikipedia', 'Paris', last=30,
|
|
|
44
45
|
|
|
45
46
|
pageviewapi.period.avg_last('fr.wikipedia', 'Paris', last=30)
|
|
46
47
|
```
|
|
48
|
+
|
|
49
|
+
Monthly legacy pagecounts (2008 until end of 2016) on a project
|
|
50
|
+
```python
|
|
51
|
+
import pageviewapi
|
|
52
|
+
pageviewapi.legacy_pagecounts('fr.wikipedia', '2010010100', '2011010100', granularity='monthly')
|
|
53
|
+
```
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Python client for the Wikimedia Pageview API."""
|
|
2
|
+
|
|
3
|
+
from pageviewapi.client import (
|
|
4
|
+
PageviewResponse,
|
|
5
|
+
ThrottlingException,
|
|
6
|
+
ZeroOrDataNotLoadedException,
|
|
7
|
+
__version__,
|
|
8
|
+
aggregate,
|
|
9
|
+
legacy_pagecounts,
|
|
10
|
+
per_article,
|
|
11
|
+
top,
|
|
12
|
+
unique_devices,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"__version__",
|
|
17
|
+
"aggregate",
|
|
18
|
+
"PageviewResponse",
|
|
19
|
+
"legacy_pagecounts",
|
|
20
|
+
"per_article",
|
|
21
|
+
"ThrottlingException",
|
|
22
|
+
"top",
|
|
23
|
+
"unique_devices",
|
|
24
|
+
"ZeroOrDataNotLoadedException",
|
|
25
|
+
]
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""Client to wikimedia pageview api.
|
|
2
|
+
|
|
3
|
+
API doc: https://wikitech.wikimedia.org/wiki/Analytics/AQS/Pageview_API
|
|
4
|
+
Supported endpoints:
|
|
5
|
+
- per-article
|
|
6
|
+
- top
|
|
7
|
+
- aggregate
|
|
8
|
+
- unique-devices
|
|
9
|
+
- legacy/pagecounts
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
13
|
+
from typing import Any
|
|
14
|
+
from urllib.parse import quote, unquote
|
|
15
|
+
|
|
16
|
+
import requests
|
|
17
|
+
|
|
18
|
+
try:
    __version__ = version("pageviewapi")
except PackageNotFoundError:
    # The distribution is not installed (e.g. running from a source checkout):
    # fall back to the release number declared in pyproject.toml. Keep both in sync.
    __version__ = "0.5.0"

# Header sent with every request so the API operators can identify this client.
PROJECT_URL = "https://github.com/Commonists/pageview-api"
USER_AGENT = {"User-Agent": f"Python pageview-api client v{__version__} <{PROJECT_URL}>"}

# Root of the metrics REST API; an endpoint path and its arguments are appended to it.
API_BASE_URL = "https://wikimedia.org/api/rest_v1/metrics"

# Each endpoint path is paired with the template of its slash-separated arguments.
PA_ENDPOINT = "pageviews/per-article"
PA_ARGS = "{project}/{access}/{agent}/{page}/{granularity}/{start}/{end}"

TOP_ENDPOINT = "pageviews/top"
TOP_ARGS = "{project}/{access}/{year}/{month}/{day}"

AG_ENDPOINT = "pageviews/aggregate"
AG_ARGS = "{project}/{access}/{agent}/{granularity}/{start}/{end}"

UD_ENDPOINT = "unique-devices"
UD_ARGS = "{project}/{access}/{granularity}/{start}/{end}"

PC_ENDPOINT = "legacy/pagecounts/aggregate"
PC_ARGS = "{project}/{access_site}/{granularity}/{start}/{end}"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class PageviewResponse(dict):  # type: ignore[type-arg]
    """Dictionary holding a Wikimedia Pageview API response, readable through attributes.

    A public attribute name (one that does not start with an underscore) is
    looked up among the stored keys first and only falls back to the regular
    ``dict`` attributes when no such key exists. As a result ``response.items``
    yields the ``items`` entry of the payload whenever the payload has one, and
    the usual ``dict.items`` method otherwise.

    The constructor wraps the top level only; call ``from_json`` to also wrap
    the dicts nested inside the payload, including those held in lists.
    """

    def __getattribute__(self, key: str) -> Any:
        # Underscore-prefixed names are never treated as data keys, so dunder
        # and private lookups always reach the normal attribute machinery.
        is_data_key = not key.startswith("_") and dict.__contains__(self, key)
        if is_data_key:
            return dict.__getitem__(self, key)
        return super().__getattribute__(key)

    @classmethod
    def from_json(cls, obj: Any) -> Any:
        """Return ``obj`` with every dict, at any depth, turned into a ``PageviewResponse``."""
        if isinstance(obj, list):
            return [cls.from_json(element) for element in obj]
        if not isinstance(obj, dict):
            # Scalars (str, int, float, bool, None) are returned untouched.
            return obj
        wrapped = cls()
        for name, value in obj.items():
            wrapped[name] = cls.from_json(value)
        return wrapped
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class ZeroOrDataNotLoadedException(Exception):
    """Signal an HTTP 404 from the API: there is no data, or it is not filled yet.

    See https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI#Gotchas
    """
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class ThrottlingException(Exception):
    """Signal an HTTP 429 from the API: the client is sending too many requests.

    See https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI#Gotchas
    """
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def per_article(
    project: str,
    page: str,
    start: str,
    end: str,
    access: str = "all-access",
    agent: str = "all-agents",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch the pageview counts of a single article between ``start`` and ``end``.

    The page title is percent-encoded before it is placed in the request path.

    Example::

        import pageviewapi
        pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120')
    """
    url_fields = {
        "project": project,
        "access": access,
        "agent": agent,
        "page": _quote_article_name(page),
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    return _api(PA_ENDPOINT, PA_ARGS.format(**url_fields))
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def top(
    project: str,
    year: int | str,
    month: int | str,
    day: int | str,
    access: str = "all-access",
) -> dict[str, Any]:
    """Top 1000 most visited articles for a project on a given date.

    Args:
        project: Project name, e.g. ``'fr.wikipedia'``.
        year: Year of the date, as an int or a string.
        month: Month of the date. Numeric values are zero-padded to two digits
            (``4`` becomes ``'04'``) because the request path uses the MM form.
        day: Day of the date, zero-padded the same way. Non-numeric strings
            are passed through unchanged.
        access: Access method to filter on.

    Returns:
        The decoded API response.

    Example::

        import pageviewapi
        views = pageviewapi.top('fr.wikipedia', 2015, 11, 14)
        views['items'][0]['articles'][0]
        # {'article': 'Wikipédia:Accueil_principal', 'rank': 1, 'views': 1600547}
    """
    args = TOP_ARGS.format(
        project=project,
        access=access,
        year=year,
        month=_two_digits(month),
        day=_two_digits(day),
    )
    return _api(TOP_ENDPOINT, args)


def _two_digits(value: int | str) -> str:
    """Zero-pad a numeric month/day to two digits; leave any other text untouched."""
    text = str(value)
    return text.zfill(2) if text.isdigit() else text
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def aggregate(
    project: str,
    start: str,
    end: str,
    access: str = "all-access",
    agent: str = "all-agents",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch the aggregated pageview counts of a whole project between ``start`` and ``end``.

    Example::

        import pageviewapi
        pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100')
    """
    url_fields = {
        "project": project,
        "access": access,
        "agent": agent,
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    return _api(AG_ENDPOINT, AG_ARGS.format(**url_fields))
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def unique_devices(
    project: str,
    start: str,
    end: str,
    access: str = "all-access",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Fetch the number of unique devices that accessed a project between ``start`` and ``end``."""
    url_fields = {
        "project": project,
        "access": access,
        "granularity": granularity,
        "start": start,
        "end": end,
    }
    return _api(UD_ENDPOINT, UD_ARGS.format(**url_fields))
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def legacy_pagecounts(
    project: str,
    start: str,
    end: str,
    access_site: str = "all-sites",
    granularity: str = "daily",
) -> dict[str, Any]:
    """Legacy pagecounts aggregate for a project.

    Args:
        project: ``'all-projects'`` or a project name. ``'.org'`` is appended
            to a project name that does not already end with it, so both
            ``'fr.wikipedia'`` and ``'fr.wikipedia.org'`` are accepted.
        start: First timestamp of the range.
        end: Last timestamp of the range.
        access_site: Site to filter on.
        granularity: Time unit of the returned series.

    Returns:
        The decoded API response.

    Example::

        import pageviewapi
        pageviewapi.legacy_pagecounts('fr.wikipedia', '2010010100', '2011010100')
    """
    # Never build "x.org.org" when the caller already passed a full domain.
    if project == "all-projects" or project.endswith(".org"):
        project_arg = project
    else:
        project_arg = f"{project}.org"
    args = PC_ARGS.format(
        project=project_arg,
        start=start,
        end=end,
        access_site=access_site,
        granularity=granularity,
    )
    return _api(PC_ENDPOINT, args)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _api(end_point: str, args: str, api_url: str = API_BASE_URL, timeout: float = 30.0) -> dict[str, Any]:
    """Send a GET request to the API and return the decoded JSON payload.

    Args:
        end_point: Endpoint path appended to ``api_url``.
        args: Slash-separated endpoint arguments appended after ``end_point``.
        api_url: Base URL of the API.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so a stalled connection would otherwise block
            the caller indefinitely.

    Returns:
        The payload wrapped by ``PageviewResponse.from_json`` on HTTP 200, or
        an empty ``PageviewResponse`` for any other status that
        ``raise_for_status`` does not treat as an error.

    Raises:
        ZeroOrDataNotLoadedException: on HTTP 404.
        ThrottlingException: on HTTP 429.
        requests.HTTPError: on any other status ``raise_for_status`` rejects.
    """
    url = "/".join([api_url, end_point, args])
    response = requests.get(url, headers=USER_AGENT, timeout=timeout)
    if response.status_code == 200:
        return PageviewResponse.from_json(response.json())
    # The requested URL is attached to the exception to make failures traceable.
    if response.status_code == 404:
        raise ZeroOrDataNotLoadedException(url)
    if response.status_code == 429:
        raise ThrottlingException(url)
    response.raise_for_status()
    # Reached only for a non-200 status that is not an error (e.g. 204).
    return PageviewResponse()
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _quote_article_name(page: str) -> str:
    """Percent-encode an article title for use as a single URL path segment.

    The title is decoded first so that an already-encoded title is not encoded
    twice; no character is treated as safe, so ``/`` becomes ``%2F``.
    """
    decoded_title = unquote(page)
    return quote(decoded_title, safe="")
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Helper functions for aggregating pageviews over a time period."""
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
|
|
5
|
+
import pageviewapi.client
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def sum_last(
    project: str,
    page: str,
    last: int = 30,
    agent: str = "all-agents",
    access: str = "all-access",
) -> int:
    """Return the total number of views of ``page`` over the last ``last`` days.

    The requested range runs from ``last`` days before today up to today.
    """
    first_day, final_day = _days_ago(last), _today()
    response = pageviewapi.client.per_article(project, page, first_day, final_day, access=access, agent=agent)
    total = 0
    for entry in response["items"]:
        total += entry["views"]
    return total
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def avg_last(
    project: str,
    page: str,
    last: int = 30,
    agent: str = "all-agents",
    access: str = "all-access",
) -> float:
    """Return the mean number of daily views of ``page`` over the last ``last`` days.

    The requested range runs from ``last`` days before today up to today.
    """
    first_day, final_day = _days_ago(last), _today()
    response = pageviewapi.client.per_article(project, page, first_day, final_day, access=access, agent=agent)
    daily_counts = []
    for entry in response["items"]:
        daily_counts.append(entry["views"])
    return _avg(daily_counts)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _today() -> str:
    """Return the current local date formatted as ``YYYYMMDD``."""
    current_day = datetime.date.today()
    return f"{current_day:%Y%m%d}"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _days_ago(days: int) -> str:
    """Return the local date ``days`` days before today, formatted as ``YYYYMMDD``."""
    offset = datetime.timedelta(days=days)
    target_day = datetime.date.today() - offset
    return target_day.strftime("%Y%m%d")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _avg(values: list[int | float]) -> float:
|
|
41
|
+
return sum(values) / len(values)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "pageviewapi"
|
|
7
|
+
version = "0.5.0"
|
|
8
|
+
description = "Wikimedia Pageview API client"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
authors = [{ name = "Commonists", email = "ps.huard@gmail.com" }]
|
|
12
|
+
requires-python = ">=3.10"
|
|
13
|
+
dependencies = ["requests>=2.0"]
|
|
14
|
+
|
|
15
|
+
[project.urls]
|
|
16
|
+
Homepage = "https://github.com/Commonists/pageview-api"
|
|
17
|
+
|
|
18
|
+
[dependency-groups]
|
|
19
|
+
dev = ["pytest>=8.0", "pytest-cov>=5.0", "ruff>=0.9", "pytest-mock>=3.0"]
|
|
20
|
+
|
|
21
|
+
[tool.ruff]
|
|
22
|
+
line-length = 120
|
|
23
|
+
|
|
24
|
+
[tool.ruff.lint]
|
|
25
|
+
select = ["E", "F", "I", "UP"]
|
|
26
|
+
|
|
27
|
+
[tool.pytest.ini_options]
|
|
28
|
+
testpaths = ["tests"]
|
|
29
|
+
|
|
30
|
+
[tool.coverage.run]
|
|
31
|
+
source = ["pageviewapi"]
|
|
File without changes
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
"""Tests for pageviewapi.client."""
|
|
2
|
+
|
|
3
|
+
from unittest.mock import MagicMock, patch
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from pageviewapi.client import (
|
|
8
|
+
API_BASE_URL,
|
|
9
|
+
USER_AGENT,
|
|
10
|
+
PageviewResponse,
|
|
11
|
+
ThrottlingException,
|
|
12
|
+
ZeroOrDataNotLoadedException,
|
|
13
|
+
_api,
|
|
14
|
+
_quote_article_name,
|
|
15
|
+
aggregate,
|
|
16
|
+
legacy_pagecounts,
|
|
17
|
+
per_article,
|
|
18
|
+
top,
|
|
19
|
+
unique_devices,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
ITEMS_RESPONSE = {"items": [{"views": 1234}]}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.fixture
def ok_response() -> MagicMock:
    """Mocked HTTP 200 response whose JSON body is ``ITEMS_RESPONSE``."""
    response = MagicMock(status_code=200)
    response.json.return_value = ITEMS_RESPONSE
    return response
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@pytest.fixture
def not_found_response() -> MagicMock:
    """Mocked response carrying HTTP status 404."""
    return MagicMock(status_code=404)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@pytest.fixture
def throttled_response() -> MagicMock:
    """Mocked response carrying HTTP status 429."""
    return MagicMock(status_code=429)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_per_article_returns_json(ok_response):
    """per_article returns the decoded payload and requests the per-article URL."""
    with patch("requests.get", return_value=ok_response) as fake_get:
        payload = per_article("en.wikipedia", "Paris", "20151106", "20151120")
    assert payload == ITEMS_RESPONSE
    requested_url = fake_get.call_args[0][0]
    for part in ("pageviews/per-article", "en.wikipedia", "Paris", "20151106", "20151120"):
        assert part in requested_url
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_per_article_default_args_in_url(ok_response):
    """The default access, agent and granularity all appear in the requested URL."""
    with patch("requests.get", return_value=ok_response) as fake_get:
        per_article("en.wikipedia", "Paris", "20151106", "20151120")
    requested_url = fake_get.call_args[0][0]
    for default_value in ("all-access", "all-agents", "daily"):
        assert default_value in requested_url
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_per_article_custom_args(ok_response):
    """Explicit access, agent and granularity values are placed in the requested URL."""
    overrides = {"access": "desktop", "agent": "user", "granularity": "monthly"}
    with patch("requests.get", return_value=ok_response) as fake_get:
        per_article("en.wikipedia", "Paris", "20151106", "20151120", **overrides)
    requested_url = fake_get.call_args[0][0]
    for custom_value in ("desktop", "user", "monthly"):
        assert custom_value in requested_url
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_per_article_404_raises(not_found_response):
    """An HTTP 404 response makes per_article raise ZeroOrDataNotLoadedException."""
    with patch("requests.get", return_value=not_found_response), pytest.raises(ZeroOrDataNotLoadedException):
        per_article("en.wikipedia", "Paris", "20151106", "20151120")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_per_article_429_raises(throttled_response):
    """An HTTP 429 response makes per_article raise ThrottlingException."""
    with patch("requests.get", return_value=throttled_response), pytest.raises(ThrottlingException):
        per_article("en.wikipedia", "Paris", "20151106", "20151120")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_top_returns_json(ok_response):
    """top returns the decoded payload and requests the top endpoint of the project."""
    with patch("requests.get", return_value=ok_response) as fake_get:
        payload = top("fr.wikipedia", 2015, 11, 14)
    assert payload == ITEMS_RESPONSE
    requested_url = fake_get.call_args[0][0]
    for part in ("pageviews/top", "fr.wikipedia"):
        assert part in requested_url
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_top_url_contains_date(ok_response):
    """The year, month and day passed to top all appear in the requested URL."""
    with patch("requests.get", return_value=ok_response) as fake_get:
        top("fr.wikipedia", 2015, 11, 14)
    requested_url = fake_get.call_args[0][0]
    for date_part in ("2015", "11", "14"):
        assert date_part in requested_url
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def test_aggregate_returns_json(ok_response):
    """aggregate returns the decoded payload and requests the aggregate endpoint."""
    with patch("requests.get", return_value=ok_response) as fake_get:
        payload = aggregate("fr.wikipedia", "2015100100", "2015103100")
    requested_url = fake_get.call_args[0][0]
    assert payload == ITEMS_RESPONSE
    assert "pageviews/aggregate" in requested_url
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def test_unique_devices_returns_json(ok_response):
    """unique_devices returns the decoded payload and requests the unique-devices endpoint."""
    with patch("requests.get", return_value=ok_response) as fake_get:
        payload = unique_devices("en.wikipedia", "20200101", "20200131")
    requested_url = fake_get.call_args[0][0]
    assert payload == ITEMS_RESPONSE
    assert "unique-devices" in requested_url
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_legacy_pagecounts_returns_json(ok_response):
    """legacy_pagecounts returns the payload and requests the project with its .org suffix."""
    with patch("requests.get", return_value=ok_response) as fake_get:
        payload = legacy_pagecounts("fr.wikipedia", "2010010100", "2011010100")
    assert payload == ITEMS_RESPONSE
    requested_url = fake_get.call_args[0][0]
    for part in ("legacy/pagecounts", "fr.wikipedia.org"):
        assert part in requested_url
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def test_legacy_pagecounts_all_projects(ok_response):
    """The special project 'all-projects' is requested without a .org suffix."""
    with patch("requests.get", return_value=ok_response) as fake_get:
        legacy_pagecounts("all-projects", "2010010100", "2011010100")
    requested_url = fake_get.call_args[0][0]
    assert "all-projects" in requested_url
    assert "all-projects.org" not in requested_url
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def test_api_sends_user_agent(ok_response):
    """_api passes the USER_AGENT mapping as the request headers."""
    arguments = "en.wikipedia/all-access/all-agents/Paris/daily/20200101/20200131"
    with patch("requests.get", return_value=ok_response) as fake_get:
        _api("pageviews/per-article", arguments)
    sent_headers = fake_get.call_args.kwargs["headers"]
    assert sent_headers == USER_AGENT
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_api_other_error_raises(ok_response):
    """A status other than 200, 404 or 429 surfaces the error raised by raise_for_status."""
    ok_response.configure_mock(status_code=500)
    ok_response.raise_for_status.side_effect = Exception("server error")
    with patch("requests.get", return_value=ok_response), pytest.raises(Exception, match="server error"):
        _api("pageviews/per-article", "bad/args")
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def test_api_constructs_url_correctly(ok_response):
    """_api joins the base URL, the endpoint and the arguments with slashes."""
    expected_url = f"{API_BASE_URL}/my/endpoint/arg1/arg2"
    with patch("requests.get", return_value=ok_response) as fake_get:
        _api("my/endpoint", "arg1/arg2")
    assert fake_get.call_args[0][0] == expected_url
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def test_pageview_response_attribute_access():
    """Stored keys can be read as attributes."""
    response = PageviewResponse({"rank": 1, "views": 42})
    assert (response.rank, response.views) == (1, 42)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def test_pageview_response_dict_access_still_works():
    """Subscript access keeps working alongside attribute access."""
    response = PageviewResponse({"rank": 1})
    stored_rank = response["rank"]
    assert stored_rank == 1
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def test_pageview_response_missing_key_raises_attribute_error():
    """Reading an attribute that is neither a key nor a dict attribute raises AttributeError."""
    response = PageviewResponse({"a": 1})
    with pytest.raises(AttributeError):
        getattr(response, "missing")
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def test_pageview_response_items_key_accessible_as_attribute():
    """An 'items' key takes priority over the dict.items method."""
    response = PageviewResponse.from_json({"items": [{"views": 1}]})
    entries = response.items
    assert isinstance(entries, list)
    assert entries[0].views == 1
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def test_pageview_response_keys_key_accessible_as_attribute():
    """A 'keys' key takes priority over the dict.keys method."""
    stored = ["a", "b"]
    response = PageviewResponse({"keys": stored})
    assert response.keys == ["a", "b"]
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def test_pageview_response_dict_method_accessible_when_key_absent():
    """Without an 'items' key, the attribute resolves to the regular dict method."""
    response = PageviewResponse({"rank": 1})
    items_attribute = response.items
    assert callable(items_attribute)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def test_pageview_response_nested_dict_is_wrapped():
    """from_json wraps a dict nested inside another dict."""
    nested = PageviewResponse.from_json({"outer": {"inner": 99}}).outer
    assert isinstance(nested, PageviewResponse)
    assert nested.inner == 99
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def test_pageview_response_nested_list_of_dicts_is_wrapped():
    """from_json wraps the dicts held inside a nested list."""
    response = PageviewResponse.from_json({"articles": [{"title": "Paris", "views": 10}]})
    first_article = response["articles"][0]
    assert isinstance(first_article, PageviewResponse)
    assert first_article.title == "Paris"
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def test_api_returns_pageview_response(ok_response):
    """A successful request yields a PageviewResponse instance."""
    with patch("requests.get", return_value=ok_response):
        payload = per_article("en.wikipedia", "Paris", "20151106", "20151120")
    assert isinstance(payload, PageviewResponse)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def test_quote_article_name_plain():
    """A title made only of unreserved characters is returned unchanged."""
    encoded = _quote_article_name("Paris")
    assert encoded == "Paris"
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def test_quote_article_name_slash():
    """A slash inside a title is percent-encoded."""
    encoded = _quote_article_name("AC/DC")
    assert encoded == "AC%2FDC"
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def test_per_article_encodes_space_in_page_name(ok_response):
    """A space in the page title reaches the URL as %20, never as a raw space."""
    with patch("requests.get", return_value=ok_response) as fake_get:
        per_article("en.wikipedia", "New York", "20200101", "20200131")
    requested_url = fake_get.call_args[0][0]
    assert "New%20York" in requested_url
    assert "New York" not in requested_url
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def test_per_article_encodes_slash_in_page_name(ok_response):
    """A slash in the page title reaches the URL as %2F."""
    with patch("requests.get", return_value=ok_response) as fake_get:
        per_article("en.wikipedia", "AC/DC", "20200101", "20200131")
    requested_url = fake_get.call_args[0][0]
    assert "AC%2FDC" in requested_url
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Tests for pageviewapi.period."""
|
|
2
|
+
|
|
3
|
+
from unittest.mock import patch
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from pageviewapi.period import _avg, _days_ago, _today, avg_last, sum_last
|
|
8
|
+
|
|
9
|
+
MOCK_VIEWS = {"items": [{"views": 100}, {"views": 200}, {"views": 300}]}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_today_format():
    """_today produces an eight-digit string."""
    stamp = _today()
    assert stamp.isdigit() if len(stamp) == 8 else False
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_days_ago_format():
    """_days_ago produces an eight-digit string."""
    stamp = _days_ago(30)
    assert stamp.isdigit() if len(stamp) == 8 else False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_days_ago_is_before_today():
    """Yesterday's stamp sorts before today's stamp."""
    yesterday = _days_ago(1)
    today = _today()
    assert yesterday < today
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_days_ago_zero_equals_today():
    """Zero days ago is the same stamp as today."""
    zero_days_back = _days_ago(0)
    today = _today()
    assert zero_days_back == today
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_avg_integers():
    """The mean of 10, 20 and 30 is 20.0."""
    mean = _avg([10, 20, 30])
    assert mean == 20.0
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_avg_single_value():
    """The mean of a single value is that value."""
    mean = _avg([42])
    assert mean == 42.0
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_avg_floats():
    """The mean of 1.5 and 2.5 is approximately 2.0."""
    mean = _avg([1.5, 2.5])
    assert mean == pytest.approx(2.0)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_sum_last_returns_total():
    """sum_last adds up the 'views' of every returned item."""
    with patch("pageviewapi.client.per_article", return_value=MOCK_VIEWS):
        total = sum_last("en.wikipedia", "Python_(programming_language)")
    assert total == 600
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_sum_last_passes_date_range():
    """sum_last passes a start stamp that sorts before the end stamp."""
    with patch("pageviewapi.client.per_article", return_value=MOCK_VIEWS) as fake_per_article:
        sum_last("en.wikipedia", "Python_(programming_language)", last=7)
    positional = fake_per_article.call_args[0]
    start, end = positional[2:4]
    assert start < end
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_avg_last_returns_average():
    """avg_last averages the 'views' of every returned item."""
    with patch("pageviewapi.client.per_article", return_value=MOCK_VIEWS):
        mean = avg_last("en.wikipedia", "Python_(programming_language)")
    assert mean == pytest.approx(200.0)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_avg_last_passes_access_and_agent():
    """avg_last forwards access and agent to per_article as keyword arguments."""
    with patch("pageviewapi.client.per_article", return_value=MOCK_VIEWS) as fake_per_article:
        avg_last("en.wikipedia", "Python", access="desktop", agent="user")
    forwarded = fake_per_article.call_args.kwargs
    assert forwarded["access"] == "desktop"
    assert forwarded["agent"] == "user"
|
pageviewapi-0.2.3/MANIFEST.in
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
include *.md
|
pageviewapi-0.2.3/PKG-INFO
DELETED
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 1.1
|
|
2
|
-
Name: pageviewapi
|
|
3
|
-
Version: 0.2.3
|
|
4
|
-
Summary: Wikimedia Pageview API client
|
|
5
|
-
Home-page: http://github.com/Commonists/pageviewapi
|
|
6
|
-
Author: Commonists
|
|
7
|
-
Author-email: ps.huard@gmail.com
|
|
8
|
-
License: MIT
|
|
9
|
-
Description: # pageview-api
|
|
10
|
-
[](https://travis-ci.org/Commonists/pageview-api)
|
|
11
|
-
[](https://landscape.io/github/Commonists/pageview-api/master)
|
|
12
|
-
[](http://opensource.org/licenses/MIT)
|
|
13
|
-
|
|
14
|
-
Wikimedia Pageview API client
|
|
15
|
-
|
|
16
|
-
Installation
|
|
17
|
-
------------
|
|
18
|
-
In order to install system wide on system using sudo you can use:
|
|
19
|
-
```sh
|
|
20
|
-
pip install git+https://github.com/Commonists/pageview-api.git
|
|
21
|
-
```
|
|
22
|
-
|
|
23
|
-
Examples
|
|
24
|
-
--------
|
|
25
|
-
|
|
26
|
-
Number of view on English Wikipedia of article Paris from November 6th to November 20th 2015
|
|
27
|
-
|
|
28
|
-
```python
|
|
29
|
-
import pageviewapi
|
|
30
|
-
pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120',
|
|
31
|
-
access='all-access', agent='all-agents', granularity='daily')
|
|
32
|
-
```
|
|
33
|
-
|
|
34
|
-
Aggregation: Get a daily pageview count timeseries of all projects for the month of October 2015
|
|
35
|
-
```python
|
|
36
|
-
import pageviewapi
|
|
37
|
-
pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100', access='all-access',
|
|
38
|
-
agent='all-agents', granularity='daily')
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
Most viewed articles on French Wikipedia on November 14th, 2015
|
|
42
|
-
```python
|
|
43
|
-
import pageviewapi
|
|
44
|
-
pageviewapi.top('fr.wikipedia', 2015, 11, 14, access='all-access')
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
Sum (resp. average) of view during last 30 days
|
|
48
|
-
```python
|
|
49
|
-
import pageviewapi.period
|
|
50
|
-
pageviewapi.period.sum_last('fr.wikipedia', 'Paris', last=30,
|
|
51
|
-
access='all-access', agent='all-agents')
|
|
52
|
-
|
|
53
|
-
pageviewapi.period.avg_last('fr.wikipedia', 'Paris', last=30)
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
Platform: UNKNOWN
|
|
57
|
-
Classifier: Development Status :: 4 - Beta
|
|
58
|
-
Classifier: Environment :: Console
|
|
59
|
-
Classifier: Intended Audience :: Developers
|
|
60
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
61
|
-
Classifier: Operating System :: OS Independent
|
|
62
|
-
Classifier: Programming Language :: Python
|
|
63
|
-
Classifier: Topic :: Utilities
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
"""Python client for wikimedia pageview api."""
|
|
2
|
-
|
|
3
|
-
from pageviewapi.client import per_article
|
|
4
|
-
from pageviewapi.client import top
|
|
5
|
-
from pageviewapi.client import aggregate
|
|
6
|
-
from pageviewapi.client import Access
|
|
7
|
-
from pageviewapi.client import Agent
|
|
8
|
-
|
|
9
|
-
from pageviewapi.client import __version__
|
|
@@ -1,132 +0,0 @@
|
|
|
1
|
-
"""Client to wikimedia pageview api.
|
|
2
|
-
|
|
3
|
-
API doc: https://wikitech.wikimedia.org/wiki/Analytics/AQS/Pageview_API
|
|
4
|
-
Supported endpoints:
|
|
5
|
-
- per-article
|
|
6
|
-
- top
|
|
7
|
-
- aggregate
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
from attrdict import AttrDict
|
|
11
|
-
import requests
|
|
12
|
-
|
|
13
|
-
__version__ = "0.2.3"
|
|
14
|
-
|
|
15
|
-
# User-agent
|
|
16
|
-
PROJECT_URL = "https://github.com/Commonists/pageview-api"
|
|
17
|
-
UA = "Python pageview-api client v{version} <{url}>"
|
|
18
|
-
USER_AGENT = {
|
|
19
|
-
'User-Agent': UA.format(url=PROJECT_URL, version=__version__)
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
API_BASE_URL = "https://wikimedia.org/api/rest_v1/metrics/pageviews"
|
|
23
|
-
# Per article
|
|
24
|
-
PA_ENDPOINT = "per-article"
|
|
25
|
-
PA_ARGS = "{project}/{access}/{agent}/{page}/{granularity}/{start}/{end}"
|
|
26
|
-
|
|
27
|
-
# Top
|
|
28
|
-
TOP_ENDPOINT = "top"
|
|
29
|
-
TOP_ARGS = "{project}/{access}/{year}/{month}/{day}"
|
|
30
|
-
|
|
31
|
-
# aggregate
|
|
32
|
-
AG_ENDPOINT = "aggregate"
|
|
33
|
-
AG_ARGS = "{project}/{access}/{agent}/{granularity}/{start}/{end}"
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def per_article(project, page, start, end,
|
|
37
|
-
access='all-access', agent='all-agents', granularity='daily'):
|
|
38
|
-
"""Per article API.
|
|
39
|
-
|
|
40
|
-
>>> import pageviewapi
|
|
41
|
-
>>> pageview.per_article('en.wikipedia', 'Paris', '20151106', '20151120')
|
|
42
|
-
will requests views for Paris article between 2015-11-06 and 2015-11-20
|
|
43
|
-
"""
|
|
44
|
-
args = PA_ARGS.format(project=project,
|
|
45
|
-
page=page,
|
|
46
|
-
start=start,
|
|
47
|
-
end=end,
|
|
48
|
-
access=access,
|
|
49
|
-
agent=agent,
|
|
50
|
-
granularity=granularity)
|
|
51
|
-
return __api__(PA_ENDPOINT, args)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def top(project, year, month, day, access='all-access'):
|
|
55
|
-
"""Top 1000 most visited articles from project on a given date.
|
|
56
|
-
|
|
57
|
-
>>> import pageviewapi
|
|
58
|
-
>>> views = pageviewapi.top('fr.wikipedia', 2015, 11, 14)
|
|
59
|
-
>>> views['items'][0]['articles'][0]
|
|
60
|
-
{u'article': u'Wikip\xe9dia:Accueil_principal', u'rank': 1,
|
|
61
|
-
u'views': 1600547}
|
|
62
|
-
"""
|
|
63
|
-
args = TOP_ARGS.format(project=project,
|
|
64
|
-
access=access,
|
|
65
|
-
year=year,
|
|
66
|
-
month=month,
|
|
67
|
-
day=day)
|
|
68
|
-
return __api__(TOP_ENDPOINT, args)
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def aggregate(project, start, end,
|
|
72
|
-
access='all-access', agent='all-agents', granularity='daily'):
|
|
73
|
-
"""Aggregate API.
|
|
74
|
-
|
|
75
|
-
>>> import pageviewapi
|
|
76
|
-
>>> pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100')
|
|
77
|
-
"""
|
|
78
|
-
args = AG_ARGS.format(project=project,
|
|
79
|
-
start=start,
|
|
80
|
-
end=end,
|
|
81
|
-
access=access,
|
|
82
|
-
agent=agent,
|
|
83
|
-
granularity=granularity)
|
|
84
|
-
return __api__(AG_ENDPOINT, args)
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def __api__(end_point, args, api_url=API_BASE_URL):
|
|
88
|
-
"""Calling API."""
|
|
89
|
-
url = "/".join([api_url, end_point, args])
|
|
90
|
-
return AttrDict(requests.get(url, headers=USER_AGENT).json())
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
class APIValues(object):
|
|
94
|
-
|
|
95
|
-
@classmethod
|
|
96
|
-
def allvalues(cls):
|
|
97
|
-
"""All values in the enum class.
|
|
98
|
-
|
|
99
|
-
Returns:
|
|
100
|
-
All upper cased value from APIValues.
|
|
101
|
-
"""
|
|
102
|
-
return sorted([vars(cls)[variable] for variable in dir(cls)
|
|
103
|
-
if variable.isupper()])
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
class Access(APIValues):
|
|
107
|
-
"""Access values allows to filter by access.
|
|
108
|
-
|
|
109
|
-
If you want to filter by mobile use:
|
|
110
|
-
Mobile application: Access.MOBILE_APP
|
|
111
|
-
Mobile web: Access.MOBILE_WEB
|
|
112
|
-
Desktop: Access.DESKTOP
|
|
113
|
-
Default: Access.ALL_ACCESS
|
|
114
|
-
"""
|
|
115
|
-
ALL_ACCESS = "all-access"
|
|
116
|
-
DESKTOP = "desktop"
|
|
117
|
-
MOBILE_APP = "mobile-app"
|
|
118
|
-
MOBILE_WEB = "mobile-web"
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
class Agent(APIValues):
|
|
122
|
-
"""Agent values allows to filter by kind of user-agent.
|
|
123
|
-
|
|
124
|
-
All: Agent.ALL_AGENTS
|
|
125
|
-
User: Agent.USER
|
|
126
|
-
Bot: Agent.BOT
|
|
127
|
-
Spider: Agent.SPIDER
|
|
128
|
-
"""
|
|
129
|
-
ALL_AGENTS = "all-agents"
|
|
130
|
-
USER = "user"
|
|
131
|
-
BOT = "bot"
|
|
132
|
-
SPIDER = "spider"
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
"""Helper functions on period."""
|
|
2
|
-
import datetime
|
|
3
|
-
import pageviewapi.client
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def sum_last(project, page, last=30, agent='all-agents', access='all-access'):
|
|
7
|
-
"""Page views during last days."""
|
|
8
|
-
views = pageviewapi.client.per_article(project, page,
|
|
9
|
-
__days_ago__(last),
|
|
10
|
-
__today__(),
|
|
11
|
-
access=access, agent=agent)
|
|
12
|
-
return sum([daily['views'] for daily in views['items']])
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def avg_last(project, page, last=30, agent='all-agents', access='all-access'):
|
|
16
|
-
"""Page views during last days."""
|
|
17
|
-
views = pageviewapi.client.per_article(project, page,
|
|
18
|
-
__days_ago__(last),
|
|
19
|
-
__today__(),
|
|
20
|
-
access=access, agent=agent)
|
|
21
|
-
return __avg__([daily['views'] for daily in views['items']])
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def __today__():
|
|
25
|
-
"""Date of the day as YYYYmmdd format."""
|
|
26
|
-
return datetime.date.today().strftime('%Y%m%d')
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def __days_ago__(days):
|
|
30
|
-
"""Days ago as YYYYmmdd format."""
|
|
31
|
-
today = datetime.date.today()
|
|
32
|
-
delta = datetime.timedelta(days=days)
|
|
33
|
-
ago = today - delta
|
|
34
|
-
return ago.strftime('%Y%m%d')
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def __avg__(numericlist):
|
|
38
|
-
"""Basic average function."""
|
|
39
|
-
return sum(numericlist) / float(len(numericlist))
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 1.1
|
|
2
|
-
Name: pageviewapi
|
|
3
|
-
Version: 0.2.3
|
|
4
|
-
Summary: Wikimedia Pageview API client
|
|
5
|
-
Home-page: http://github.com/Commonists/pageviewapi
|
|
6
|
-
Author: Commonists
|
|
7
|
-
Author-email: ps.huard@gmail.com
|
|
8
|
-
License: MIT
|
|
9
|
-
Description: # pageview-api
|
|
10
|
-
[](https://travis-ci.org/Commonists/pageview-api)
|
|
11
|
-
[](https://landscape.io/github/Commonists/pageview-api/master)
|
|
12
|
-
[](http://opensource.org/licenses/MIT)
|
|
13
|
-
|
|
14
|
-
Wikimedia Pageview API client
|
|
15
|
-
|
|
16
|
-
Installation
|
|
17
|
-
------------
|
|
18
|
-
In order to install system wide on system using sudo you can use:
|
|
19
|
-
```sh
|
|
20
|
-
pip install git+https://github.com/Commonists/pageview-api.git
|
|
21
|
-
```
|
|
22
|
-
|
|
23
|
-
Examples
|
|
24
|
-
--------
|
|
25
|
-
|
|
26
|
-
Number of view on English Wikipedia of article Paris from November 6th to November 20th 2015
|
|
27
|
-
|
|
28
|
-
```python
|
|
29
|
-
import pageviewapi
|
|
30
|
-
pageviewapi.per_article('en.wikipedia', 'Paris', '20151106', '20151120',
|
|
31
|
-
access='all-access', agent='all-agents', granularity='daily')
|
|
32
|
-
```
|
|
33
|
-
|
|
34
|
-
Aggregation: Get a daily pageview count timeseries of all projects for the month of October 2015
|
|
35
|
-
```python
|
|
36
|
-
import pageviewapi
|
|
37
|
-
pageviewapi.aggregate('fr.wikipedia', '2015100100', '2015103100', access='all-access',
|
|
38
|
-
agent='all-agents', granularity='daily')
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
Most viewed articles on French Wikipedia on November 14th, 2015
|
|
42
|
-
```python
|
|
43
|
-
import pageviewapi
|
|
44
|
-
pageviewapi.top('fr.wikipedia', 2015, 11, 14, access='all-access')
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
Sum (resp. average) of view during last 30 days
|
|
48
|
-
```python
|
|
49
|
-
import pageviewapi.period
|
|
50
|
-
pageviewapi.period.sum_last('fr.wikipedia', 'Paris', last=30,
|
|
51
|
-
access='all-access', agent='all-agents')
|
|
52
|
-
|
|
53
|
-
pageviewapi.period.avg_last('fr.wikipedia', 'Paris', last=30)
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
Platform: UNKNOWN
|
|
57
|
-
Classifier: Development Status :: 4 - Beta
|
|
58
|
-
Classifier: Environment :: Console
|
|
59
|
-
Classifier: Intended Audience :: Developers
|
|
60
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
61
|
-
Classifier: Operating System :: OS Independent
|
|
62
|
-
Classifier: Programming Language :: Python
|
|
63
|
-
Classifier: Topic :: Utilities
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
MANIFEST.in
|
|
2
|
-
README.md
|
|
3
|
-
setup.py
|
|
4
|
-
pageviewapi/__init__.py
|
|
5
|
-
pageviewapi/client.py
|
|
6
|
-
pageviewapi/period.py
|
|
7
|
-
pageviewapi.egg-info/PKG-INFO
|
|
8
|
-
pageviewapi.egg-info/SOURCES.txt
|
|
9
|
-
pageviewapi.egg-info/dependency_links.txt
|
|
10
|
-
pageviewapi.egg-info/requires.txt
|
|
11
|
-
pageviewapi.egg-info/top_level.txt
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
pageviewapi
|
pageviewapi-0.2.3/setup.cfg
DELETED
pageviewapi-0.2.3/setup.py
DELETED
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/python
|
|
2
|
-
# -*- coding: latin-1 -*-
|
|
3
|
-
|
|
4
|
-
"""Setup script."""
|
|
5
|
-
|
|
6
|
-
try:
|
|
7
|
-
from setuptools import setup
|
|
8
|
-
except ImportError:
|
|
9
|
-
from distutils.core import setup
|
|
10
|
-
|
|
11
|
-
try:
|
|
12
|
-
import pageviewapi
|
|
13
|
-
version = pageviewapi.__version__
|
|
14
|
-
except ImportError:
|
|
15
|
-
version = 'Undefined'
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
classifiers = [
|
|
19
|
-
'Development Status :: 4 - Beta',
|
|
20
|
-
'Environment :: Console',
|
|
21
|
-
'Intended Audience :: Developers',
|
|
22
|
-
'License :: OSI Approved :: MIT License',
|
|
23
|
-
'Operating System :: OS Independent',
|
|
24
|
-
'Programming Language :: Python',
|
|
25
|
-
'Topic :: Utilities'
|
|
26
|
-
]
|
|
27
|
-
packages = ['pageviewapi']
|
|
28
|
-
requires = ['requests', 'attrdict']
|
|
29
|
-
|
|
30
|
-
setup(
|
|
31
|
-
name='pageviewapi',
|
|
32
|
-
version=version,
|
|
33
|
-
author='Commonists',
|
|
34
|
-
author_email='ps.huard@gmail.com',
|
|
35
|
-
url='http://github.com/Commonists/pageviewapi',
|
|
36
|
-
description='Wikimedia Pageview API client',
|
|
37
|
-
long_description=open('README.md').read(),
|
|
38
|
-
license='MIT',
|
|
39
|
-
packages=packages,
|
|
40
|
-
install_requires=requires,
|
|
41
|
-
classifiers=classifiers
|
|
42
|
-
)
|