estatjp 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- estatjp-0.1.1/.gitignore +250 -0
- estatjp-0.1.1/LICENSE +21 -0
- estatjp-0.1.1/PKG-INFO +242 -0
- estatjp-0.1.1/README.md +193 -0
- estatjp-0.1.1/pyproject.toml +213 -0
- estatjp-0.1.1/src/estatjp/__init__.py +30 -0
- estatjp-0.1.1/src/estatjp/api.py +94 -0
estatjp-0.1.1/.gitignore
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
## my additions
|
|
2
|
+
# .mynotes
|
|
3
|
+
|
|
4
|
+
### Linux template
|
|
5
|
+
*~
|
|
6
|
+
|
|
7
|
+
# temporary files which can be created if a process still has a handle open of a deleted file
|
|
8
|
+
.fuse_hidden*
|
|
9
|
+
|
|
10
|
+
# KDE directory preferences
|
|
11
|
+
.directory
|
|
12
|
+
|
|
13
|
+
# Linux trash folder which might appear on any partition or disk
|
|
14
|
+
.Trash-*
|
|
15
|
+
|
|
16
|
+
# .nfs files are created when an open file is removed but is still being accessed
|
|
17
|
+
.nfs*
|
|
18
|
+
|
|
19
|
+
### Windows template
|
|
20
|
+
# Windows thumbnail cache files
|
|
21
|
+
Thumbs.db
|
|
22
|
+
Thumbs.db:encryptable
|
|
23
|
+
ehthumbs.db
|
|
24
|
+
ehthumbs_vista.db
|
|
25
|
+
|
|
26
|
+
# Dump file
|
|
27
|
+
*.stackdump
|
|
28
|
+
|
|
29
|
+
# Folder config file
|
|
30
|
+
[Dd]esktop.ini
|
|
31
|
+
|
|
32
|
+
# Recycle Bin used on file shares
|
|
33
|
+
$RECYCLE.BIN/
|
|
34
|
+
|
|
35
|
+
# Windows Installer files
|
|
36
|
+
*.cab
|
|
37
|
+
*.msi
|
|
38
|
+
*.msix
|
|
39
|
+
*.msm
|
|
40
|
+
*.msp
|
|
41
|
+
|
|
42
|
+
# Windows shortcuts
|
|
43
|
+
*.lnk
|
|
44
|
+
|
|
45
|
+
### JupyterNotebooks template
|
|
46
|
+
# gitignore template for Jupyter Notebooks
|
|
47
|
+
# website: http://jupyter.org/
|
|
48
|
+
|
|
49
|
+
.ipynb_checkpoints
|
|
50
|
+
*/.ipynb_checkpoints/*
|
|
51
|
+
|
|
52
|
+
# IPython
|
|
53
|
+
profile_default/
|
|
54
|
+
ipython_config.py
|
|
55
|
+
|
|
56
|
+
# Remove previous ipynb_checkpoints
|
|
57
|
+
# git rm -r .ipynb_checkpoints/
|
|
58
|
+
|
|
59
|
+
### macOS template
|
|
60
|
+
# General
|
|
61
|
+
.DS_Store
|
|
62
|
+
.AppleDouble
|
|
63
|
+
.LSOverride
|
|
64
|
+
|
|
65
|
+
# Icon must end with two \r
|
|
66
|
+
Icon
|
|
67
|
+
|
|
68
|
+
# Thumbnails
|
|
69
|
+
._*
|
|
70
|
+
|
|
71
|
+
# Files that might appear in the root of a volume
|
|
72
|
+
.DocumentRevisions-V100
|
|
73
|
+
.fseventsd
|
|
74
|
+
.Spotlight-V100
|
|
75
|
+
.TemporaryItems
|
|
76
|
+
.Trashes
|
|
77
|
+
.VolumeIcon.icns
|
|
78
|
+
.com.apple.timemachine.donotpresent
|
|
79
|
+
|
|
80
|
+
# Directories potentially created on remote AFP share
|
|
81
|
+
.AppleDB
|
|
82
|
+
.AppleDesktop
|
|
83
|
+
Network Trash Folder
|
|
84
|
+
Temporary Items
|
|
85
|
+
.apdisk
|
|
86
|
+
|
|
87
|
+
### Python template
|
|
88
|
+
# Byte-compiled / optimized / DLL files
|
|
89
|
+
__pycache__/
|
|
90
|
+
*.py[cod]
|
|
91
|
+
*$py.class
|
|
92
|
+
|
|
93
|
+
# C extensions
|
|
94
|
+
*.so
|
|
95
|
+
|
|
96
|
+
# Distribution / packaging
|
|
97
|
+
.pypirc
|
|
98
|
+
.Python
|
|
99
|
+
build/
|
|
100
|
+
develop-eggs/
|
|
101
|
+
dist/
|
|
102
|
+
downloads/
|
|
103
|
+
eggs/
|
|
104
|
+
.eggs/
|
|
105
|
+
lib/
|
|
106
|
+
lib64/
|
|
107
|
+
parts/
|
|
108
|
+
sdist/
|
|
109
|
+
var/
|
|
110
|
+
wheels/
|
|
111
|
+
share/python-wheels/
|
|
112
|
+
*.egg-info/
|
|
113
|
+
.installed.cfg
|
|
114
|
+
*.egg
|
|
115
|
+
MANIFEST
|
|
116
|
+
.mynotes/
|
|
117
|
+
|
|
118
|
+
# PyInstaller
|
|
119
|
+
# Usually these files are written by a python script from a template
|
|
120
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
121
|
+
*.manifest
|
|
122
|
+
*.spec
|
|
123
|
+
|
|
124
|
+
# Installer logs
|
|
125
|
+
pip-log.txt
|
|
126
|
+
pip-delete-this-directory.txt
|
|
127
|
+
|
|
128
|
+
# Unit test / coverage reports
|
|
129
|
+
htmlcov/
|
|
130
|
+
.tox/
|
|
131
|
+
.nox/
|
|
132
|
+
.coverage
|
|
133
|
+
.coverage.*
|
|
134
|
+
.cache
|
|
135
|
+
nosetests.xml
|
|
136
|
+
coverage.xml
|
|
137
|
+
*.cover
|
|
138
|
+
*.py,cover
|
|
139
|
+
.hypothesis/
|
|
140
|
+
.pytest_cache/
|
|
141
|
+
cover/
|
|
142
|
+
|
|
143
|
+
# Translations
|
|
144
|
+
*.mo
|
|
145
|
+
*.pot
|
|
146
|
+
|
|
147
|
+
# Django stuff:
|
|
148
|
+
*.log
|
|
149
|
+
local_settings.py
|
|
150
|
+
db.sqlite3
|
|
151
|
+
db.sqlite3-journal
|
|
152
|
+
|
|
153
|
+
# Flask stuff:
|
|
154
|
+
instance/
|
|
155
|
+
.webassets-cache
|
|
156
|
+
|
|
157
|
+
# Scrapy stuff:
|
|
158
|
+
.scrapy
|
|
159
|
+
|
|
160
|
+
# Sphinx documentation
|
|
161
|
+
docs/_build/
|
|
162
|
+
docs/api
|
|
163
|
+
|
|
164
|
+
# PyBuilder
|
|
165
|
+
.pybuilder/
|
|
166
|
+
target/
|
|
167
|
+
|
|
168
|
+
# Jupyter Notebook
|
|
169
|
+
|
|
170
|
+
# IPython
|
|
171
|
+
|
|
172
|
+
# pyenv
|
|
173
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
174
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
175
|
+
# .python-version
|
|
176
|
+
|
|
177
|
+
# pipenv
|
|
178
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
179
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
180
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
181
|
+
# install all needed dependencies.
|
|
182
|
+
#Pipfile.lock
|
|
183
|
+
|
|
184
|
+
# poetry
|
|
185
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
186
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
187
|
+
# commonly ignored for libraries.
|
|
188
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
189
|
+
#poetry.lock
|
|
190
|
+
|
|
191
|
+
# pdm
|
|
192
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
193
|
+
#pdm.lock
|
|
194
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
195
|
+
# in version control.
|
|
196
|
+
# https://pdm.fming.dev/#use-with-ide
|
|
197
|
+
.pdm.toml
|
|
198
|
+
|
|
199
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
200
|
+
__pypackages__/
|
|
201
|
+
|
|
202
|
+
# Celery stuff
|
|
203
|
+
celerybeat-schedule
|
|
204
|
+
celerybeat.pid
|
|
205
|
+
|
|
206
|
+
# SageMath parsed files
|
|
207
|
+
*.sage.py
|
|
208
|
+
|
|
209
|
+
# Environments
|
|
210
|
+
.env
|
|
211
|
+
.venv
|
|
212
|
+
env/
|
|
213
|
+
venv/
|
|
214
|
+
ENV/
|
|
215
|
+
env.bak/
|
|
216
|
+
venv.bak/
|
|
217
|
+
|
|
218
|
+
# Spyder project settings
|
|
219
|
+
.spyderproject
|
|
220
|
+
.spyproject
|
|
221
|
+
|
|
222
|
+
# Rope project settings
|
|
223
|
+
.ropeproject
|
|
224
|
+
|
|
225
|
+
# mkdocs documentation
|
|
226
|
+
/site
|
|
227
|
+
|
|
228
|
+
# mypy
|
|
229
|
+
.mypy_cache/
|
|
230
|
+
.dmypy.json
|
|
231
|
+
dmypy.json
|
|
232
|
+
|
|
233
|
+
# Pyre type checker
|
|
234
|
+
.pyre/
|
|
235
|
+
|
|
236
|
+
# pytype static type analyzer
|
|
237
|
+
.pytype/
|
|
238
|
+
|
|
239
|
+
# Cython debug symbols
|
|
240
|
+
cython_debug/
|
|
241
|
+
|
|
242
|
+
# PyCharm
|
|
243
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
244
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
245
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
246
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
247
|
+
#.idea/
|
|
248
|
+
|
|
249
|
+
# Hatch-VCS
|
|
250
|
+
_version.py
|
estatjp-0.1.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Alan Engel
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
|
7
|
+
the Software without restriction, including without limitation the rights to
|
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
|
10
|
+
subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice (including the next
|
|
13
|
+
paragraph) shall be included in all copies or substantial portions of the
|
|
14
|
+
Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
18
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
19
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
20
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
21
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
estatjp-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: estatjp
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: A Python package for accessing Japanese government data on its e-Stat portal
|
|
5
|
+
Project-URL: Homepage, https://github.com/kijinosu/estatjp
|
|
6
|
+
Project-URL: Source Code, https://github.com/kijinosu/estatjp
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/kijinosu/estatjp/issues
|
|
8
|
+
Project-URL: Documentation, https://github.com/kijinosu/estatjp/blob/main/README.md
|
|
9
|
+
Project-URL: Download, https://pypi.org/project/estatjp/#files
|
|
10
|
+
Author-email: Alan Engel <kijinosu@proton.me>
|
|
11
|
+
License-Expression: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
+
Requires-Python: >=3.14
|
|
19
|
+
Requires-Dist: docutils!=0.18.*,!=0.19.*,>=0.8
|
|
20
|
+
Requires-Dist: importlib-metadata>=3.6; python_version < '3.10'
|
|
21
|
+
Requires-Dist: pandas>=3.0.1
|
|
22
|
+
Requires-Dist: pybtex-docutils>=1.0.2
|
|
23
|
+
Requires-Dist: pybtex>=0.25
|
|
24
|
+
Requires-Dist: sphinx>=3.5
|
|
25
|
+
Provides-Extra: build
|
|
26
|
+
Requires-Dist: pip-audit; extra == 'build'
|
|
27
|
+
Requires-Dist: twine; extra == 'build'
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: hatch; extra == 'dev'
|
|
30
|
+
Requires-Dist: pre-commit; extra == 'dev'
|
|
31
|
+
Provides-Extra: docs
|
|
32
|
+
Requires-Dist: myst-parser>=4.0; extra == 'docs'
|
|
33
|
+
Requires-Dist: nbsphinx; extra == 'docs'
|
|
34
|
+
Requires-Dist: pydata-sphinx-theme~=0.16; extra == 'docs'
|
|
35
|
+
Requires-Dist: sphinx-autoapi>=3.6.0; extra == 'docs'
|
|
36
|
+
Requires-Dist: sphinx-autobuild>=2024.10.3; extra == 'docs'
|
|
37
|
+
Requires-Dist: sphinx-copybutton>=0.5.2; extra == 'docs'
|
|
38
|
+
Requires-Dist: sphinx-design>=0.6.1; extra == 'docs'
|
|
39
|
+
Requires-Dist: sphinxcontrib-bibtex>=2.6.5; extra == 'docs'
|
|
40
|
+
Requires-Dist: sphinx~=8.0; extra == 'docs'
|
|
41
|
+
Provides-Extra: tests
|
|
42
|
+
Requires-Dist: pytest; extra == 'tests'
|
|
43
|
+
Requires-Dist: pytest-cov; extra == 'tests'
|
|
44
|
+
Requires-Dist: pytest-raises; extra == 'tests'
|
|
45
|
+
Requires-Dist: pytest-randomly; extra == 'tests'
|
|
46
|
+
Requires-Dist: pytest-xdist; extra == 'tests'
|
|
47
|
+
Requires-Dist: sphinx-autoapi>=3.6.0; extra == 'tests'
|
|
48
|
+
Description-Content-Type: text/markdown
|
|
49
|
+
|
|
50
|
+
# estatjp
|
|
51
|
+
|
|
52
|
+
[E-Stat](https://www.e-stat.go.jp/en) is a widely used portal site for
|
|
53
|
+
accessing Japanese governmental statistical data. It began operation in
|
|
54
|
+
2008. e-Stat currently hosts [744 surveys (1,688,550 datasets) in
|
|
55
|
+
Japanese](https://www.e-stat.go.jp/stat-search?page=1) from about 30
|
|
56
|
+
governmental agencies with [56 surveys (292,856 datasets) available in
|
|
57
|
+
English](https://www.e-stat.go.jp/en/stat-search?page=1). These
|
|
58
|
+
collections contain 'databases' and files (mainly Excel files). The
|
|
59
|
+
'databases' can be accessed via an API. API urls can cover entire
|
|
60
|
+
databases or subsets that can be tailored to users' individual needs.
|
|
61
|
+
|
|
62
|
+
The objective of the estatjp Python package is to provide access to the
|
|
63
|
+
e-Stat portal and return datasets in pandas.DataFrame format.
|
|
64
|
+
|
|
65
|
+
For example, the e-Stat API returns CSV streams that contain headers
|
|
66
|
+
with metadata. These headers interfere with pandas.read_csv. The first
|
|
67
|
+
release of estatjp returns a dictionary that contains the header and
|
|
68
|
+
main table as separate dataframes.
|
|
69
|
+
|
|
70
|
+
## Requirement
|
|
71
|
+
|
|
72
|
+
The e-Stat API requires an application ID that can be obtained from the
|
|
73
|
+
[E-Stat API](https://www.e-stat.go.jp/api/en) page. Install this ID into
|
|
74
|
+
your project by opening a terminal in your project root and running
|
|
75
|
+
the following commands:
|
|
76
|
+
|
|
77
|
+
pip install python-dotenv
|
|
78
|
+
dotenv set ESTAT_APP_ID your-app-id
|
|
79
|
+
|
|
80
|
+
## Install this package
|
|
81
|
+
|
|
82
|
+
pip install estatjp
|
|
83
|
+
|
|
84
|
+
## Example
|
|
85
|
+
|
|
86
|
+
This example downloads an English dataset, the [Labour Force Survey
|
|
87
|
+
Basic Tabulation Whole Japan Monthly table Population of 15 years old
|
|
88
|
+
and over by labour force
|
|
89
|
+
status](https://www.e-stat.go.jp/en/dbview?sid=0003005798). The API url
|
|
90
|
+
for that table is assigned to enurl below.
|
|
91
|
+
|
|
92
|
+
``` python
|
|
93
|
+
import pandas
|
|
94
|
+
from dotenv import load_dotenv
|
|
95
|
+
from estatjp import api
|
|
96
|
+
enurl = 'http://api.e-stat.go.jp/rest/3.0/app/getSimpleStatsData?appId=&lang=E&statsDataId=0003005798&metaGetFlg=Y&cntGetFlg=N&explanationGetFlg=Y&annotationGetFlg=Y&sectionHeaderFlg=1&replaceSpChars=0'
|
|
97
|
+
dfs = api.get_csv_data(enurl)
|
|
98
|
+
print(dfs.get('Header'))
|
|
99
|
+
print(dfs.get('Main'))
|
|
100
|
+
print(dfs.get('Description'))
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
# References
|
|
104
|
+
|
|
105
|
+
<div id="refs" class="container references csl-bib-body hanging-indent">
|
|
106
|
+
|
|
107
|
+
<div id="ref-ashizawa2022estat.en" class="container csl-entry">
|
|
108
|
+
|
|
109
|
+
Ashizawa, Souta, Matsuda, Junichi, & Osone, Tadashi. (2022). Method for
|
|
110
|
+
improving the recall in e-stat data search. *Proceedings of Annual
|
|
111
|
+
Conference of the Information Systems Society in Japan ISSJ2022*, S1–C1.
|
|
112
|
+
<https://doi.org/10.19014/proceedingsissj.18.0_S1-C1>
|
|
113
|
+
|
|
114
|
+
</div>
|
|
115
|
+
|
|
116
|
+
<div id="ref-ashizawa2023estat.en" class="container csl-entry">
|
|
117
|
+
|
|
118
|
+
Ashizawa, Souta, Matsuda, Junichi, & Osone, Tadashi. (2023). Development
|
|
119
|
+
of front-end search system improving recall in e-stat. *Proceedings of
|
|
120
|
+
Annual Conference of the Information Systems Society in Japan ISSJ2023*,
|
|
121
|
+
1–6. <https://doi.org/10.19014/proceedingsissj.19.0_P001>
|
|
122
|
+
|
|
123
|
+
</div>
|
|
124
|
+
|
|
125
|
+
<div id="ref-cocosan2023python.en" class="container csl-entry">
|
|
126
|
+
|
|
127
|
+
cocosan. (2023). *Python apuri: Seifu tokei e-stat wo shigoto ni ikase!*
|
|
128
|
+
<https://www.youtube.com/watch?v=hiaK-jTXpCI>.
|
|
129
|
+
|
|
130
|
+
</div>
|
|
131
|
+
|
|
132
|
+
<div id="ref-higashi2024incidence" class="container csl-entry">
|
|
133
|
+
|
|
134
|
+
Higashi, Takahiro, & Kurokawa, Yukinori. (2024). Incidence, mortality,
|
|
135
|
+
survival, and treatment statistics of cancers in digestive
|
|
136
|
+
organs—japanese cancer statistics 2024. *Annals of Gastroenterological
|
|
137
|
+
Surgery*, *8*(6), 958–965. <https://doi.org/10.1002/ags3.12835>
|
|
138
|
+
|
|
139
|
+
</div>
|
|
140
|
+
|
|
141
|
+
<div id="ref-inoue2023self" class="container csl-entry">
|
|
142
|
+
|
|
143
|
+
Inoue, Takao. (2023). A self-made tutorial for GitHub flavored markdown
|
|
144
|
+
(GFM), and its source codes. *ResearchGate*.
|
|
145
|
+
<https://www.researchgate.net/publication/370937551_A_self-made_tutorial_for_GitHub_Flavored_Markdown_GFM_and_its_source_codes>
|
|
146
|
+
|
|
147
|
+
</div>
|
|
148
|
+
|
|
149
|
+
<div id="ref-kato2021residential" class="container csl-entry">
|
|
150
|
+
|
|
151
|
+
Kato, Haruka, & Takizawa, Atsushi. (2021). Which residential clusters of
|
|
152
|
+
walkability affect future population from the perspective of real estate
|
|
153
|
+
prices in the osaka metropolitan area? *Sustainability*, *13*(23),
|
|
154
|
+
13413. <https://doi.org/10.3390/su132313413>
|
|
155
|
+
|
|
156
|
+
</div>
|
|
157
|
+
|
|
158
|
+
<div id="ref-masui2021r.en" class="container csl-entry">
|
|
159
|
+
|
|
160
|
+
Masui, Toshikatsu. (2021). *R to python de manabu tokeigaku nyumon*.
|
|
161
|
+
Ohmsha.
|
|
162
|
+
|
|
163
|
+
</div>
|
|
164
|
+
|
|
165
|
+
<div id="ref-estat2016adaptor" class="container csl-entry">
|
|
166
|
+
|
|
167
|
+
National Statistics Center, Japan. (2016). *Chukan apuri*.
|
|
168
|
+
<https://github.com/e-stat-api/adaptor>.
|
|
169
|
+
|
|
170
|
+
</div>
|
|
171
|
+
|
|
172
|
+
<div id="ref-nishimura2017linked.en" class="container csl-entry">
|
|
173
|
+
|
|
174
|
+
Nishimura, Shoki. (2017). Providing statistical data by linked open data
|
|
175
|
+
(LOD): Innovative official statistical data (e-stat) dissemination.
|
|
176
|
+
*Joho Kanri*, *59*(12), 812–821.
|
|
177
|
+
<https://doi.org/10.1241/johokanri.59.812>
|
|
178
|
+
|
|
179
|
+
</div>
|
|
180
|
+
|
|
181
|
+
<div id="ref-seki2023social" class="container csl-entry">
|
|
182
|
+
|
|
183
|
+
Seki, Katsunori. (2023). Social identification and redistribution
|
|
184
|
+
preference: A survey experiment in japan. *Social Science Japan
|
|
185
|
+
Journal*, *26*(1), 47–60. <https://doi.org/10.1093/ssjj/jyac029>
|
|
186
|
+
|
|
187
|
+
</div>
|
|
188
|
+
|
|
189
|
+
<div id="ref-takahashi2022estat.en" class="container csl-entry">
|
|
190
|
+
|
|
191
|
+
Takahashi, Shūichiro. (2022). *E-stat to nakayokusuru hon: Python to
|
|
192
|
+
ōpun deta de nihon wo bunseki suru! API keiyu de seifu tōkei wo shutoku!
|
|
193
|
+
katsuyo!* Impress R&D.
|
|
194
|
+
|
|
195
|
+
</div>
|
|
196
|
+
|
|
197
|
+
<div id="ref-wakabayashi2015public.en" class="container csl-entry">
|
|
198
|
+
|
|
199
|
+
Wakabayashi, Chihiro, Shinmura, Hiromi, Ando, Miri, Shimada, Masako, &
|
|
200
|
+
Yanagawa, Hiroshi. (2015). Kōeisei topikksu dai 13 kai seifutōkei no
|
|
201
|
+
sōgōmadoguchi e-stat: Chiiki shindan he no katsuyō - jissen herusu
|
|
202
|
+
puromōshon. *Gekkan Chiiki Igaku*, *29*(2), 52.
|
|
203
|
+
<https://doi.org/10.60261/chiikiigaku.29.2_52>
|
|
204
|
+
|
|
205
|
+
</div>
|
|
206
|
+
|
|
207
|
+
<div id="ref-ashizawa2022estat.ja" class="container csl-entry">
|
|
208
|
+
|
|
209
|
+
芦澤颯太, 松田純一, & 大曽根匡. (2022). E-stat
|
|
210
|
+
での統計データ検索におけるいくつかの課題抽出とその解決方法の提案.
|
|
211
|
+
*情報システム学会 全国大会論文集 ISSJ2022*, S1–C1.
|
|
212
|
+
<https://doi.org/10.19014/proceedingsissj.18.0_S1-C1>
|
|
213
|
+
|
|
214
|
+
</div>
|
|
215
|
+
|
|
216
|
+
<div id="ref-ashizawa2023estat.ja" class="container csl-entry">
|
|
217
|
+
|
|
218
|
+
芦澤颯太, 松田純一, & 大曽根匡. (2023). E-stat
|
|
219
|
+
における検索漏れを抑止する情報システムの開発とその検証.
|
|
220
|
+
*情報システム学会 全国大会論文集 情報システム学会*, 1–6.
|
|
221
|
+
<https://doi.org/10.19014/proceedingsissj.19.0_P001>
|
|
222
|
+
|
|
223
|
+
</div>
|
|
224
|
+
|
|
225
|
+
<div id="ref-wakabayashi2015public.ja" class="container csl-entry">
|
|
226
|
+
|
|
227
|
+
若林チヒロ, 新村洋未, 安藤実里, 嶋田雅子, & 柳川洋. (2015).
|
|
228
|
+
公衆衛生トピックス 第 13 回 政府統計の総合窓口
|
|
229
|
+
e-stat-地域診断への活用-実践ヘルスプロモーション. *月刊地域医学*,
|
|
230
|
+
*29*(2), 52. <https://doi.org/10.60261/chiikiigaku.29.2_52>
|
|
231
|
+
|
|
232
|
+
</div>
|
|
233
|
+
|
|
234
|
+
<div id="ref-nishimura2017linked.ja" class="container csl-entry">
|
|
235
|
+
|
|
236
|
+
西村正貴. (2017). Linked open data (LOD) による統計データの提供:
|
|
237
|
+
政府統計データ (e-stat) の新しい形. *情報管理*, *59*(12), 812–821.
|
|
238
|
+
<https://doi.org/10.1241/johokanri.59.812>
|
|
239
|
+
|
|
240
|
+
</div>
|
|
241
|
+
|
|
242
|
+
</div>
|
estatjp-0.1.1/README.md
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# estatjp
|
|
2
|
+
|
|
3
|
+
[E-Stat](https://www.e-stat.go.jp/en) is a widely used portal site for
|
|
4
|
+
accessing Japanese governmental statistical data. It began operation in
|
|
5
|
+
2008. e-Stat currently hosts [744 surveys (1,688,550 datasets) in
|
|
6
|
+
Japanese](https://www.e-stat.go.jp/stat-search?page=1) from about 30
|
|
7
|
+
governmental agencies with [56 surveys (292,856 datasets) available in
|
|
8
|
+
English](https://www.e-stat.go.jp/en/stat-search?page=1). These
|
|
9
|
+
collections contain 'databases' and files (mainly Excel files). The
|
|
10
|
+
'databases' can be accessed via an API. API urls can cover entire
|
|
11
|
+
databases or subsets that can be tailored to users' individual needs.
|
|
12
|
+
|
|
13
|
+
The objective of the estatjp Python package is to provide access to the
|
|
14
|
+
e-Stat portal and return datasets in pandas.DataFrame format.
|
|
15
|
+
|
|
16
|
+
For example, the e-Stat API returns CSV streams that contain headers
|
|
17
|
+
with metadata. These headers interfere with pandas.read_csv. The first
|
|
18
|
+
release of estatjp returns a dictionary that contains the header and
|
|
19
|
+
main table as separate dataframes.
|
|
20
|
+
|
|
21
|
+
## Requirement
|
|
22
|
+
|
|
23
|
+
The e-Stat API requires an application ID that can be obtained from the
|
|
24
|
+
[E-Stat API](https://www.e-stat.go.jp/api/en) page. Install this ID into
|
|
25
|
+
your project by opening a terminal in your project root and running
|
|
26
|
+
the following commands:
|
|
27
|
+
|
|
28
|
+
pip install python-dotenv
|
|
29
|
+
dotenv set ESTAT_APP_ID your-app-id
|
|
30
|
+
|
|
31
|
+
## Install this package
|
|
32
|
+
|
|
33
|
+
pip install estatjp
|
|
34
|
+
|
|
35
|
+
## Example
|
|
36
|
+
|
|
37
|
+
This example downloads an English dataset, the [Labour Force Survey
|
|
38
|
+
Basic Tabulation Whole Japan Monthly table Population of 15 years old
|
|
39
|
+
and over by labour force
|
|
40
|
+
status](https://www.e-stat.go.jp/en/dbview?sid=0003005798). The API url
|
|
41
|
+
for that table is assigned to enurl below.
|
|
42
|
+
|
|
43
|
+
``` python
|
|
44
|
+
import pandas
|
|
45
|
+
from dotenv import load_dotenv
|
|
46
|
+
from estatjp import api
|
|
47
|
+
enurl = 'http://api.e-stat.go.jp/rest/3.0/app/getSimpleStatsData?appId=&lang=E&statsDataId=0003005798&metaGetFlg=Y&cntGetFlg=N&explanationGetFlg=Y&annotationGetFlg=Y&sectionHeaderFlg=1&replaceSpChars=0'
|
|
48
|
+
dfs = api.get_csv_data(enurl)
|
|
49
|
+
print(dfs.get('Header'))
|
|
50
|
+
print(dfs.get('Main'))
|
|
51
|
+
print(dfs.get('Description'))
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
# References
|
|
55
|
+
|
|
56
|
+
<div id="refs" class="container references csl-bib-body hanging-indent">
|
|
57
|
+
|
|
58
|
+
<div id="ref-ashizawa2022estat.en" class="container csl-entry">
|
|
59
|
+
|
|
60
|
+
Ashizawa, Souta, Matsuda, Junichi, & Osone, Tadashi. (2022). Method for
|
|
61
|
+
improving the recall in e-stat data search. *Proceedings of Annual
|
|
62
|
+
Conference of the Information Systems Society in Japan ISSJ2022*, S1–C1.
|
|
63
|
+
<https://doi.org/10.19014/proceedingsissj.18.0_S1-C1>
|
|
64
|
+
|
|
65
|
+
</div>
|
|
66
|
+
|
|
67
|
+
<div id="ref-ashizawa2023estat.en" class="container csl-entry">
|
|
68
|
+
|
|
69
|
+
Ashizawa, Souta, Matsuda, Junichi, & Osone, Tadashi. (2023). Development
|
|
70
|
+
of front-end search system improving recall in e-stat. *Proceedings of
|
|
71
|
+
Annual Conference of the Information Systems Society in Japan ISSJ2023*,
|
|
72
|
+
1–6. <https://doi.org/10.19014/proceedingsissj.19.0_P001>
|
|
73
|
+
|
|
74
|
+
</div>
|
|
75
|
+
|
|
76
|
+
<div id="ref-cocosan2023python.en" class="container csl-entry">
|
|
77
|
+
|
|
78
|
+
cocosan. (2023). *Python apuri: Seifu tokei e-stat wo shigoto ni ikase!*
|
|
79
|
+
<https://www.youtube.com/watch?v=hiaK-jTXpCI>.
|
|
80
|
+
|
|
81
|
+
</div>
|
|
82
|
+
|
|
83
|
+
<div id="ref-higashi2024incidence" class="container csl-entry">
|
|
84
|
+
|
|
85
|
+
Higashi, Takahiro, & Kurokawa, Yukinori. (2024). Incidence, mortality,
|
|
86
|
+
survival, and treatment statistics of cancers in digestive
|
|
87
|
+
organs—japanese cancer statistics 2024. *Annals of Gastroenterological
|
|
88
|
+
Surgery*, *8*(6), 958–965. <https://doi.org/10.1002/ags3.12835>
|
|
89
|
+
|
|
90
|
+
</div>
|
|
91
|
+
|
|
92
|
+
<div id="ref-inoue2023self" class="container csl-entry">
|
|
93
|
+
|
|
94
|
+
Inoue, Takao. (2023). A self-made tutorial for GitHub flavored markdown
|
|
95
|
+
(GFM), and its source codes. *ResearchGate*.
|
|
96
|
+
<https://www.researchgate.net/publication/370937551_A_self-made_tutorial_for_GitHub_Flavored_Markdown_GFM_and_its_source_codes>
|
|
97
|
+
|
|
98
|
+
</div>
|
|
99
|
+
|
|
100
|
+
<div id="ref-kato2021residential" class="container csl-entry">
|
|
101
|
+
|
|
102
|
+
Kato, Haruka, & Takizawa, Atsushi. (2021). Which residential clusters of
|
|
103
|
+
walkability affect future population from the perspective of real estate
|
|
104
|
+
prices in the osaka metropolitan area? *Sustainability*, *13*(23),
|
|
105
|
+
13413. <https://doi.org/10.3390/su132313413>
|
|
106
|
+
|
|
107
|
+
</div>
|
|
108
|
+
|
|
109
|
+
<div id="ref-masui2021r.en" class="container csl-entry">
|
|
110
|
+
|
|
111
|
+
Masui, Toshikatsu. (2021). *R to python de manabu tokeigaku nyumon*.
|
|
112
|
+
Ohmsha.
|
|
113
|
+
|
|
114
|
+
</div>
|
|
115
|
+
|
|
116
|
+
<div id="ref-estat2016adaptor" class="container csl-entry">
|
|
117
|
+
|
|
118
|
+
National Statistics Center, Japan. (2016). *Chukan apuri*.
|
|
119
|
+
<https://github.com/e-stat-api/adaptor>.
|
|
120
|
+
|
|
121
|
+
</div>
|
|
122
|
+
|
|
123
|
+
<div id="ref-nishimura2017linked.en" class="container csl-entry">
|
|
124
|
+
|
|
125
|
+
Nishimura, Shoki. (2017). Providing statistical data by linked open data
|
|
126
|
+
(LOD): Innovative official statistical data (e-stat) dissemination.
|
|
127
|
+
*Joho Kanri*, *59*(12), 812–821.
|
|
128
|
+
<https://doi.org/10.1241/johokanri.59.812>
|
|
129
|
+
|
|
130
|
+
</div>
|
|
131
|
+
|
|
132
|
+
<div id="ref-seki2023social" class="container csl-entry">
|
|
133
|
+
|
|
134
|
+
Seki, Katsunori. (2023). Social identification and redistribution
|
|
135
|
+
preference: A survey experiment in japan. *Social Science Japan
|
|
136
|
+
Journal*, *26*(1), 47–60. <https://doi.org/10.1093/ssjj/jyac029>
|
|
137
|
+
|
|
138
|
+
</div>
|
|
139
|
+
|
|
140
|
+
<div id="ref-takahashi2022estat.en" class="container csl-entry">
|
|
141
|
+
|
|
142
|
+
Takahashi, Shūichiro. (2022). *E-stat to nakayokusuru hon: Python to
|
|
143
|
+
ōpun deta de nihon wo bunseki suru! API keiyu de seifu tōkei wo shutoku!
|
|
144
|
+
katsuyo!* Impress R&D.
|
|
145
|
+
|
|
146
|
+
</div>
|
|
147
|
+
|
|
148
|
+
<div id="ref-wakabayashi2015public.en" class="container csl-entry">
|
|
149
|
+
|
|
150
|
+
Wakabayashi, Chihiro, Shinmura, Hiromi, Ando, Miri, Shimada, Masako, &
|
|
151
|
+
Yanagawa, Hiroshi. (2015). Kōeisei topikksu dai 13 kai seifutōkei no
|
|
152
|
+
sōgōmadoguchi e-stat: Chiiki shindan he no katsuyō - jissen herusu
|
|
153
|
+
puromōshon. *Gekkan Chiiki Igaku*, *29*(2), 52.
|
|
154
|
+
<https://doi.org/10.60261/chiikiigaku.29.2_52>
|
|
155
|
+
|
|
156
|
+
</div>
|
|
157
|
+
|
|
158
|
+
<div id="ref-ashizawa2022estat.ja" class="container csl-entry">
|
|
159
|
+
|
|
160
|
+
芦澤颯太, 松田純一, & 大曽根匡. (2022). E-stat
|
|
161
|
+
での統計データ検索におけるいくつかの課題抽出とその解決方法の提案.
|
|
162
|
+
*情報システム学会 全国大会論文集 ISSJ2022*, S1–C1.
|
|
163
|
+
<https://doi.org/10.19014/proceedingsissj.18.0_S1-C1>
|
|
164
|
+
|
|
165
|
+
</div>
|
|
166
|
+
|
|
167
|
+
<div id="ref-ashizawa2023estat.ja" class="container csl-entry">
|
|
168
|
+
|
|
169
|
+
芦澤颯太, 松田純一, & 大曽根匡. (2023). E-stat
|
|
170
|
+
における検索漏れを抑止する情報システムの開発とその検証.
|
|
171
|
+
*情報システム学会 全国大会論文集 情報システム学会*, 1–6.
|
|
172
|
+
<https://doi.org/10.19014/proceedingsissj.19.0_P001>
|
|
173
|
+
|
|
174
|
+
</div>
|
|
175
|
+
|
|
176
|
+
<div id="ref-wakabayashi2015public.ja" class="container csl-entry">
|
|
177
|
+
|
|
178
|
+
若林チヒロ, 新村洋未, 安藤実里, 嶋田雅子, & 柳川洋. (2015).
|
|
179
|
+
公衆衛生トピックス 第 13 回 政府統計の総合窓口
|
|
180
|
+
e-stat-地域診断への活用-実践ヘルスプロモーション. *月刊地域医学*,
|
|
181
|
+
*29*(2), 52. <https://doi.org/10.60261/chiikiigaku.29.2_52>
|
|
182
|
+
|
|
183
|
+
</div>
|
|
184
|
+
|
|
185
|
+
<div id="ref-nishimura2017linked.ja" class="container csl-entry">
|
|
186
|
+
|
|
187
|
+
西村正貴. (2017). Linked open data (LOD) による統計データの提供:
|
|
188
|
+
政府統計データ (e-stat) の新しい形. *情報管理*, *59*(12), 812–821.
|
|
189
|
+
<https://doi.org/10.1241/johokanri.59.812>
|
|
190
|
+
|
|
191
|
+
</div>
|
|
192
|
+
|
|
193
|
+
</div>
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
# Build Configuration
|
|
3
|
+
################################################################################
|
|
4
|
+
|
|
5
|
+
[build-system]
|
|
6
|
+
build-backend = "hatchling.build"
|
|
7
|
+
requires = ["hatchling"]
|
|
8
|
+
|
|
9
|
+
################################################################################
|
|
10
|
+
# Project Configuration
|
|
11
|
+
################################################################################
|
|
12
|
+
|
|
13
|
+
[project]
|
|
14
|
+
name = "estatjp"
|
|
15
|
+
# You can choose between dynamic versioning with hatch and static versioning, where you set the version manually.
|
|
16
|
+
version = "v0.1.1"
|
|
17
|
+
|
|
18
|
+
description = "A Python package for accessing Japanese government data on its e-Stat portal"
|
|
19
|
+
authors = [
|
|
20
|
+
{ name = "Alan Engel", email = "kijinosu@proton.me" },
|
|
21
|
+
]
|
|
22
|
+
license = "MIT"
|
|
23
|
+
requires-python = ">= 3.14" # Adjust based on the minimum version of Python that you support
|
|
24
|
+
readme = {"file" = "README.md", "content-type" = "text/markdown"}
|
|
25
|
+
# readme = "README.rst"
|
|
26
|
+
# Please consult https://pypi.org/classifiers/ for a full list.
|
|
27
|
+
classifiers = [
|
|
28
|
+
"Development Status :: 2 - Pre-Alpha",
|
|
29
|
+
"Intended Audience :: Science/Research",
|
|
30
|
+
"License :: OSI Approved :: MIT License",
|
|
31
|
+
"Operating System :: OS Independent",
|
|
32
|
+
"Programming Language :: Python :: 3.14",
|
|
33
|
+
]
|
|
34
|
+
# TODO: add keywords
|
|
35
|
+
keywords = []
|
|
36
|
+
# TODO: add dependencies
|
|
37
|
+
dependencies = [
|
|
38
|
+
"Sphinx>=3.5",
|
|
39
|
+
"docutils>=0.8,!=0.18.*,!=0.19.*",
|
|
40
|
+
"pybtex>=0.25",
|
|
41
|
+
"pybtex-docutils>=1.0.2",
|
|
42
|
+
"importlib_metadata>=3.6; python_version < '3.10'",
|
|
43
|
+
"pandas>=3.0.1",
|
|
44
|
+
]
|
|
45
|
+
exclude_patterns = ["_build"]
|
|
46
|
+
bibtex_bibfiles = ["bibliography.bib"]
|
|
47
|
+
|
|
48
|
+
[project.urls]
|
|
49
|
+
Homepage = "https://github.com/kijinosu/estatjp"
|
|
50
|
+
"Source Code" = "https://github.com/kijinosu/estatjp"
|
|
51
|
+
"Bug Tracker" = "https://github.com/kijinosu/estatjp/issues"
|
|
52
|
+
Documentation = "https://github.com/kijinosu/estatjp/blob/main/README.md"
|
|
53
|
+
Download = "https://pypi.org/project/estatjp/#files"
|
|
54
|
+
|
|
55
|
+
[project.optional-dependencies]
|
|
56
|
+
# The groups below should be in the [dependency-groups] table
|
|
57
|
+
# They are here now because hatch hasn't released support for them but plans to
|
|
58
|
+
# in Mid November 2025.
|
|
59
|
+
dev = [
|
|
60
|
+
"hatch",
|
|
61
|
+
"pre-commit",
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
docs = [
|
|
65
|
+
"nbsphinx",
|
|
66
|
+
"sphinx~=8.0",
|
|
67
|
+
"myst-parser>=4.0",
|
|
68
|
+
"pydata-sphinx-theme~=0.16",
|
|
69
|
+
"sphinx-autobuild>=2024.10.3",
|
|
70
|
+
"sphinx-autoapi>=3.6.0",
|
|
71
|
+
"sphinx_design>=0.6.1",
|
|
72
|
+
"sphinx-copybutton>=0.5.2",
|
|
73
|
+
"sphinxcontrib.bibtex>=2.6.5"
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
build = [
|
|
77
|
+
"pip-audit",
|
|
78
|
+
"twine",
|
|
79
|
+
]
|
|
80
|
+
tests = [
|
|
81
|
+
"pytest",
|
|
82
|
+
"pytest-cov",
|
|
83
|
+
"pytest-raises",
|
|
84
|
+
"pytest-randomly",
|
|
85
|
+
"pytest-xdist",
|
|
86
|
+
"sphinx-autoapi>=3.6.0",
|
|
87
|
+
]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
################################################################################
|
|
91
|
+
# Tool Configuration
|
|
92
|
+
################################################################################
|
|
93
|
+
|
|
94
|
+
# Hatch is building your package's wheel and sdist
|
|
95
|
+
# This tells hatch to only include Python packages (i.e., folders with __init__.py) in the build.
|
|
96
|
+
# read more about package building, here:
|
|
97
|
+
# https://www.pyopensci.org/python-package-guide/package-structure-code/python-package-distribution-files-sdist-wheel.html
|
|
98
|
+
[tool.hatch.build]
|
|
99
|
+
only-packages = true
|
|
100
|
+
|
|
101
|
+
# This tells Hatch to build the package from the src/ directory.
|
|
102
|
+
# Read more about src layouts here: https://www.pyopensci.org/python-package-guide/package-structure-code/python-package-structure.html
|
|
103
|
+
[tool.hatch.build.targets.wheel]
|
|
104
|
+
packages = ["src/estatjp"]
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
######## Configure pytest for your test suite ########
|
|
109
|
+
[tool.pytest.ini_options]
|
|
110
|
+
testpaths = ["tests"] # Tells pytest what directory tests are in
|
|
111
|
+
markers = ["raises"] # Tells pytest to not raise a warning if you use @pytest.mark.raises
|
|
112
|
+
|
|
113
|
+
[tool.coverage.paths]
|
|
114
|
+
source = [
|
|
115
|
+
"src/estatjp",
|
|
116
|
+
"*/site-packages/estatjp",
|
|
117
|
+
]
|
|
118
|
+
|
|
119
|
+
[tool.coverage.run]
|
|
120
|
+
# Ensures code coverage is measured for branches (conditional statements with different outcomes) in your code.
|
|
121
|
+
branch = true
|
|
122
|
+
parallel = true
|
|
123
|
+
|
|
124
|
+
[tool.coverage.report]
|
|
125
|
+
# This configures the output test coverage report
|
|
126
|
+
exclude_lines = ["pragma: no cover"]
|
|
127
|
+
precision = 2
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# Use UV to create Hatch environments
|
|
131
|
+
[tool.hatch.envs.default]
|
|
132
|
+
installer = "uv"
|
|
133
|
+
|
|
134
|
+
################################################################################
|
|
135
|
+
# Hatch Environments
|
|
136
|
+
################################################################################
|
|
137
|
+
|
|
138
|
+
#--------------- Build and check your package ---------------#
|
|
139
|
+
|
|
140
|
+
# This table installs the tools you need to test and build your package
|
|
141
|
+
[tool.hatch.envs.build]
|
|
142
|
+
description = """Test the installation the package."""
|
|
143
|
+
dependencies = [
|
|
144
|
+
"pip",
|
|
145
|
+
"twine",
|
|
146
|
+
]
|
|
147
|
+
features = [
|
|
148
|
+
"build",
|
|
149
|
+
]
|
|
150
|
+
detached = true
|
|
151
|
+
builder = true
|
|
152
|
+
|
|
153
|
+
# This table creates the command `hatch run build:check`, which will build and check your package.
|
|
154
|
+
[tool.hatch.envs.build.scripts]
|
|
155
|
+
check = [
|
|
156
|
+
"pandoc -f bibtex -t rst bibliography.bib --citeproc --csl .pandoc/styles/apa-no-initials.csl -o .pandoc/bibliography.rst",
|
|
157
|
+
"pandoc -f rst -t gfm .pandoc/READMEbody.rst .pandoc/bibliography.rst -o README.md",
|
|
158
|
+
"pip check",
|
|
159
|
+
"hatch build {args:--clean}",
|
|
160
|
+
"twine check dist/*",
|
|
161
|
+
]
|
|
162
|
+
|
|
163
|
+
#--------------- Run tests ---------------#
|
|
164
|
+
[tool.hatch.envs.test]
|
|
165
|
+
description = """Run the test suite."""
|
|
166
|
+
features = [
|
|
167
|
+
"tests",
|
|
168
|
+
]
|
|
169
|
+
|
|
170
|
+
[[tool.hatch.envs.test.matrix]]
|
|
171
|
+
python = ["3.12", "3.13"]
|
|
172
|
+
|
|
173
|
+
[tool.hatch.envs.test.scripts]
|
|
174
|
+
run = "pytest {args:--cov=estatjp --cov-report=term-missing --cov-report=xml}"
|
|
175
|
+
|
|
176
|
+
#--------------- Build and preview your documentation ---------------#
|
|
177
|
+
|
|
178
|
+
# This sets up a hatch environment with associated dependencies that need to be installed
|
|
179
|
+
[tool.hatch.envs.docs]
|
|
180
|
+
description = """Build or serve the documentation."""
|
|
181
|
+
# Install the optional dependency group for docs
|
|
182
|
+
features = [
|
|
183
|
+
"docs",
|
|
184
|
+
]
|
|
185
|
+
|
|
186
|
+
# This table contains the scripts that you can use to build and serve your docs
|
|
187
|
+
# hatch run docs:build will build your documentation
|
|
188
|
+
# hatch run docs:serve will serve them 'live' on your computer locally
|
|
189
|
+
[tool.hatch.envs.docs.scripts]
|
|
190
|
+
build = ["sphinx-build {args:-W -b html docs docs/_build}"]
|
|
191
|
+
serve = ["sphinx-autobuild docs --watch src/estatjp {args:-b html docs/_build/serve}"]
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
#--------------- Check security for your dependencies ---------------#
|
|
196
|
+
|
|
197
|
+
[tool.hatch.envs.audit]
|
|
198
|
+
description = """Check dependencies for security vulnerabilities."""
|
|
199
|
+
features = [
|
|
200
|
+
"build",
|
|
201
|
+
]
|
|
202
|
+
|
|
203
|
+
[tool.hatch.envs.audit.scripts]
|
|
204
|
+
check = ["pip-audit"]
|
|
205
|
+
|
|
206
|
+
#----------------sphinxcontrib-bibtex--------------------------------#
|
|
207
|
+
last = "sphinxcontrib.bibtex.style.names.last:LastNameStyle"
|
|
208
|
+
|
|
209
|
+
[project.entry-points."sphinxcontrib.bibtex.style.referencing"]
|
|
210
|
+
author_year = "sphinxcontrib.bibtex.style.referencing.author_year:AuthorYearReferenceStyle"
|
|
211
|
+
foot = "sphinxcontrib.bibtex.style.referencing.foot:FootReferenceStyle"
|
|
212
|
+
label = "sphinxcontrib.bibtex.style.referencing.label:LabelReferenceStyle"
|
|
213
|
+
super = "sphinxcontrib.bibtex.style.referencing.super_:SuperReferenceStyle"
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# MIT License
|
|
2
|
+
#
|
|
3
|
+
# Copyright (c) 2026 Alan Engel
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
# furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice (including the next
|
|
13
|
+
# paragraph) shall be included in all copies or substantial portions of the
|
|
14
|
+
# Software.
|
|
15
|
+
#
|
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
# SOFTWARE.
|
|
23
|
+
|
|
24
|
+
"""
|
|
25
|
+
The `E-Stat`_ portal site contains 'databases' that can be accessed via API URLs or downloaded as XML, CSV and JSON files, as well as files (mainly Excel files) that can be downloaded. This package will eventually provide for converting these into `pandas DataFrames`_. This early release handles only API URL calls with CSV downloads.
|
|
26
|
+
|
|
27
|
+
.. _E-Stat: https://www.e-stat.go.jp/en
|
|
28
|
+
.. _pandas DataFrames: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html
|
|
29
|
+
|
|
30
|
+
"""
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""A module for accessing e-Stat data using its API.
|
|
2
|
+
|
|
3
|
+
The API provides data in CSV, JSON and XML formats. This version provides for the CSV format only.
|
|
4
|
+
|
|
5
|
+
The main task is to request and parse a CSV stream to produce a `pandas.DataFrame` object. `pandas.read_csv()` cannot be used as-is because CSV streams from e-Stat start with a header of metadata, which confuses pandas. For more detail, see the development notes chronicled in the Read the Docs pages [DevAPI01.ipynb](https://estatpy.readthedocs.io/en/latest/chronicle/DevAPI01.html) and [DevAPI02.ipynb](https://estatpy.readthedocs.io/en/latest/chronicle/DevAPI02.html).
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
"""
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import os
|
|
12
|
+
import requests
|
|
13
|
+
import tempfile
|
|
14
|
+
import re
|
|
15
|
+
import datetime
|
|
16
|
+
from dotenv import load_dotenv
|
|
17
|
+
import os
|
|
18
|
+
|
|
19
|
+
def get_csv_data(url, description=None, *, timeout=None):
    """Retrieve a CSV stream from e-Stat using an API url and create a pandas.DataFrame.

    The application ID is read from the ``ESTAT_APP_ID`` environment variable
    (after calling ``load_dotenv()``) and inserted into the ``appId=`` query
    parameter of ``url``.

    :param url: An API url obtained from e-Stat, for example, the [2020-base consumer price index](https://www.e-stat.go.jp/en/stat-search/database?page=1&layout=datalist&toukei=00200573&tstat=000001150147&cycle=0&tclass1val=0). It must contain ``appId=`` exactly once; any value already following it is replaced by the configured application ID.

    :param description: An optional object that the user can supply to help document her search. The default (``None``) is replaced by the time of running this function.

    :param timeout: Optional timeout passed through to ``requests.get``. The default ``None`` waits indefinitely, as before.

    :return: Dictionary containing the Header in the form of a pandas.DataFrame, the Main table also in the form of a pandas.DataFrame, and the Description.

    :raises KeyError: If the environment variable ``ESTAT_APP_ID`` is not set.
    :raises OSError: If ``ESTAT_APP_ID`` is set but empty.
    :raises ValueError: If ``url`` does not contain ``appId=`` exactly once, or if the response has no row starting with ``"VALUE"``.
    :raises requests.RequestException: If the HTTP request fails or returns an error status.

    """
    # Function-scope import: the module's top-level imports do not include io.
    import io

    # Evaluate the default per call; a default of datetime.now() in the
    # signature would be frozen at import time.
    if description is None:
        description = datetime.datetime.now()

    try:
        load_dotenv()
    except (FileNotFoundError, IOError) as e:
        e.add_note('Environment variable file (.env) not found. See README.')
        raise

    try:
        app_id = os.environ['ESTAT_APP_ID']
    except KeyError as e:
        e.add_note('Environment variable ESTAT_APP_ID not found. See README.')
        raise

    # os.environ never yields None, so test for an empty/blank value instead.
    if not app_id.strip():
        raise OSError("Value of environment variable 'ESTAT_APP_ID' not found. See README.")

    if url.count("appId=") != 1:
        raise ValueError("Invalid API url")
    head, _, tail = url.partition("appId=")
    # Drop whatever value already follows "appId=" (normally nothing) so the
    # configured ID is not concatenated with a stale one.
    _, amp, remainder = tail.partition("&")
    url = head + "appId=" + app_id + amp + remainder

    # stream=False: chunking in iter_lines doesn't work for stream=True
    with requests.get(url, stream=False, timeout=timeout) as estatresponse:
        estatresponse.raise_for_status()

        if estatresponse.encoding is None:
            estatresponse.encoding = 'utf-8'
        estatlines = estatresponse.iter_lines(chunk_size=1024, decode_unicode=True)
        header_text, main_text, colnum = _split_estat_csv(estatlines)

    # Metadata rows have varying widths, so give pandas explicit column names
    # wide enough for the longest row, then drop the columns that stayed empty.
    dfHeader = pd.read_csv(io.StringIO(header_text), names=range(colnum))
    dfHeader = dfHeader.dropna(axis=1, how="all")
    dfMain = pd.read_csv(io.StringIO(main_text))

    return {'Description': description, 'Header': dfHeader, 'Main': dfMain}


def _split_estat_csv(lines):
    """Split e-Stat CSV lines into metadata text, data-table text and metadata width.

    The CSV has several rows of metadata terminated by a row starting with
    "VALUE"; the data table starts on the next row.

    :param lines: Iterable of decoded text lines without line terminators.

    :return: Tuple ``(header_text, main_text, colnum)`` where ``colnum`` is the
        largest number of ``","``-separated fields seen in a metadata row.

    :raises ValueError: If no row starting with ``"VALUE"`` is found.
    """
    header_rows = []
    main_rows = []
    colnum = 0
    inheader = True
    for line in lines:
        if inheader:
            colnum = max(colnum, len(line.split('","')))
            header_rows.append(line)
            if line.startswith('"VALUE"'):
                inheader = False
        else:
            main_rows.append(line)

    if inheader:
        # Show the start of the response in place of a temp-file path: the
        # temporary file used previously was already deleted by the time the
        # caller saw the message.
        preview = "\n".join(header_rows[:5])[:500]
        raise ValueError(
            "The stream that e-Stat returned lacks a 'VALUE' line. "
            "Start of response: " + repr(preview)
        )

    header_text = "".join(row + "\n" for row in header_rows)
    main_text = "".join(row + "\n" for row in main_rows)
    return header_text, main_text, colnum
|