estatjp 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,250 @@
1
+ ## my additions
2
+ # .mynotes
3
+
4
+ ### Linux template
5
+ *~
6
+
7
+ # temporary files which can be created if a process still has a handle open of a deleted file
8
+ .fuse_hidden*
9
+
10
+ # KDE directory preferences
11
+ .directory
12
+
13
+ # Linux trash folder which might appear on any partition or disk
14
+ .Trash-*
15
+
16
+ # .nfs files are created when an open file is removed but is still being accessed
17
+ .nfs*
18
+
19
+ ### Windows template
20
+ # Windows thumbnail cache files
21
+ Thumbs.db
22
+ Thumbs.db:encryptable
23
+ ehthumbs.db
24
+ ehthumbs_vista.db
25
+
26
+ # Dump file
27
+ *.stackdump
28
+
29
+ # Folder config file
30
+ [Dd]esktop.ini
31
+
32
+ # Recycle Bin used on file shares
33
+ $RECYCLE.BIN/
34
+
35
+ # Windows Installer files
36
+ *.cab
37
+ *.msi
38
+ *.msix
39
+ *.msm
40
+ *.msp
41
+
42
+ # Windows shortcuts
43
+ *.lnk
44
+
45
+ ### JupyterNotebooks template
46
+ # gitignore template for Jupyter Notebooks
47
+ # website: http://jupyter.org/
48
+
49
+ .ipynb_checkpoints
50
+ */.ipynb_checkpoints/*
51
+
52
+ # IPython
53
+ profile_default/
54
+ ipython_config.py
55
+
56
+ # Remove previous ipynb_checkpoints
57
+ # git rm -r .ipynb_checkpoints/
58
+
59
+ ### macOS template
60
+ # General
61
+ .DS_Store
62
+ .AppleDouble
63
+ .LSOverride
64
+
65
+ # Icon must end with two \r
66
+ Icon
67
+
68
+ # Thumbnails
69
+ ._*
70
+
71
+ # Files that might appear in the root of a volume
72
+ .DocumentRevisions-V100
73
+ .fseventsd
74
+ .Spotlight-V100
75
+ .TemporaryItems
76
+ .Trashes
77
+ .VolumeIcon.icns
78
+ .com.apple.timemachine.donotpresent
79
+
80
+ # Directories potentially created on remote AFP share
81
+ .AppleDB
82
+ .AppleDesktop
83
+ Network Trash Folder
84
+ Temporary Items
85
+ .apdisk
86
+
87
+ ### Python template
88
+ # Byte-compiled / optimized / DLL files
89
+ __pycache__/
90
+ *.py[cod]
91
+ *$py.class
92
+
93
+ # C extensions
94
+ *.so
95
+
96
+ # Distribution / packaging
97
+ .pypirc
98
+ .Python
99
+ build/
100
+ develop-eggs/
101
+ dist/
102
+ downloads/
103
+ eggs/
104
+ .eggs/
105
+ lib/
106
+ lib64/
107
+ parts/
108
+ sdist/
109
+ var/
110
+ wheels/
111
+ share/python-wheels/
112
+ *.egg-info/
113
+ .installed.cfg
114
+ *.egg
115
+ MANIFEST
116
+ .mynotes/
117
+
118
+ # PyInstaller
119
+ # Usually these files are written by a python script from a template
120
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
121
+ *.manifest
122
+ *.spec
123
+
124
+ # Installer logs
125
+ pip-log.txt
126
+ pip-delete-this-directory.txt
127
+
128
+ # Unit test / coverage reports
129
+ htmlcov/
130
+ .tox/
131
+ .nox/
132
+ .coverage
133
+ .coverage.*
134
+ .cache
135
+ nosetests.xml
136
+ coverage.xml
137
+ *.cover
138
+ *.py,cover
139
+ .hypothesis/
140
+ .pytest_cache/
141
+ cover/
142
+
143
+ # Translations
144
+ *.mo
145
+ *.pot
146
+
147
+ # Django stuff:
148
+ *.log
149
+ local_settings.py
150
+ db.sqlite3
151
+ db.sqlite3-journal
152
+
153
+ # Flask stuff:
154
+ instance/
155
+ .webassets-cache
156
+
157
+ # Scrapy stuff:
158
+ .scrapy
159
+
160
+ # Sphinx documentation
161
+ docs/_build/
162
+ docs/api
163
+
164
+ # PyBuilder
165
+ .pybuilder/
166
+ target/
167
+
168
+ # Jupyter Notebook
169
+
170
+ # IPython
171
+
172
+ # pyenv
173
+ # For a library or package, you might want to ignore these files since the code is
174
+ # intended to run in multiple environments; otherwise, check them in:
175
+ # .python-version
176
+
177
+ # pipenv
178
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
179
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
180
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
181
+ # install all needed dependencies.
182
+ #Pipfile.lock
183
+
184
+ # poetry
185
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
186
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
187
+ # commonly ignored for libraries.
188
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
189
+ #poetry.lock
190
+
191
+ # pdm
192
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
193
+ #pdm.lock
194
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
195
+ # in version control.
196
+ # https://pdm.fming.dev/#use-with-ide
197
+ .pdm.toml
198
+
199
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
200
+ __pypackages__/
201
+
202
+ # Celery stuff
203
+ celerybeat-schedule
204
+ celerybeat.pid
205
+
206
+ # SageMath parsed files
207
+ *.sage.py
208
+
209
+ # Environments
210
+ .env
211
+ .venv
212
+ env/
213
+ venv/
214
+ ENV/
215
+ env.bak/
216
+ venv.bak/
217
+
218
+ # Spyder project settings
219
+ .spyderproject
220
+ .spyproject
221
+
222
+ # Rope project settings
223
+ .ropeproject
224
+
225
+ # mkdocs documentation
226
+ /site
227
+
228
+ # mypy
229
+ .mypy_cache/
230
+ .dmypy.json
231
+ dmypy.json
232
+
233
+ # Pyre type checker
234
+ .pyre/
235
+
236
+ # pytype static type analyzer
237
+ .pytype/
238
+
239
+ # Cython debug symbols
240
+ cython_debug/
241
+
242
+ # PyCharm
243
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
244
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
245
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
246
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
247
+ #.idea/
248
+
249
+ # Hatch-VCS
250
+ _version.py
estatjp-0.1.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Alan Engel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice (including the next
13
+ paragraph) shall be included in all copies or substantial portions of the
14
+ Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
18
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
19
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
20
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
estatjp-0.1.1/PKG-INFO ADDED
@@ -0,0 +1,242 @@
1
+ Metadata-Version: 2.4
2
+ Name: estatjp
3
+ Version: 0.1.1
4
+ Summary: A Python package for accessing Japanese government data on its e-Stat portal
5
+ Project-URL: Homepage, https://github.com/kijinosu/estatjp
6
+ Project-URL: Source Code, https://github.com/kijinosu/estatjp
7
+ Project-URL: Bug Tracker, https://github.com/kijinosu/estatjp/issues
8
+ Project-URL: Documentation, https://github.com/kijinosu/estatjp/blob/main/README.md
9
+ Project-URL: Download, https://pypi.org/project/estatjp/#files
10
+ Author-email: Alan Engel <kijinosu@proton.me>
11
+ License-Expression: MIT
12
+ License-File: LICENSE
13
+ Classifier: Development Status :: 2 - Pre-Alpha
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3.14
18
+ Requires-Python: >=3.14
19
+ Requires-Dist: docutils!=0.18.*,!=0.19.*,>=0.8
20
+ Requires-Dist: importlib-metadata>=3.6; python_version < '3.10'
21
+ Requires-Dist: pandas>=3.0.1
22
+ Requires-Dist: pybtex-docutils>=1.0.2
23
+ Requires-Dist: pybtex>=0.25
24
+ Requires-Dist: sphinx>=3.5
25
+ Provides-Extra: build
26
+ Requires-Dist: pip-audit; extra == 'build'
27
+ Requires-Dist: twine; extra == 'build'
28
+ Provides-Extra: dev
29
+ Requires-Dist: hatch; extra == 'dev'
30
+ Requires-Dist: pre-commit; extra == 'dev'
31
+ Provides-Extra: docs
32
+ Requires-Dist: myst-parser>=4.0; extra == 'docs'
33
+ Requires-Dist: nbsphinx; extra == 'docs'
34
+ Requires-Dist: pydata-sphinx-theme~=0.16; extra == 'docs'
35
+ Requires-Dist: sphinx-autoapi>=3.6.0; extra == 'docs'
36
+ Requires-Dist: sphinx-autobuild>=2024.10.3; extra == 'docs'
37
+ Requires-Dist: sphinx-copybutton>=0.5.2; extra == 'docs'
38
+ Requires-Dist: sphinx-design>=0.6.1; extra == 'docs'
39
+ Requires-Dist: sphinxcontrib-bibtex>=2.6.5; extra == 'docs'
40
+ Requires-Dist: sphinx~=8.0; extra == 'docs'
41
+ Provides-Extra: tests
42
+ Requires-Dist: pytest; extra == 'tests'
43
+ Requires-Dist: pytest-cov; extra == 'tests'
44
+ Requires-Dist: pytest-raises; extra == 'tests'
45
+ Requires-Dist: pytest-randomly; extra == 'tests'
46
+ Requires-Dist: pytest-xdist; extra == 'tests'
47
+ Requires-Dist: sphinx-autoapi>=3.6.0; extra == 'tests'
48
+ Description-Content-Type: text/markdown
49
+
50
+ # estatjp
51
+
52
+ [E-Stat](https://www.e-stat.go.jp/en) is a widely used portal site for
53
+ accessing Japanese governmental statistical data. It began operation in
54
+ 2008. e-Stat currently hosts [744 surveys (1,688,550 datasets) in
55
+ Japanese](https://www.e-stat.go.jp/stat-search?page=1) from about 30
56
+ governmental agencies with [56 surveys (292,856 datasets) available in
57
+ English](https://www.e-stat.go.jp/en/stat-search?page=1). These
58
+ collections contain 'databases' and files (mainly Excel files). The
59
+ 'databases' can be accessed via an API. API urls can cover entire
60
+ databases or subsets that can be tailored to users' individual needs.
61
+
62
+ The objective of the estatjp Python package is to provide access to the
63
+ e-Stat portal and return datasets in pandas.DataFrame format.
64
+
65
+ For example, the e-Stat API returns CSV streams that contain headers
66
+ with metadata. These headers interfere with pandas.read_csv. The first
67
+ release of estatjp returns a dictionary that contains the header and
68
+ main table as separate dataframes.
69
+
70
+ ## Requirement
71
+
72
+ The e-Stat API requires an application ID that can be obtained from the
73
+ [E-Stat API](https://www.e-stat.go.jp/api/en) page. Install this ID into
74
+ your project by setting your terminal to your project root and running
75
+ the following commands:
76
+
77
+ pip install python-dotenv
78
+ dotenv set ESTAT_APP_ID your-app-id
79
+
80
+ ## Install this package
81
+
82
+ pip install estatjp
83
+
84
+ ## Example
85
+
86
+ This example downloads an English dataset, the [Labour Force Survey
87
+ Basic Tabulation Whole Japan Monthly table Population of 15 years old
88
+ and over by labour force
89
+ status](https://www.e-stat.go.jp/en/dbview?sid=0003005798). The API url
90
+ for that table is assigned to enurl below.
91
+
92
+ ``` python
93
+ import pandas
94
+ from dotenv import load_dotenv
95
+ from estatjp import api
96
+ enurl = 'http://api.e-stat.go.jp/rest/3.0/app/getSimpleStatsData?appId=&lang=E&statsDataId=0003005798&metaGetFlg=Y&cntGetFlg=N&explanationGetFlg=Y&annotationGetFlg=Y&sectionHeaderFlg=1&replaceSpChars=0'
97
+ dfs = api.get_csv_data(enurl)
98
+ print(dfs.get('Header'))
99
+ print(dfs.get('Main'))
100
+ print(dfs.get('Description'))
101
+ ```
102
+
103
+ # References
104
+
105
+ <div id="refs" class="container references csl-bib-body hanging-indent">
106
+
107
+ <div id="ref-ashizawa2022estat.en" class="container csl-entry">
108
+
109
+ Ashizawa, Souta, Matsuda, Junichi, & Osone, Tadashi. (2022). Method for
110
+ improving the recall in e-stat data search. *Proceedings of Annual
111
+ Conference of the Information Systems Society in Japan ISSJ2022*, S1–C1.
112
+ <https://doi.org/10.19014/proceedingsissj.18.0_S1-C1>
113
+
114
+ </div>
115
+
116
+ <div id="ref-ashizawa2023estat.en" class="container csl-entry">
117
+
118
+ Ashizawa, Souta, Matsuda, Junichi, & Osone, Tadashi. (2023). Development
119
+ of front-end search system improving recall in e-stat. *Proceedings of
120
+ Annual Conference of the Information Systems Society in Japan ISSJ2023*,
121
+ 1–6. <https://doi.org/10.19014/proceedingsissj.19.0_P001>
122
+
123
+ </div>
124
+
125
+ <div id="ref-cocosan2023python.en" class="container csl-entry">
126
+
127
+ cocosan. (2023). *Python apuri: Seifu tokei e-stat wo shigoto ni ikase!*
128
+ <https://www.youtube.com/watch?v=hiaK-jTXpCI>.
129
+
130
+ </div>
131
+
132
+ <div id="ref-higashi2024incidence" class="container csl-entry">
133
+
134
+ Higashi, Takahiro, & Kurokawa, Yukinori. (2024). Incidence, mortality,
135
+ survival, and treatment statistics of cancers in digestive
136
+ organs—japanese cancer statistics 2024. *Annals of Gastroenterological
137
+ Surgery*, *8*(6), 958–965. <https://doi.org/10.1002/ags3.12835>
138
+
139
+ </div>
140
+
141
+ <div id="ref-inoue2023self" class="container csl-entry">
142
+
143
+ Inoue, Takao. (2023). A self-made tutorial for GitHub flavored markdown
144
+ (GFM), and its source codes. *ResearchGate*.
145
+ <https://www.researchgate.net/publication/370937551_A_self-made_tutorial_for_GitHub_Flavored_Markdown_GFM_and_its_source_codes>
146
+
147
+ </div>
148
+
149
+ <div id="ref-kato2021residential" class="container csl-entry">
150
+
151
+ Kato, Haruka, & Takizawa, Atsushi. (2021). Which residential clusters of
152
+ walkability affect future population from the perspective of real estate
153
+ prices in the osaka metropolitan area? *Sustainability*, *13*(23),
154
+ 13413. <https://doi.org/10.3390/su132313413>
155
+
156
+ </div>
157
+
158
+ <div id="ref-masui2021r.en" class="container csl-entry">
159
+
160
+ Masui, Toshikatsu. (2021). *R to python de manabu tokeigaku nyumon*.
161
+ Ohmsha.
162
+
163
+ </div>
164
+
165
+ <div id="ref-estat2016adaptor" class="container csl-entry">
166
+
167
+ National Statistics Center, Japan. (2016). *Chukan apuri*.
168
+ <https://github.com/e-stat-api/adaptor>.
169
+
170
+ </div>
171
+
172
+ <div id="ref-nishimura2017linked.en" class="container csl-entry">
173
+
174
+ Nishimura, Shoki. (2017). Providing statistical data by linked open data
175
+ (LOD): Innovative official statistical data (e-stat) dissemination.
176
+ *Joho Kanri*, *59*(12), 812–821.
177
+ <https://doi.org/10.1241/johokanri.59.812>
178
+
179
+ </div>
180
+
181
+ <div id="ref-seki2023social" class="container csl-entry">
182
+
183
+ Seki, Katsunori. (2023). Social identification and redistribution
184
+ preference: A survey experiment in japan. *Social Science Japan
185
+ Journal*, *26*(1), 47–60. <https://doi.org/10.1093/ssjj/jyac029>
186
+
187
+ </div>
188
+
189
+ <div id="ref-takahashi2022estat.en" class="container csl-entry">
190
+
191
+ Takahashi, Shūichiro. (2022). *E-stat to nakayokusuru hon: Python to
192
+ ōpun deta de nihon wo bunseki suru! API keiyu de seifu tōkei wo shutoku!
193
+ katsuyo!* Impress R&D.
194
+
195
+ </div>
196
+
197
+ <div id="ref-wakabayashi2015public.en" class="container csl-entry">
198
+
199
+ Wakabayashi, Chihiro, Shinmura, Hiromi, Ando, Miri, Shimada, Masako, &
200
+ Yanagawa, Hiroshi. (2015). Kōeisei topikksu dai 13 kai seifutōkei no
201
+ sōgōmadoguchi e-stat: Chiiki shindan he no katsuyō - jissen herusu
202
+ puromōshon. *Gekkan Chiiki Igaku*, *29*(2), 52.
203
+ <https://doi.org/10.60261/chiikiigaku.29.2_52>
204
+
205
+ </div>
206
+
207
+ <div id="ref-ashizawa2022estat.ja" class="container csl-entry">
208
+
209
+ 芦澤颯太, 松田純一, & 大曽根匡. (2022). E-stat
210
+ での統計データ検索におけるいくつかの課題抽出とその解決方法の提案.
211
+ *情報システム学会 全国大会論文集 ISSJ2022*, S1–C1.
212
+ <https://doi.org/10.19014/proceedingsissj.18.0_S1-C1>
213
+
214
+ </div>
215
+
216
+ <div id="ref-ashizawa2023estat.ja" class="container csl-entry">
217
+
218
+ 芦澤颯太, 松田純一, & 大曽根匡. (2023). E-stat
219
+ における検索漏れを抑止する情報システムの開発とその検証.
220
+ *情報システム学会 全国大会論文集 情報システム学会*, 1–6.
221
+ <https://doi.org/10.19014/proceedingsissj.19.0_P001>
222
+
223
+ </div>
224
+
225
+ <div id="ref-wakabayashi2015public.ja" class="container csl-entry">
226
+
227
+ 若林チヒロ, 新村洋未, 安藤実里, 嶋田雅子, & 柳川洋. (2015).
228
+ 公衆衛生トピックス 第 13 回 政府統計の総合窓口
229
+ e-stat-地域診断への活用-実践ヘルスプロモーション. *月刊地域医学*,
230
+ *29*(2), 52. <https://doi.org/10.60261/chiikiigaku.29.2_52>
231
+
232
+ </div>
233
+
234
+ <div id="ref-nishimura2017linked.ja" class="container csl-entry">
235
+
236
+ 西村正貴. (2017). Linked open data (LOD) による統計データの提供:
237
+ 政府統計データ (e-stat) の新しい形. *情報管理*, *59*(12), 812–821.
238
+ <https://doi.org/10.1241/johokanri.59.812>
239
+
240
+ </div>
241
+
242
+ </div>
@@ -0,0 +1,193 @@
1
+ # estatjp
2
+
3
+ [E-Stat](https://www.e-stat.go.jp/en) is a widely used portal site for
4
+ accessing Japanese governmental statistical data. It began operation in
5
+ 2008. e-Stat currently hosts [744 surveys (1,688,550 datasets) in
6
+ Japanese](https://www.e-stat.go.jp/stat-search?page=1) from about 30
7
+ governmental agencies with [56 surveys (292,856 datasets) available in
8
+ English](https://www.e-stat.go.jp/en/stat-search?page=1). These
9
+ collections contain 'databases' and files (mainly Excel files). The
10
+ 'databases' can be accessed via an API. API urls can cover entire
11
+ databases or subsets that can be tailored to users' individual needs.
12
+
13
+ The objective of the estatjp Python package is to provide access to the
14
+ e-Stat portal and return datasets in pandas.DataFrame format.
15
+
16
+ For example, the e-Stat API returns CSV streams that contain headers
17
+ with metadata. These headers interfere with pandas.read_csv. The first
18
+ release of estatjp returns a dictionary that contains the header and
19
+ main table as separate dataframes.
20
+
21
+ ## Requirement
22
+
23
+ The e-Stat API requires an application ID that can be obtained from the
24
+ [E-Stat API](https://www.e-stat.go.jp/api/en) page. Install this ID into
25
+ your project by setting your terminal to your project root and running
26
+ the following commands:
27
+
28
+ pip install python-dotenv
29
+ dotenv set ESTAT_APP_ID your-app-id
30
+
31
+ ## Install this package
32
+
33
+ pip install estatjp
34
+
35
+ ## Example
36
+
37
+ This example downloads an English dataset, the [Labour Force Survey
38
+ Basic Tabulation Whole Japan Monthly table Population of 15 years old
39
+ and over by labour force
40
+ status](https://www.e-stat.go.jp/en/dbview?sid=0003005798). The API url
41
+ for that table is assigned to enurl below.
42
+
43
+ ``` python
44
+ import pandas
45
+ from dotenv import load_dotenv
46
+ from estatjp import api
47
+ enurl = 'http://api.e-stat.go.jp/rest/3.0/app/getSimpleStatsData?appId=&lang=E&statsDataId=0003005798&metaGetFlg=Y&cntGetFlg=N&explanationGetFlg=Y&annotationGetFlg=Y&sectionHeaderFlg=1&replaceSpChars=0'
48
+ dfs = api.get_csv_data(enurl)
49
+ print(dfs.get('Header'))
50
+ print(dfs.get('Main'))
51
+ print(dfs.get('Description'))
52
+ ```
53
+
54
+ # References
55
+
56
+ <div id="refs" class="container references csl-bib-body hanging-indent">
57
+
58
+ <div id="ref-ashizawa2022estat.en" class="container csl-entry">
59
+
60
+ Ashizawa, Souta, Matsuda, Junichi, & Osone, Tadashi. (2022). Method for
61
+ improving the recall in e-stat data search. *Proceedings of Annual
62
+ Conference of the Information Systems Society in Japan ISSJ2022*, S1–C1.
63
+ <https://doi.org/10.19014/proceedingsissj.18.0_S1-C1>
64
+
65
+ </div>
66
+
67
+ <div id="ref-ashizawa2023estat.en" class="container csl-entry">
68
+
69
+ Ashizawa, Souta, Matsuda, Junichi, & Osone, Tadashi. (2023). Development
70
+ of front-end search system improving recall in e-stat. *Proceedings of
71
+ Annual Conference of the Information Systems Society in Japan ISSJ2023*,
72
+ 1–6. <https://doi.org/10.19014/proceedingsissj.19.0_P001>
73
+
74
+ </div>
75
+
76
+ <div id="ref-cocosan2023python.en" class="container csl-entry">
77
+
78
+ cocosan. (2023). *Python apuri: Seifu tokei e-stat wo shigoto ni ikase!*
79
+ <https://www.youtube.com/watch?v=hiaK-jTXpCI>.
80
+
81
+ </div>
82
+
83
+ <div id="ref-higashi2024incidence" class="container csl-entry">
84
+
85
+ Higashi, Takahiro, & Kurokawa, Yukinori. (2024). Incidence, mortality,
86
+ survival, and treatment statistics of cancers in digestive
87
+ organs—japanese cancer statistics 2024. *Annals of Gastroenterological
88
+ Surgery*, *8*(6), 958–965. <https://doi.org/10.1002/ags3.12835>
89
+
90
+ </div>
91
+
92
+ <div id="ref-inoue2023self" class="container csl-entry">
93
+
94
+ Inoue, Takao. (2023). A self-made tutorial for GitHub flavored markdown
95
+ (GFM), and its source codes. *ResearchGate*.
96
+ <https://www.researchgate.net/publication/370937551_A_self-made_tutorial_for_GitHub_Flavored_Markdown_GFM_and_its_source_codes>
97
+
98
+ </div>
99
+
100
+ <div id="ref-kato2021residential" class="container csl-entry">
101
+
102
+ Kato, Haruka, & Takizawa, Atsushi. (2021). Which residential clusters of
103
+ walkability affect future population from the perspective of real estate
104
+ prices in the osaka metropolitan area? *Sustainability*, *13*(23),
105
+ 13413. <https://doi.org/10.3390/su132313413>
106
+
107
+ </div>
108
+
109
+ <div id="ref-masui2021r.en" class="container csl-entry">
110
+
111
+ Masui, Toshikatsu. (2021). *R to python de manabu tokeigaku nyumon*.
112
+ Ohmsha.
113
+
114
+ </div>
115
+
116
+ <div id="ref-estat2016adaptor" class="container csl-entry">
117
+
118
+ National Statistics Center, Japan. (2016). *Chukan apuri*.
119
+ <https://github.com/e-stat-api/adaptor>.
120
+
121
+ </div>
122
+
123
+ <div id="ref-nishimura2017linked.en" class="container csl-entry">
124
+
125
+ Nishimura, Shoki. (2017). Providing statistical data by linked open data
126
+ (LOD): Innovative official statistical data (e-stat) dissemination.
127
+ *Joho Kanri*, *59*(12), 812–821.
128
+ <https://doi.org/10.1241/johokanri.59.812>
129
+
130
+ </div>
131
+
132
+ <div id="ref-seki2023social" class="container csl-entry">
133
+
134
+ Seki, Katsunori. (2023). Social identification and redistribution
135
+ preference: A survey experiment in japan. *Social Science Japan
136
+ Journal*, *26*(1), 47–60. <https://doi.org/10.1093/ssjj/jyac029>
137
+
138
+ </div>
139
+
140
+ <div id="ref-takahashi2022estat.en" class="container csl-entry">
141
+
142
+ Takahashi, Shūichiro. (2022). *E-stat to nakayokusuru hon: Python to
143
+ ōpun deta de nihon wo bunseki suru! API keiyu de seifu tōkei wo shutoku!
144
+ katsuyo!* Impress R&D.
145
+
146
+ </div>
147
+
148
+ <div id="ref-wakabayashi2015public.en" class="container csl-entry">
149
+
150
+ Wakabayashi, Chihiro, Shinmura, Hiromi, Ando, Miri, Shimada, Masako, &
151
+ Yanagawa, Hiroshi. (2015). Kōeisei topikksu dai 13 kai seifutōkei no
152
+ sōgōmadoguchi e-stat: Chiiki shindan he no katsuyō - jissen herusu
153
+ puromōshon. *Gekkan Chiiki Igaku*, *29*(2), 52.
154
+ <https://doi.org/10.60261/chiikiigaku.29.2_52>
155
+
156
+ </div>
157
+
158
+ <div id="ref-ashizawa2022estat.ja" class="container csl-entry">
159
+
160
+ 芦澤颯太, 松田純一, & 大曽根匡. (2022). E-stat
161
+ での統計データ検索におけるいくつかの課題抽出とその解決方法の提案.
162
+ *情報システム学会 全国大会論文集 ISSJ2022*, S1–C1.
163
+ <https://doi.org/10.19014/proceedingsissj.18.0_S1-C1>
164
+
165
+ </div>
166
+
167
+ <div id="ref-ashizawa2023estat.ja" class="container csl-entry">
168
+
169
+ 芦澤颯太, 松田純一, & 大曽根匡. (2023). E-stat
170
+ における検索漏れを抑止する情報システムの開発とその検証.
171
+ *情報システム学会 全国大会論文集 情報システム学会*, 1–6.
172
+ <https://doi.org/10.19014/proceedingsissj.19.0_P001>
173
+
174
+ </div>
175
+
176
+ <div id="ref-wakabayashi2015public.ja" class="container csl-entry">
177
+
178
+ 若林チヒロ, 新村洋未, 安藤実里, 嶋田雅子, & 柳川洋. (2015).
179
+ 公衆衛生トピックス 第 13 回 政府統計の総合窓口
180
+ e-stat-地域診断への活用-実践ヘルスプロモーション. *月刊地域医学*,
181
+ *29*(2), 52. <https://doi.org/10.60261/chiikiigaku.29.2_52>
182
+
183
+ </div>
184
+
185
+ <div id="ref-nishimura2017linked.ja" class="container csl-entry">
186
+
187
+ 西村正貴. (2017). Linked open data (LOD) による統計データの提供:
188
+ 政府統計データ (e-stat) の新しい形. *情報管理*, *59*(12), 812–821.
189
+ <https://doi.org/10.1241/johokanri.59.812>
190
+
191
+ </div>
192
+
193
+ </div>
@@ -0,0 +1,213 @@
1
+ ################################################################################
2
+ # Build Configuration
3
+ ################################################################################
4
+
5
+ [build-system]
6
+ build-backend = "hatchling.build"
7
+ requires = ["hatchling"]
8
+
9
+ ################################################################################
10
+ # Project Configuration
11
+ ################################################################################
12
+
13
+ [project]
14
+ name = "estatjp"
15
+ # You can choose to use dynamic versioning with hatch, or static versioning where you add it manually.
16
+ version = "v0.1.1"
17
+
18
+ description = "A Python package for accessing Japanese government data on its e-Stat portal"
19
+ authors = [
20
+ { name = "Alan Engel", email = "kijinosu@proton.me" },
21
+ ]
22
+ license = "MIT"
23
+ requires-python = ">= 3.14" # Adjust based on the minimum version of Python that you support
24
+ readme = {"file" = "README.md", "content-type" = "text/markdown"}
25
+ # readme = "README.rst"
26
+ # Please consult https://pypi.org/classifiers/ for a full list.
27
+ classifiers = [
28
+ "Development Status :: 2 - Pre-Alpha",
29
+ "Intended Audience :: Science/Research",
30
+ "License :: OSI Approved :: MIT License",
31
+ "Operating System :: OS Independent",
32
+ "Programming Language :: Python :: 3.14",
33
+ ]
34
+ # TODO: add keywords
35
+ keywords = []
36
+ # TODO: add dependencies
37
+ dependencies = [
38
+ "Sphinx>=3.5",
39
+ "docutils>=0.8,!=0.18.*,!=0.19.*",
40
+ "pybtex>=0.25",
41
+ "pybtex-docutils>=1.0.2",
42
+ "importlib_metadata>=3.6; python_version < '3.10'",
43
+ "pandas>=3.0.1",
44
+ ]
45
+ exclude_patterns = ["_build"]
46
+ bibtex_bibfiles = ["bibliography.bib"]
47
+
48
+ [project.urls]
49
+ Homepage = "https://github.com/kijinosu/estatjp"
50
+ "Source Code" = "https://github.com/kijinosu/estatjp"
51
+ "Bug Tracker" = "https://github.com/kijinosu/estatjp/issues"
52
+ Documentation = "https://github.com/kijinosu/estatjp/blob/main/README.md"
53
+ Download = "https://pypi.org/project/estatjp/#files"
54
+
55
+ [project.optional-dependencies]
56
+ # The groups below should be in the [dependency-groups] table
57
+ # They are here now because hatch hasn't released support for them but plans to
58
+ # in Mid November 2025.
59
+ dev = [
60
+ "hatch",
61
+ "pre-commit",
62
+ ]
63
+
64
+ docs = [
65
+ "nbsphinx",
66
+ "sphinx~=8.0",
67
+ "myst-parser>=4.0",
68
+ "pydata-sphinx-theme~=0.16",
69
+ "sphinx-autobuild>=2024.10.3",
70
+ "sphinx-autoapi>=3.6.0",
71
+ "sphinx_design>=0.6.1",
72
+ "sphinx-copybutton>=0.5.2",
73
+ "sphinxcontrib.bibtex>=2.6.5"
74
+ ]
75
+
76
+ build = [
77
+ "pip-audit",
78
+ "twine",
79
+ ]
80
+ tests = [
81
+ "pytest",
82
+ "pytest-cov",
83
+ "pytest-raises",
84
+ "pytest-randomly",
85
+ "pytest-xdist",
86
+ "sphinx-autoapi>=3.6.0",
87
+ ]
88
+
89
+
90
+ ################################################################################
91
+ # Tool Configuration
92
+ ################################################################################
93
+
94
+ # Hatch is building your package's wheel and sdist
95
+ # This tells hatch to only include Python packages (i.e., folders with __init__.py) in the build.
96
+ # read more about package building, here:
97
+ # https://www.pyopensci.org/python-package-guide/package-structure-code/python-package-distribution-files-sdist-wheel.html
98
+ [tool.hatch.build]
99
+ only-packages = true
100
+
101
+ # This tells Hatch to build the package from the src/ directory.
102
+ # Read more about src layouts here: https://www.pyopensci.org/python-package-guide/package-structure-code/python-package-structure.html
103
+ [tool.hatch.build.targets.wheel]
104
+ packages = ["src/estatjp"]
105
+
106
+
107
+
108
+ ######## Configure pytest for your test suite ########
109
+ [tool.pytest.ini_options]
110
+ testpaths = ["tests"] # Tells pytest what directory tests are in
111
+ markers = ["raises"] # Tells pytest to not raise a warning if you use @pytest.mark.raises
112
+
113
+ [tool.coverage.paths]
114
+ source = [
115
+ "src/estatjp",
116
+ "*/site-packages/estatjp",
117
+ ]
118
+
119
+ [tool.coverage.run]
120
+ # Ensures code coverage is measured for branches (conditional statements with different outcomes) in your code.
121
+ branch = true
122
+ parallel = true
123
+
124
+ [tool.coverage.report]
125
+ # This configures the output test coverage report
126
+ exclude_lines = ["pragma: no cover"]
127
+ precision = 2
128
+
129
+
130
+ # Use UV to create Hatch environments
131
+ [tool.hatch.envs.default]
132
+ installer = "uv"
133
+
134
+ ################################################################################
135
+ # Hatch Environments
136
+ ################################################################################
137
+
138
+ #--------------- Build and check your package ---------------#
139
+
140
+ # This table installs the tools you need to test and build your package
141
+ [tool.hatch.envs.build]
142
+ description = """Test the installation the package."""
143
+ dependencies = [
144
+ "pip",
145
+ "twine",
146
+ ]
147
+ features = [
148
+ "build",
149
+ ]
150
+ detached = true
151
+ builder = true
152
+
153
+ # This table creates the command `hatch run build:check`, which will build and check your package.
154
+ [tool.hatch.envs.build.scripts]
155
+ check = [
156
+ "pandoc -f bibtex -t rst bibliography.bib --citeproc --csl .pandoc/styles/apa-no-initials.csl -o .pandoc/bibliography.rst",
157
+ "pandoc -f rst -t gfm .pandoc/READMEbody.rst .pandoc/bibliography.rst -o README.md",
158
+ "pip check",
159
+ "hatch build {args:--clean}",
160
+ "twine check dist/*",
161
+ ]
162
+
163
+ #--------------- Run tests ---------------#
164
+ [tool.hatch.envs.test]
165
+ description = """Run the test suite."""
166
+ features = [
167
+ "tests",
168
+ ]
169
+
170
+ [[tool.hatch.envs.test.matrix]]
171
+ python = ["3.12", "3.13"]
172
+
173
+ [tool.hatch.envs.test.scripts]
174
+ run = "pytest {args:--cov=estatjp --cov-report=term-missing --cov-report=xml}"
175
+
176
+ #--------------- Build and preview your documentation ---------------#
177
+
178
+ # This sets up a hatch environment with associated dependencies that need to be installed
179
+ [tool.hatch.envs.docs]
180
+ description = """Build or serve the documentation."""
181
+ # Install the optional dependency group for docs
182
+ features = [
183
+ "docs",
184
+ ]
185
+
186
+ # This table contains the scripts that you can use to build and serve your docs
187
+ # hatch run docs:build will build your documentation
188
+ # hatch run docs:serve will serve them 'live' on your computer locally
189
+ [tool.hatch.envs.docs.scripts]
190
+ build = ["sphinx-build {args:-W -b html docs docs/_build}"]
191
+ serve = ["sphinx-autobuild docs --watch src/estatjp {args:-b html docs/_build/serve}"]
192
+
193
+
194
+
195
+ #--------------- Check security for your dependencies ---------------#
196
+
197
+ [tool.hatch.envs.audit]
198
+ description = """Check dependencies for security vulnerabilities."""
199
+ features = [
200
+ "build",
201
+ ]
202
+
203
+ [tool.hatch.envs.audit.scripts]
204
+ check = ["pip-audit"]
205
+
206
+ #----------------sphinxcontrib-bibtex--------------------------------#
207
+ last = "sphinxcontrib.bibtex.style.names.last:LastNameStyle"
208
+
209
+ [project.entry-points."sphinxcontrib.bibtex.style.referencing"]
210
+ author_year = "sphinxcontrib.bibtex.style.referencing.author_year:AuthorYearReferenceStyle"
211
+ foot = "sphinxcontrib.bibtex.style.referencing.foot:FootReferenceStyle"
212
+ label = "sphinxcontrib.bibtex.style.referencing.label:LabelReferenceStyle"
213
+ super = "sphinxcontrib.bibtex.style.referencing.super_:SuperReferenceStyle"
@@ -0,0 +1,30 @@
1
+ # MIT License
2
+ #
3
+ # Copyright (c) 2026 Alan Engel
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice (including the next
13
+ # paragraph) shall be included in all copies or substantial portions of the
14
+ # Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ # SOFTWARE.
23
+
24
+ """
25
+ The `E-Stat`_ portal site contains 'databases' that can be accessed via API URLs or downloaded as XML, CSV and JSON files, as well as standalone files (mainly Excel files) that can be downloaded. This package will eventually provide for converting these into `pandas DataFrames`_. This early release handles only API URL calls with CSV downloads.
26
+
27
+ .. _E-Stat: https://www.e-stat.go.jp/en
28
+ .. _pandas DataFrames: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html
29
+
30
+ """
@@ -0,0 +1,94 @@
1
+ """A module for accessing e-Stat data using its API.
2
+
3
+ The API provides data in CSV, JSON and XML formats. This version provides for the CSV format only.
4
+
5
+ The main task is to request and parse a CSV stream to produce a `pandas.DataFrame` object. `pandas.read_csv()` cannot be used as-is because CSV streams from e-Stat start with a header of metadata, which confuses pandas. For more detail, see the development notes chronicled in the Read the Docs pages [DevAPI01.ipynb](https://estatpy.readthedocs.io/en/latest/chronicle/DevAPI01.html) and [DevAPI02.ipynb](https://estatpy.readthedocs.io/en/latest/chronicle/DevAPI02.html).
6
+
7
+
8
+
9
+ """
10
+ import pandas as pd
11
+ import os
12
+ import requests
13
+ import tempfile
14
+ import re
15
+ import datetime
16
+ from dotenv import load_dotenv
17
+ import os
18
+
19
def get_csv_data(url, description=None):
    """Retrieve a CSV stream from e-Stat using an API url and create pandas DataFrames.

    The CSV returned by e-Stat starts with several rows of metadata, terminated
    by a row that starts with ``"VALUE"``; the data table starts on the next
    row. The two parts are separated and parsed independently.

    The application ID is read from the ``ESTAT_APP_ID`` environment variable
    (after loading a ``.env`` file, if any) and inserted after the ``appId=``
    marker of ``url``.

    :param url: An API url obtained from e-Stat, for example, the [2020-base consumer price index](https://www.e-stat.go.jp/en/stat-search/database?page=1&layout=datalist&toukei=00200573&tstat=000001150147&cycle=0&tclass1val=0). It must contain ``appId=`` exactly once.

    :param description: An optional object that the user can supply to help document her search. The default (``None``) is replaced by the time of running this function.

    :return: Dictionary containing the Header in the form of a pandas.DataFrame, the Main table also in the form of a pandas.DataFrame, and the Description.

    :raises KeyError: If the environment variable ``ESTAT_APP_ID`` is not set.
    :raises OSError: If ``ESTAT_APP_ID`` is set but empty.
    :raises ValueError: If ``url`` does not contain ``appId=`` exactly once, or
        if the returned stream has no row starting with ``"VALUE"``.
    :raises requests.RequestException: If the HTTP request fails or returns an
        error status.
    """
    # Local import so this function does not depend on a new file-level import.
    from io import StringIO

    # Resolve the default per call. A ``datetime.now()`` default in the
    # signature would be evaluated only once, when the module is imported.
    if description is None:
        description = datetime.datetime.now()

    try:
        load_dotenv()
    except OSError as e:  # FileNotFoundError and IOError are both OSError
        e.add_note('Environment variable file (.env) not found. See README.')
        raise

    try:
        app_id = os.environ['ESTAT_APP_ID']
    except KeyError as e:
        e.add_note('Environment variable ESTAT_APP_ID not found. See README.')
        raise

    # os.environ never yields None, so test for an empty value instead.
    if not app_id:
        raise OSError("Value of environment variable 'ESTAT_APP_ID' not found. See README.")

    url_split = url.split("appId=")
    if len(url_split) != 2:
        raise ValueError("Invalid API url")
    url = f"{url_split[0]}appId={app_id}{url_split[1]}"

    # The whole body is downloaded up front (stream=False), so the header and
    # the data table can be separated in memory without temporary files.
    header_buf = StringIO()
    main_buf = StringIO()
    in_header = True
    colnum = 0
    with requests.get(url, stream=False) as estatresponse:  # chunking in iter_lines doesn't work for stream=True
        estatresponse.raise_for_status()

        if estatresponse.encoding is None:
            estatresponse.encoding = 'utf-8'

        for line in estatresponse.iter_lines(chunk_size=1024, decode_unicode=True):
            if in_header:
                # Metadata rows have varying widths; track the widest one so
                # that pandas can read them all into a single frame.
                colnum = max(colnum, len(line.split('","')))
                header_buf.write(line)
                header_buf.write("\n")
                if line.startswith('"VALUE"'):
                    in_header = False
            else:
                main_buf.write(line)
                main_buf.write("\n")

    if in_header:
        # Show the start of what was actually received; a temp-file path would
        # no longer exist by the time the caller sees this error.
        preview = header_buf.getvalue()[:500]
        raise ValueError(
            "The stream that e-Stat returned lacks a 'VALUE' line. "
            "Start of the returned stream: " + repr(preview)
        )

    header_buf.seek(0)
    main_buf.seek(0)
    dfHeader = pd.read_csv(header_buf, names=range(colnum))
    dfHeader = dfHeader.dropna(axis=1, how="all")
    dfMain = pd.read_csv(main_buf)

    return {
        'Description': description,
        'Header': dfHeader,
        'Main': dfMain,
    }