pyistat 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyistat-0.1.0/LICENSE +373 -0
- pyistat-0.1.0/PKG-INFO +85 -0
- pyistat-0.1.0/README.md +67 -0
- pyistat-0.1.0/pyistat/__init__.py +11 -0
- pyistat-0.1.0/pyistat/errors.py +77 -0
- pyistat-0.1.0/pyistat/get.py +221 -0
- pyistat-0.1.0/pyistat/search.py +205 -0
- pyistat-0.1.0/pyproject.toml +20 -0
pyistat-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
Mozilla Public License Version 2.0
|
|
2
|
+
==================================
|
|
3
|
+
|
|
4
|
+
1. Definitions
|
|
5
|
+
--------------
|
|
6
|
+
|
|
7
|
+
1.1. "Contributor"
|
|
8
|
+
means each individual or legal entity that creates, contributes to
|
|
9
|
+
the creation of, or owns Covered Software.
|
|
10
|
+
|
|
11
|
+
1.2. "Contributor Version"
|
|
12
|
+
means the combination of the Contributions of others (if any) used
|
|
13
|
+
by a Contributor and that particular Contributor's Contribution.
|
|
14
|
+
|
|
15
|
+
1.3. "Contribution"
|
|
16
|
+
means Covered Software of a particular Contributor.
|
|
17
|
+
|
|
18
|
+
1.4. "Covered Software"
|
|
19
|
+
means Source Code Form to which the initial Contributor has attached
|
|
20
|
+
the notice in Exhibit A, the Executable Form of such Source Code
|
|
21
|
+
Form, and Modifications of such Source Code Form, in each case
|
|
22
|
+
including portions thereof.
|
|
23
|
+
|
|
24
|
+
1.5. "Incompatible With Secondary Licenses"
|
|
25
|
+
means
|
|
26
|
+
|
|
27
|
+
(a) that the initial Contributor has attached the notice described
|
|
28
|
+
in Exhibit B to the Covered Software; or
|
|
29
|
+
|
|
30
|
+
(b) that the Covered Software was made available under the terms of
|
|
31
|
+
version 1.1 or earlier of the License, but not also under the
|
|
32
|
+
terms of a Secondary License.
|
|
33
|
+
|
|
34
|
+
1.6. "Executable Form"
|
|
35
|
+
means any form of the work other than Source Code Form.
|
|
36
|
+
|
|
37
|
+
1.7. "Larger Work"
|
|
38
|
+
means a work that combines Covered Software with other material, in
|
|
39
|
+
a separate file or files, that is not Covered Software.
|
|
40
|
+
|
|
41
|
+
1.8. "License"
|
|
42
|
+
means this document.
|
|
43
|
+
|
|
44
|
+
1.9. "Licensable"
|
|
45
|
+
means having the right to grant, to the maximum extent possible,
|
|
46
|
+
whether at the time of the initial grant or subsequently, any and
|
|
47
|
+
all of the rights conveyed by this License.
|
|
48
|
+
|
|
49
|
+
1.10. "Modifications"
|
|
50
|
+
means any of the following:
|
|
51
|
+
|
|
52
|
+
(a) any file in Source Code Form that results from an addition to,
|
|
53
|
+
deletion from, or modification of the contents of Covered
|
|
54
|
+
Software; or
|
|
55
|
+
|
|
56
|
+
(b) any new file in Source Code Form that contains any Covered
|
|
57
|
+
Software.
|
|
58
|
+
|
|
59
|
+
1.11. "Patent Claims" of a Contributor
|
|
60
|
+
means any patent claim(s), including without limitation, method,
|
|
61
|
+
process, and apparatus claims, in any patent Licensable by such
|
|
62
|
+
Contributor that would be infringed, but for the grant of the
|
|
63
|
+
License, by the making, using, selling, offering for sale, having
|
|
64
|
+
made, import, or transfer of either its Contributions or its
|
|
65
|
+
Contributor Version.
|
|
66
|
+
|
|
67
|
+
1.12. "Secondary License"
|
|
68
|
+
means either the GNU General Public License, Version 2.0, the GNU
|
|
69
|
+
Lesser General Public License, Version 2.1, the GNU Affero General
|
|
70
|
+
Public License, Version 3.0, or any later versions of those
|
|
71
|
+
licenses.
|
|
72
|
+
|
|
73
|
+
1.13. "Source Code Form"
|
|
74
|
+
means the form of the work preferred for making modifications.
|
|
75
|
+
|
|
76
|
+
1.14. "You" (or "Your")
|
|
77
|
+
means an individual or a legal entity exercising rights under this
|
|
78
|
+
License. For legal entities, "You" includes any entity that
|
|
79
|
+
controls, is controlled by, or is under common control with You. For
|
|
80
|
+
purposes of this definition, "control" means (a) the power, direct
|
|
81
|
+
or indirect, to cause the direction or management of such entity,
|
|
82
|
+
whether by contract or otherwise, or (b) ownership of more than
|
|
83
|
+
fifty percent (50%) of the outstanding shares or beneficial
|
|
84
|
+
ownership of such entity.
|
|
85
|
+
|
|
86
|
+
2. License Grants and Conditions
|
|
87
|
+
--------------------------------
|
|
88
|
+
|
|
89
|
+
2.1. Grants
|
|
90
|
+
|
|
91
|
+
Each Contributor hereby grants You a world-wide, royalty-free,
|
|
92
|
+
non-exclusive license:
|
|
93
|
+
|
|
94
|
+
(a) under intellectual property rights (other than patent or trademark)
|
|
95
|
+
Licensable by such Contributor to use, reproduce, make available,
|
|
96
|
+
modify, display, perform, distribute, and otherwise exploit its
|
|
97
|
+
Contributions, either on an unmodified basis, with Modifications, or
|
|
98
|
+
as part of a Larger Work; and
|
|
99
|
+
|
|
100
|
+
(b) under Patent Claims of such Contributor to make, use, sell, offer
|
|
101
|
+
for sale, have made, import, and otherwise transfer either its
|
|
102
|
+
Contributions or its Contributor Version.
|
|
103
|
+
|
|
104
|
+
2.2. Effective Date
|
|
105
|
+
|
|
106
|
+
The licenses granted in Section 2.1 with respect to any Contribution
|
|
107
|
+
become effective for each Contribution on the date the Contributor first
|
|
108
|
+
distributes such Contribution.
|
|
109
|
+
|
|
110
|
+
2.3. Limitations on Grant Scope
|
|
111
|
+
|
|
112
|
+
The licenses granted in this Section 2 are the only rights granted under
|
|
113
|
+
this License. No additional rights or licenses will be implied from the
|
|
114
|
+
distribution or licensing of Covered Software under this License.
|
|
115
|
+
Notwithstanding Section 2.1(b) above, no patent license is granted by a
|
|
116
|
+
Contributor:
|
|
117
|
+
|
|
118
|
+
(a) for any code that a Contributor has removed from Covered Software;
|
|
119
|
+
or
|
|
120
|
+
|
|
121
|
+
(b) for infringements caused by: (i) Your and any other third party's
|
|
122
|
+
modifications of Covered Software, or (ii) the combination of its
|
|
123
|
+
Contributions with other software (except as part of its Contributor
|
|
124
|
+
Version); or
|
|
125
|
+
|
|
126
|
+
(c) under Patent Claims infringed by Covered Software in the absence of
|
|
127
|
+
its Contributions.
|
|
128
|
+
|
|
129
|
+
This License does not grant any rights in the trademarks, service marks,
|
|
130
|
+
or logos of any Contributor (except as may be necessary to comply with
|
|
131
|
+
the notice requirements in Section 3.4).
|
|
132
|
+
|
|
133
|
+
2.4. Subsequent Licenses
|
|
134
|
+
|
|
135
|
+
No Contributor makes additional grants as a result of Your choice to
|
|
136
|
+
distribute the Covered Software under a subsequent version of this
|
|
137
|
+
License (see Section 10.2) or under the terms of a Secondary License (if
|
|
138
|
+
permitted under the terms of Section 3.3).
|
|
139
|
+
|
|
140
|
+
2.5. Representation
|
|
141
|
+
|
|
142
|
+
Each Contributor represents that the Contributor believes its
|
|
143
|
+
Contributions are its original creation(s) or it has sufficient rights
|
|
144
|
+
to grant the rights to its Contributions conveyed by this License.
|
|
145
|
+
|
|
146
|
+
2.6. Fair Use
|
|
147
|
+
|
|
148
|
+
This License is not intended to limit any rights You have under
|
|
149
|
+
applicable copyright doctrines of fair use, fair dealing, or other
|
|
150
|
+
equivalents.
|
|
151
|
+
|
|
152
|
+
2.7. Conditions
|
|
153
|
+
|
|
154
|
+
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
|
|
155
|
+
in Section 2.1.
|
|
156
|
+
|
|
157
|
+
3. Responsibilities
|
|
158
|
+
-------------------
|
|
159
|
+
|
|
160
|
+
3.1. Distribution of Source Form
|
|
161
|
+
|
|
162
|
+
All distribution of Covered Software in Source Code Form, including any
|
|
163
|
+
Modifications that You create or to which You contribute, must be under
|
|
164
|
+
the terms of this License. You must inform recipients that the Source
|
|
165
|
+
Code Form of the Covered Software is governed by the terms of this
|
|
166
|
+
License, and how they can obtain a copy of this License. You may not
|
|
167
|
+
attempt to alter or restrict the recipients' rights in the Source Code
|
|
168
|
+
Form.
|
|
169
|
+
|
|
170
|
+
3.2. Distribution of Executable Form
|
|
171
|
+
|
|
172
|
+
If You distribute Covered Software in Executable Form then:
|
|
173
|
+
|
|
174
|
+
(a) such Covered Software must also be made available in Source Code
|
|
175
|
+
Form, as described in Section 3.1, and You must inform recipients of
|
|
176
|
+
the Executable Form how they can obtain a copy of such Source Code
|
|
177
|
+
Form by reasonable means in a timely manner, at a charge no more
|
|
178
|
+
than the cost of distribution to the recipient; and
|
|
179
|
+
|
|
180
|
+
(b) You may distribute such Executable Form under the terms of this
|
|
181
|
+
License, or sublicense it under different terms, provided that the
|
|
182
|
+
license for the Executable Form does not attempt to limit or alter
|
|
183
|
+
the recipients' rights in the Source Code Form under this License.
|
|
184
|
+
|
|
185
|
+
3.3. Distribution of a Larger Work
|
|
186
|
+
|
|
187
|
+
You may create and distribute a Larger Work under terms of Your choice,
|
|
188
|
+
provided that You also comply with the requirements of this License for
|
|
189
|
+
the Covered Software. If the Larger Work is a combination of Covered
|
|
190
|
+
Software with a work governed by one or more Secondary Licenses, and the
|
|
191
|
+
Covered Software is not Incompatible With Secondary Licenses, this
|
|
192
|
+
License permits You to additionally distribute such Covered Software
|
|
193
|
+
under the terms of such Secondary License(s), so that the recipient of
|
|
194
|
+
the Larger Work may, at their option, further distribute the Covered
|
|
195
|
+
Software under the terms of either this License or such Secondary
|
|
196
|
+
License(s).
|
|
197
|
+
|
|
198
|
+
3.4. Notices
|
|
199
|
+
|
|
200
|
+
You may not remove or alter the substance of any license notices
|
|
201
|
+
(including copyright notices, patent notices, disclaimers of warranty,
|
|
202
|
+
or limitations of liability) contained within the Source Code Form of
|
|
203
|
+
the Covered Software, except that You may alter any license notices to
|
|
204
|
+
the extent required to remedy known factual inaccuracies.
|
|
205
|
+
|
|
206
|
+
3.5. Application of Additional Terms
|
|
207
|
+
|
|
208
|
+
You may choose to offer, and to charge a fee for, warranty, support,
|
|
209
|
+
indemnity or liability obligations to one or more recipients of Covered
|
|
210
|
+
Software. However, You may do so only on Your own behalf, and not on
|
|
211
|
+
behalf of any Contributor. You must make it absolutely clear that any
|
|
212
|
+
such warranty, support, indemnity, or liability obligation is offered by
|
|
213
|
+
You alone, and You hereby agree to indemnify every Contributor for any
|
|
214
|
+
liability incurred by such Contributor as a result of warranty, support,
|
|
215
|
+
indemnity or liability terms You offer. You may include additional
|
|
216
|
+
disclaimers of warranty and limitations of liability specific to any
|
|
217
|
+
jurisdiction.
|
|
218
|
+
|
|
219
|
+
4. Inability to Comply Due to Statute or Regulation
|
|
220
|
+
---------------------------------------------------
|
|
221
|
+
|
|
222
|
+
If it is impossible for You to comply with any of the terms of this
|
|
223
|
+
License with respect to some or all of the Covered Software due to
|
|
224
|
+
statute, judicial order, or regulation then You must: (a) comply with
|
|
225
|
+
the terms of this License to the maximum extent possible; and (b)
|
|
226
|
+
describe the limitations and the code they affect. Such description must
|
|
227
|
+
be placed in a text file included with all distributions of the Covered
|
|
228
|
+
Software under this License. Except to the extent prohibited by statute
|
|
229
|
+
or regulation, such description must be sufficiently detailed for a
|
|
230
|
+
recipient of ordinary skill to be able to understand it.
|
|
231
|
+
|
|
232
|
+
5. Termination
|
|
233
|
+
--------------
|
|
234
|
+
|
|
235
|
+
5.1. The rights granted under this License will terminate automatically
|
|
236
|
+
if You fail to comply with any of its terms. However, if You become
|
|
237
|
+
compliant, then the rights granted under this License from a particular
|
|
238
|
+
Contributor are reinstated (a) provisionally, unless and until such
|
|
239
|
+
Contributor explicitly and finally terminates Your grants, and (b) on an
|
|
240
|
+
ongoing basis, if such Contributor fails to notify You of the
|
|
241
|
+
non-compliance by some reasonable means prior to 60 days after You have
|
|
242
|
+
come back into compliance. Moreover, Your grants from a particular
|
|
243
|
+
Contributor are reinstated on an ongoing basis if such Contributor
|
|
244
|
+
notifies You of the non-compliance by some reasonable means, this is the
|
|
245
|
+
first time You have received notice of non-compliance with this License
|
|
246
|
+
from such Contributor, and You become compliant prior to 30 days after
|
|
247
|
+
Your receipt of the notice.
|
|
248
|
+
|
|
249
|
+
5.2. If You initiate litigation against any entity by asserting a patent
|
|
250
|
+
infringement claim (excluding declaratory judgment actions,
|
|
251
|
+
counter-claims, and cross-claims) alleging that a Contributor Version
|
|
252
|
+
directly or indirectly infringes any patent, then the rights granted to
|
|
253
|
+
You by any and all Contributors for the Covered Software under Section
|
|
254
|
+
2.1 of this License shall terminate.
|
|
255
|
+
|
|
256
|
+
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
|
|
257
|
+
end user license agreements (excluding distributors and resellers) which
|
|
258
|
+
have been validly granted by You or Your distributors under this License
|
|
259
|
+
prior to termination shall survive termination.
|
|
260
|
+
|
|
261
|
+
************************************************************************
|
|
262
|
+
* *
|
|
263
|
+
* 6. Disclaimer of Warranty *
|
|
264
|
+
* ------------------------- *
|
|
265
|
+
* *
|
|
266
|
+
* Covered Software is provided under this License on an "as is" *
|
|
267
|
+
* basis, without warranty of any kind, either expressed, implied, or *
|
|
268
|
+
* statutory, including, without limitation, warranties that the *
|
|
269
|
+
* Covered Software is free of defects, merchantable, fit for a *
|
|
270
|
+
* particular purpose or non-infringing. The entire risk as to the *
|
|
271
|
+
* quality and performance of the Covered Software is with You. *
|
|
272
|
+
* Should any Covered Software prove defective in any respect, You *
|
|
273
|
+
* (not any Contributor) assume the cost of any necessary servicing, *
|
|
274
|
+
* repair, or correction. This disclaimer of warranty constitutes an *
|
|
275
|
+
* essential part of this License. No use of any Covered Software is *
|
|
276
|
+
* authorized under this License except under this disclaimer. *
|
|
277
|
+
* *
|
|
278
|
+
************************************************************************
|
|
279
|
+
|
|
280
|
+
************************************************************************
|
|
281
|
+
* *
|
|
282
|
+
* 7. Limitation of Liability *
|
|
283
|
+
* -------------------------- *
|
|
284
|
+
* *
|
|
285
|
+
* Under no circumstances and under no legal theory, whether tort *
|
|
286
|
+
* (including negligence), contract, or otherwise, shall any *
|
|
287
|
+
* Contributor, or anyone who distributes Covered Software as *
|
|
288
|
+
* permitted above, be liable to You for any direct, indirect, *
|
|
289
|
+
* special, incidental, or consequential damages of any character *
|
|
290
|
+
* including, without limitation, damages for lost profits, loss of *
|
|
291
|
+
* goodwill, work stoppage, computer failure or malfunction, or any *
|
|
292
|
+
* and all other commercial damages or losses, even if such party *
|
|
293
|
+
* shall have been informed of the possibility of such damages. This *
|
|
294
|
+
* limitation of liability shall not apply to liability for death or *
|
|
295
|
+
* personal injury resulting from such party's negligence to the *
|
|
296
|
+
* extent applicable law prohibits such limitation. Some *
|
|
297
|
+
* jurisdictions do not allow the exclusion or limitation of *
|
|
298
|
+
* incidental or consequential damages, so this exclusion and *
|
|
299
|
+
* limitation may not apply to You. *
|
|
300
|
+
* *
|
|
301
|
+
************************************************************************
|
|
302
|
+
|
|
303
|
+
8. Litigation
|
|
304
|
+
-------------
|
|
305
|
+
|
|
306
|
+
Any litigation relating to this License may be brought only in the
|
|
307
|
+
courts of a jurisdiction where the defendant maintains its principal
|
|
308
|
+
place of business and such litigation shall be governed by laws of that
|
|
309
|
+
jurisdiction, without reference to its conflict-of-law provisions.
|
|
310
|
+
Nothing in this Section shall prevent a party's ability to bring
|
|
311
|
+
cross-claims or counter-claims.
|
|
312
|
+
|
|
313
|
+
9. Miscellaneous
|
|
314
|
+
----------------
|
|
315
|
+
|
|
316
|
+
This License represents the complete agreement concerning the subject
|
|
317
|
+
matter hereof. If any provision of this License is held to be
|
|
318
|
+
unenforceable, such provision shall be reformed only to the extent
|
|
319
|
+
necessary to make it enforceable. Any law or regulation which provides
|
|
320
|
+
that the language of a contract shall be construed against the drafter
|
|
321
|
+
shall not be used to construe this License against a Contributor.
|
|
322
|
+
|
|
323
|
+
10. Versions of the License
|
|
324
|
+
---------------------------
|
|
325
|
+
|
|
326
|
+
10.1. New Versions
|
|
327
|
+
|
|
328
|
+
Mozilla Foundation is the license steward. Except as provided in Section
|
|
329
|
+
10.3, no one other than the license steward has the right to modify or
|
|
330
|
+
publish new versions of this License. Each version will be given a
|
|
331
|
+
distinguishing version number.
|
|
332
|
+
|
|
333
|
+
10.2. Effect of New Versions
|
|
334
|
+
|
|
335
|
+
You may distribute the Covered Software under the terms of the version
|
|
336
|
+
of the License under which You originally received the Covered Software,
|
|
337
|
+
or under the terms of any subsequent version published by the license
|
|
338
|
+
steward.
|
|
339
|
+
|
|
340
|
+
10.3. Modified Versions
|
|
341
|
+
|
|
342
|
+
If you create software not governed by this License, and you want to
|
|
343
|
+
create a new license for such software, you may create and use a
|
|
344
|
+
modified version of this License if you rename the license and remove
|
|
345
|
+
any references to the name of the license steward (except to note that
|
|
346
|
+
such modified license differs from this License).
|
|
347
|
+
|
|
348
|
+
10.4. Distributing Source Code Form that is Incompatible With Secondary
|
|
349
|
+
Licenses
|
|
350
|
+
|
|
351
|
+
If You choose to distribute Source Code Form that is Incompatible With
|
|
352
|
+
Secondary Licenses under the terms of this version of the License, the
|
|
353
|
+
notice described in Exhibit B of this License must be attached.
|
|
354
|
+
|
|
355
|
+
Exhibit A - Source Code Form License Notice
|
|
356
|
+
-------------------------------------------
|
|
357
|
+
|
|
358
|
+
This Source Code Form is subject to the terms of the Mozilla Public
|
|
359
|
+
License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
360
|
+
file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
361
|
+
|
|
362
|
+
If it is not possible or desirable to put the notice in a particular
|
|
363
|
+
file, then You may include the notice in a location (such as a LICENSE
|
|
364
|
+
file in a relevant directory) where a recipient would be likely to look
|
|
365
|
+
for such a notice.
|
|
366
|
+
|
|
367
|
+
You may add additional accurate notices of copyright ownership.
|
|
368
|
+
|
|
369
|
+
Exhibit B - "Incompatible With Secondary Licenses" Notice
|
|
370
|
+
---------------------------------------------------------
|
|
371
|
+
|
|
372
|
+
This Source Code Form is "Incompatible With Secondary Licenses", as
|
|
373
|
+
defined by the Mozilla Public License, v. 2.0.
|
pyistat-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: pyistat
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Pyistat is a friendly module made to easily allow anyone to use Python to search and get datasets from ISTAT APIs. There are two modules: the "search" module is used to find datasets and gives all the information needed to build a request URL. The "get" module is used to get data after helping you properly setup the dimensions (the keys, as called by ISTAT). This module was created because I found the lack of documentation by ISTAT frustrating.
|
|
5
|
+
License: MPL-2.0
|
|
6
|
+
Author: Cosimo Di Martino
|
|
7
|
+
Author-email: derto.dimartino@gmail.com
|
|
8
|
+
Requires-Python: >=3.12
|
|
9
|
+
Classifier: License :: OSI Approved
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Requires-Dist: datetime (>=5.5,<6.0)
|
|
14
|
+
Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
|
15
|
+
Requires-Dist: requests (>=2.32.3,<3.0.0)
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# pyistat
|
|
19
|
+
|
|
20
|
+
# PyIstat: easy ISTAT APIs requests
|
|
21
|
+
|
|
22
|
+
Documentation for ISTAT APIs is non-existent and this is a shame. After much grief I created a simple module that allows analysts to search and extract data from their APIs without relying on the outdated information that can be found on the Internet.
|
|
23
|
+
|
|
24
|
+
## How does it work?
|
|
25
|
+
|
|
26
|
+
PyIstat has two modules: search and get.
|
|
27
|
+
|
|
28
|
+
### The search module
|
|
29
|
+
|
|
30
|
+
With the search module, you can easily request all the dataflows together with their structure. If you are looking for all dataflows, simply use get_all_dataflows().
|
|
31
|
+
|
|
32
|
+
```
from pyistat import search
|
|
33
|
+
import pandas as pd
|
|
34
|
+
|
|
35
|
+
df = search.get_all_dataflows()
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
With this code, you'll have a DataFrame with every dataflow available on the ISTAT API. However, if you are looking for a specific dataset, you can use the search_dataflows function.
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
search_term = ["Gross margin", "Energy"]
|
|
42
|
+
df = search_dataflows(search_term, mode="fast", lang="en", returned="dataframe")
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
The DataFrame returned will be populated with all the datasets found with those terms in their name. If you want to see what dimensions (keys) and dimension values are available, you can set mode="deep". This will return an additional column with a human-readable set of keys and key values. You can also set the language to lang="it", or you can choose to obtain a .csv file.
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
search_term = ["Gross margin", "Energy"]
|
|
49
|
+
search_dataflows(search_term, mode="deep", lang="it", returned="csv")
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### The get module
|
|
53
|
+
|
|
54
|
+
After finding the datasets you are most interested in, it's time to get that data from ISTAT APIs. First of all, you can check the dimensions and their ordering by using get_dimensions.
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
dimensions_df = get_dimensions(dataflow_id)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
This will return all the dimensions and their meaning in a readable DataFrame (use Spyder or another IDE with a variable explorer to make it even easier to read). The order of the dimensions will also be displayed, in case you want to pass a list with the dimensions. If you do not want to pass a list, you can pass dimensions as arguments of the function.
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
# Either pass a list with the ordered dimensions...
|
|
64
|
+
dimensions = ["Q", "W", "", "", "", ""] # Make sure to leave the unwanted dimensions with "".
|
|
65
|
+
pil_df = get_data("163_156_DF_DCCN_SQCQ_3", dimensions, start_period=2020)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Or use kwargs...
|
|
69
|
+
pil_df = get_data("163_156_DF_DCCN_SQCQ_3", end_period=2024, updated_after=2023, freq="Q", correz="W", returned="csv")
|
|
70
|
+
|
|
71
|
+
# Or simply get the full data available.
|
|
72
|
+
pil_df = get_data("163_156_DF_DCCN_SQCQ_3")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
There is an additional variable you can pass to the get_data function, which is force_url=True. Normally, the function checks whether the number of dimensions assigned is the same as the dimensions the dataflow requires, and whether the dimension values you provide are consistent with those of the dataflow. However, for unknown reasons, sometimes the number of dimensions found in the structure XML is different from what the dataflow actually requires... In this case, if you are confident the URL is correct (maybe try it in the browser first), you can pass force_url=True to skip the checks.
|
|
76
|
+
|
|
77
|
+
### To do
|
|
78
|
+
|
|
79
|
+
I made this module as I found the lack of documentation from ISTAT regarding their API access incredibly frustrating. I needed a quick way to get the data from their APIs in order to improve my data pipeline. However, this code needs some refining still; as of now, it works, but it can be more efficient.
|
|
80
|
+
|
|
81
|
+
If it gains traction I'd be more than happy to fix it wherever there is the need of fixing.
|
|
82
|
+
|
|
83
|
+
To do: a .exe that is system- and language-agnostic.
|
|
84
|
+
Fix inefficiencies in the code.
|
|
85
|
+
|
pyistat-0.1.0/README.md
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# pyistat
|
|
2
|
+
|
|
3
|
+
# PyIstat: easy ISTAT APIs requests
|
|
4
|
+
|
|
5
|
+
Documentation for ISTAT APIs is non-existent and this is a shame. After much grief I created a simple module that allows analysts to search and extract data from their APIs without relying on the outdated information that can be found on the Internet.
|
|
6
|
+
|
|
7
|
+
## How does it work?
|
|
8
|
+
|
|
9
|
+
PyIstat has two modules: search and get.
|
|
10
|
+
|
|
11
|
+
### The search module
|
|
12
|
+
|
|
13
|
+
With the search module, you can easily request all the dataflows together with their structure. If you are looking for all dataflows, simply use get_all_dataflows().
|
|
14
|
+
|
|
15
|
+
```
from pyistat import search
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
df = search.get_all_dataflows()
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
With this code, you'll have a DataFrame with every dataflow available on the ISTAT API. However, if you are looking for a specific dataset, you can use the search_dataflows function.
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
search_term = ["Gross margin", "Energy"]
|
|
25
|
+
df = search_dataflows(search_term, mode="fast", lang="en", returned="dataframe")
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
The DataFrame returned will be populated with all the datasets found with those terms in their name. If you want to see what dimensions (keys) and dimension values are available, you can set mode="deep". This will return an additional column with a human-readable set of keys and key values. You can also set the language to lang="it", or you can choose to obtain a .csv file.
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
search_term = ["Gross margin", "Energy"]
|
|
32
|
+
search_dataflows(search_term, mode="deep", lang="it", returned="csv")
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### The get module
|
|
36
|
+
|
|
37
|
+
After finding the datasets you are most interested in, it's time to get that data from ISTAT APIs. First of all, you can check the dimensions and their ordering by using get_dimensions.
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
dimensions_df = get_dimensions(dataflow_id)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
This will return all the dimensions and their meaning in a readable DataFrame (use Spyder or another IDE with a variable explorer to make it even easier to read). The order of the dimensions will also be displayed, in case you want to pass a list with the dimensions. If you do not want to pass a list, you can pass dimensions as arguments of the function.
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
# Either pass a list with the ordered dimensions...
|
|
47
|
+
dimensions = ["Q", "W", "", "", "", ""] # Make sure to leave the unwanted dimensions with "".
|
|
48
|
+
pil_df = get_data("163_156_DF_DCCN_SQCQ_3", dimensions, start_period=2020)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Or use kwargs...
|
|
52
|
+
pil_df = get_data("163_156_DF_DCCN_SQCQ_3", end_period=2024, updated_after=2023, freq="Q", correz="W", returned="csv")
|
|
53
|
+
|
|
54
|
+
# Or simply get the full data available.
|
|
55
|
+
pil_df = get_data("163_156_DF_DCCN_SQCQ_3")
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
There is an additional variable you can pass to the get_data function, which is force_url=True. Normally, the function checks whether the number of dimensions assigned is the same as the dimensions the dataflow requires, and whether the dimension values you provide are consistent with those of the dataflow. However, for unknown reasons, sometimes the number of dimensions found in the structure XML is different from what the dataflow actually requires... In this case, if you are confident the URL is correct (maybe try it in the browser first), you can pass force_url=True to skip the checks.
|
|
59
|
+
|
|
60
|
+
### To do
|
|
61
|
+
|
|
62
|
+
I made this module as I found the lack of documentation from ISTAT regarding their API access incredibly frustrating. I needed a quick way to get the data from their APIs in order to improve my data pipeline. However, this code needs some refining still; as of now, it works, but it can be more efficient.
|
|
63
|
+
|
|
64
|
+
If it gains traction I'd be more than happy to fix it wherever there is the need of fixing.
|
|
65
|
+
|
|
66
|
+
To do: a .exe that is system- and language-agnostic.
|
|
67
|
+
Fix inefficiencies in the code.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Created on Fri May 30 14:39:59 2025
|
|
4
|
+
|
|
5
|
+
@author: DiMartino
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .get import get_data, get_dimensions
|
|
9
|
+
from .search import get_all_dataflows, search_dataflows
|
|
10
|
+
|
|
11
|
+
__all__ = ['get_data', 'search_dataflows', 'get_all_dataflows', 'get_dimensions']
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Created on Fri May 30 12:37:57 2025
|
|
4
|
+
|
|
5
|
+
@author: DiMartino
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
class DimensionsOrKwargsError(Exception):
    """
    Error telling the caller to supply dimensions in one way only.

    Dimensions go either in an ordered list (take the order from
    get_dimensions) or in kwargs, never in both at once.
    """

    def __init__(self, message="Warning: either pass the dimensions in the correct order as a list or use kwargs."):
        # Hand the text to Exception so str(err) and err.args carry it,
        # then expose it under .message as well.
        super().__init__(message)
        self.message = message
|
|
16
|
+
|
|
17
|
+
class NotAListError(Exception):
    """
    Error for a Dimensions argument that is neither a list nor null.

    When a list is provided, the wildcard positions must still be present,
    filled with an empty string ('').
    """

    def __init__(
        self,
        message=(
            # Adjacent literals instead of a triple-quoted string, so the
            # message carries no raw line break or source indentation.
            "Warning: the variable Dimensions must be a list or null. "
            "Remember that if you provide a list, you must also provide "
            "wildcard values with a void string like ''."
        ),
    ):
        self.message = message
        super().__init__(self.message)
|
|
26
|
+
|
|
27
|
+
class TooManyDimensionsError(Exception):
    """
    Raised when the number of dimensions supplied differs (in either
    direction) from the number of dimensions the dataflow requires.

    Parameters
    ----------
    dimensions : sized collection,
        the dimensions supplied by the caller; only its length is used.
    dimensions_dict : sized collection (a dict in practice),
        the dimensions of the dataflow; only its length is used.
    message : String or None,
        overrides the generated message when given. The default is None.
    """
    def __init__(self, dimensions, dimensions_dict, message=None):
        if message is None:
            # Adjacent literals keep the message on one line; a triple-quoted
            # f-string would embed the source line breaks and indentation.
            message = (
                f"Warning: the dimensions you chose are {len(dimensions)}, while the dimensions "
                f"requested by the dataflow are {len(dimensions_dict)}. If you believe "
                "this is a mistake, you can force the url by adding force_url=True to the function call."
            )
        self.message = message
        super().__init__(self.message)
|
|
38
|
+
|
|
39
|
+
class DifferentDimensionValueError(Exception):
    """
    Raised when a supplied dimension value is not among the values available
    for the dimension at the same position in the dataflow.

    Parameters
    ----------
    user_dim : object,
        the value supplied by the caller; interpolated into the message.
    dataflow_dim : object,
        what the value was checked against; interpolated into the message.
    message : String or None,
        overrides the generated message when given. The default is None.
    """
    def __init__(self, user_dim, dataflow_dim, message=None):
        if message is None:
            # Adjacent literals keep the message on one line; a triple-quoted
            # f-string would embed the source line breaks and indentation.
            message = (
                f"Warning: the dimension value {user_dim} cannot be found in the possible values "
                f"for the dimension {dataflow_dim}. Check if the order of the dimensions is correct. "
                "If you believe this is an error, you can force the url by adding "
                "force_url=True to the function call."
            )
        self.message = message
        super().__init__(self.message)
|
|
51
|
+
|
|
52
|
+
class KwargsError(Exception):
    """
    Signals that a keyword filter could not be used: its name or its value
    did not match. Check the spelling of both, or pass a dimensions list.
    """
    def __init__(self, message="Error while using arguments. Check the name and value of arguments or use a list.."):
        super().__init__(message)
        # Also exposed as an attribute so the text can be read without str().
        self.message = message
|
|
59
|
+
|
|
60
|
+
class OtherResponseCodeError(Exception):
    """
    Signals that an HTTP request came back with a status code other than 200.

    The status code is interpolated into the generated message; pass
    ``message`` to replace the generated text entirely.
    """
    def __init__(self, response_code, message=None):
        generated = f"Error {response_code}. Check SDMX documentation and double check the dataflow id spelling."
        self.message = generated if message is None else message
        super().__init__(self.message)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class WrongFormatError(Exception):
    """
    Signals that the requested output format is not supported; the accepted
    choices are 'csv' and 'dataframe'.
    """
    def __init__(self, message="Wrong format requested. Choose either 'csv' or 'dataframe'."):
        super().__init__(message)
        # Also exposed as an attribute so the text can be read without str().
        self.message = message
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Created on Tue May 27 17:01:35 2025
|
|
4
|
+
|
|
5
|
+
@author: DiMartino
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import requests
|
|
10
|
+
import xml.etree.ElementTree as ET
|
|
11
|
+
import search
|
|
12
|
+
import errors
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
|
|
15
|
+
def get_data(dataflow_id, dimensions=None, force_url=False, start_period="", end_period="", updated_after="", returned="dataframe", **kwargs):
    """
    Request the observations of a dataflow and return them as a table.

    Parameters
    ----------
    dataflow_id : String,
        the dataflow id of the dataset.
    dimensions : List,
        an ordered list of strings of the dimensions, with '' as wildcard.
        Leave it empty if you use kwargs. The default is None, which is
        treated as an empty list.
    force_url : Bool,
        when True, the dimensions are not checked against the dataflow
        metadata before the request is sent. Keyword filters are only
        resolved when force_url is False. The default is False.
    start_period : Int,
        used to filter for start period. The default is "".
    end_period : Int,
        used to filter for end period. The default is "".
    updated_after : Int,
        used to filter for update period. The default is "".
    returned : String,
        "dataframe" or "csv", the format to be returned. The default is "dataframe".
    **kwargs : Key=value,
        each kwarg is matched (case-insensitively) against the dimension ids
        and values reported by get_dimensions and placed at the matching
        position of the key. Can't be used together with the dimensions
        list. Usage: freq="Q", correz="W"...

    Raises
    ------
    errors.WrongFormatError : returned is neither "dataframe" nor "csv".
    errors.DimensionsOrKwargsError : both a dimensions list and kwargs were passed.
    errors.NotAListError : dimensions is not a list.
    errors.TooManyDimensionsError : the list length differs from the dataflow's.
    errors.DifferentDimensionValueError : a listed value is not available.
    errors.KwargsError : a kwarg did not match any dimension id/value pair.
    errors.OtherResponseCodeError : the HTTP status code is not 200.

    Returns
    -------
    df : a pandas DataFrame with one row per observation if you choose the dataframe.
    None : if you choose the csv (a file is written in the working directory)
        or if no observation was found in the response.

    """
    # `!= a or != b` is true for every value; membership is the intended test.
    if returned not in ("dataframe", "csv"):
        raise errors.WrongFormatError()

    # A fresh list per call: a shared mutable default would accumulate
    # entries across calls in the kwargs branch below.
    if dimensions is None:
        dimensions = []

    if dimensions and kwargs:
        print("Warning: either pass a list that is ordered following the order you can find with get_dimensions, or pass args. You cannot pass both.")
        raise errors.DimensionsOrKwargsError
    elif not dimensions and not kwargs:
        dimensions = ["all"]
    elif not isinstance(dimensions, list):
        raise errors.NotAListError
    elif not force_url and dimensions:
        # Sometimes the checker can fail for undiscovered reasons; in that
        # case the caller is free to skip it with force_url=True.
        dimensions_dict = search.deep_search(dataflow_id, get=True)
        if len(dimensions) != len(dimensions_dict):
            raise errors.TooManyDimensionsError(dimensions, dimensions_dict)

        for user_dim, (dim_id, allowed_values) in zip(dimensions, dimensions_dict.items()):
            if user_dim != "" and user_dim not in allowed_values:
                # Report the dimension id, which is what the message names.
                raise errors.DifferentDimensionValueError(user_dim, dim_id)
    elif not force_url and kwargs:
        dimensions_df = get_dimensions(dataflow_id)
        dimensions_dict = search.deep_search(dataflow_id, get=True)
        # One wildcard slot per dimension of the dataflow.
        dimensions = [""] * len(dimensions_dict)
        for key, value in kwargs.items():
            matched = False
            for _, row in dimensions_df.iterrows():
                if (key.casefold() == row["dimension_id"].casefold()
                        and value.casefold() == row["dimension_value"].casefold()):
                    # The comparison ignores case, so store the spelling
                    # found in the metadata rather than the caller's.
                    dimensions[row["order"] - 1] = row["dimension_value"]
                    matched = True
                    break
            if not matched:
                raise errors.KwargsError()
    # NOTE(review): with force_url=True and kwargs, no branch above resolves
    # the kwargs, so the key part of the URL is left empty — confirm whether
    # that combination should be rejected instead.

    dim_string = '.'.join(dimensions)

    # Collect the query parameters, then join them once.
    query_params = []
    start_is_int = isinstance(start_period, int)
    end_is_int = isinstance(end_period, int)
    if (start_period != "" and not start_is_int) or (end_period != "" and not end_is_int):
        # Both period filters are dropped when either one is malformed.
        print("Warning: variables start_period and end_period are not an int or ''. Removed the period_string.")
    else:
        if start_is_int:
            query_params.append(f"startPeriod={start_period}")
        if end_is_int:
            query_params.append(f"endPeriod={end_period}")

    if isinstance(updated_after, int):
        query_params.append(f"updatedAfter={updated_after}")
    elif updated_after != "":
        # Only warn when a value was actually supplied.
        print("Warning: updated_after is not an int. Skipped.")

    period_string = "all?" + "&".join(query_params)
    api_url = rf"https://esploradati.istat.it/SDMXWS/rest/data/{dataflow_id}/{dim_string}/{period_string}"
    response = requests.get(api_url)
    response_code = response.status_code
    if response_code != 200:
        raise errors.OtherResponseCodeError(response_code)

    # 'utf-8-sig' drops a leading byte-order mark, if any, before parsing.
    tree = ET.ElementTree(ET.fromstring(response.content.decode('utf-8-sig')))
    namespaces = {
        'message': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message',
        'generic': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/data/generic',
        'common': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/common'
    }

    data = []
    for series in tree.findall('.//generic:Series', namespaces):
        # The series key holds the dimension values shared by all the
        # observations of the series.
        series_key = {}
        series_key_element = series.find('generic:SeriesKey', namespaces)
        if series_key_element is not None:
            for value in series_key_element.findall('generic:Value', namespaces):
                series_key[value.get('id')] = value.get('value')

        for obs in series.findall('generic:Obs', namespaces):
            obs_data = series_key.copy()
            obs_dimension = obs.find('generic:ObsDimension', namespaces)
            if obs_dimension is not None:
                obs_data['TIME_PERIOD'] = obs_dimension.get('value')

            obs_value = obs.find('generic:ObsValue', namespaces)
            if obs_value is not None:
                obs_data['OBS_VALUE'] = obs_value.get('value')

            data.append(obs_data)

    df = pd.DataFrame(data)

    if df.empty:
        print("No data retrieved. Open a request on GitHub, please.")
        return None
    if returned == "dataframe":
        return df
    df.to_csv(f"{dataflow_id}_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False)
    return None
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def get_dimensions(dataflow_id, lang="en", get=False, returned="dataframe"):
    """
    List the dimension values available for a dataflow, with their names.

    Every code of every codelist in the response is compared with the values
    listed under each KeyValue of the CubeRegion; one record is produced for
    each KeyValue that contains the code.

    Parameters
    ----------
    dataflow_id : String,
        the dataflow id of the dataset.
    lang : String,
        "en" or "it", the language of the names to read. The default is "en".
    get : Bool,
        when True the raw list of records is returned instead of a
        DataFrame/csv. The default is False.
    returned : String,
        "dataframe" or "csv", the format to be returned. The default is "dataframe".

    Raises
    ------
    errors.WrongFormatError : returned is neither "dataframe" nor "csv".
    errors.OtherResponseCodeError : the HTTP status code is not 200.

    Returns
    -------
    df : a pandas DataFrame with the columns dimension_id, dimension_name,
        dimension_value, value_explanation and order (1-based position of
        the KeyValue) if you choose the dataframe.
    list : the same records as a list of dicts when get is True.
    None : if you choose the csv (a file is written in the working directory).

    """
    # `!= a or != b` is true for every value; membership is the intended test.
    if returned not in ("dataframe", "csv"):
        raise errors.WrongFormatError()
    namespaces = {
        'message': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message',
        'structure': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/structure',
        'common': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/common',
        'xml': 'http://www.w3.org/XML/1998/namespace'
    }
    data_url = f"https://esploradati.istat.it/SDMXWS/rest/availableconstraint/{dataflow_id}/?references=all&detail=full"

    response = requests.get(data_url)
    response_code = response.status_code
    if response_code != 200:
        raise errors.OtherResponseCodeError(response_code)

    # 'utf-8-sig' drops a leading byte-order mark, if any, before parsing.
    tree = ET.ElementTree(ET.fromstring(response.content.decode('utf-8-sig')))
    cube_region = tree.find('.//structure:CubeRegion', namespaces)
    # Guard against a response without a CubeRegion element.
    key_values = [] if cube_region is None else cube_region.findall('.//common:KeyValue', namespaces)

    # Collect the values of each KeyValue once, so that the membership test
    # below does not rescan the XML for every code.
    values_by_position = [
        {value.text for value in key_value.findall('common:Value', namespaces)}
        for key_value in key_values
    ]

    name_path = f'.//common:Name[@xml:lang="{lang}"]'

    def _name_of(element):
        # Text of the element's name in the requested language, or None
        # when no name in that language is present.
        name = element.find(name_path, namespaces)
        return None if name is None else name.text

    codelist_list = []
    for codelist in tree.findall(".//structure:Codelist", namespaces):
        codelist_id = codelist.get('id')[3:]  # Remove the "CL_" prefix
        codelist_name = _name_of(codelist)

        for code in codelist.findall('.//structure:Code', namespaces):
            code_id = code.get('id')
            code_name = _name_of(code)

            for idx, available_values in enumerate(values_by_position):
                if code_id in available_values:
                    codelist_list.append({
                        'dimension_id': codelist_id,
                        'dimension_name': codelist_name,
                        'dimension_value': code_id,
                        'value_explanation': code_name,
                        'order': idx + 1
                    })

    if get:
        return codelist_list
    df = pd.DataFrame(codelist_list)
    if returned == "dataframe":
        return df
    # The file is a csv, so give it the matching extension.
    df.to_csv(f"{dataflow_id}_dimensions.csv")
    return None
|
|
221
|
+
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Created on Tue May 27 14:08:35 2025
|
|
4
|
+
|
|
5
|
+
@author: DiMartino
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import requests
|
|
10
|
+
import xml.etree.ElementTree as ET
|
|
11
|
+
import errors
|
|
12
|
+
|
|
13
|
+
def get_all_dataflows(returned="dataframe"):
    """
    Fetch the list of every dataflow published by the ISTAT endpoint.

    Used by search_dataflows as the catalogue to search in, but it can also
    be called on its own.

    Parameters
    ----------
    returned : String,
        "dataframe" or "csv" (compared case-insensitively), the format to be
        returned. The default is "dataframe".

    Raises
    ------
    errors.OtherResponseCodeError : the HTTP status code is not 200.
    errors.WrongFormatError : returned is neither "dataframe" nor "csv".

    Returns
    -------
    df : a pandas DataFrame with the columns id, agencyID, version, isFinal,
        name_it and name_en if you choose the dataframe.
    None : if you choose the csv ("all_dataflows_ISTAT.csv" is written in the
        working directory).

    """
    # This is the ISTAT url for all dataflows
    dataflow_url = "https://esploradati.istat.it/SDMXWS/rest/dataflow/ALL/ALL/LATEST"
    response = requests.get(dataflow_url)
    response_code = response.status_code
    if response_code != 200:
        raise errors.OtherResponseCodeError(response_code)

    xml_text = response.content.decode('utf-8-sig')
    tree = ET.ElementTree(ET.fromstring(xml_text))
    # Namespaces for ISTAT's SDMX dataflows
    namespaces = {
        'message': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message',
        'structure': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/structure',
        'common': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/common'
    }
    lang_attribute = '{http://www.w3.org/XML/1998/namespace}lang'

    data = []
    for dataflow in tree.findall('.//structure:Dataflow', namespaces):
        # Only the Italian and English names are kept; a language that is
        # not present stays None.
        names = {'it': None, 'en': None}
        for name in dataflow.findall('.//common:Name', namespaces):
            name_lang = name.get(lang_attribute)
            if name_lang in names:
                names[name_lang] = name.text
        data.append({
            'id': dataflow.get('id'),
            'agencyID': dataflow.get('agencyID'),
            'version': dataflow.get('version'),
            'isFinal': dataflow.get('isFinal'),
            'name_it': names['it'],
            'name_en': names['en']
        })

    df = pd.DataFrame(data)

    requested_format = returned.casefold()
    if requested_format == "dataframe":
        return df
    if requested_format == "csv":
        df.to_csv("all_dataflows_ISTAT.csv")
        return None
    raise errors.WrongFormatError()
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def search_dataflows(search_term, mode="fast", lang="en", returned="dataframe"):
    """
    Search the dataflow catalogue for one or more terms.

    A dataflow is kept when at least one term is contained (ignoring case)
    in the column selected by lang. Dataflows matched by several terms are
    returned once.

    Parameters
    ----------
    search_term : String or list of strings,
        is required to perform a search through the datasets.
    mode : String,
        can be deep or fast. Deep search requires more requests but also gets the dimensions for datasets in a readable way. Any other value makes the function return None. The default is "fast".
    lang : String,
        "en" or "it" to search the names in that language, "id" to search the dataflow ids. The default is "en".
    returned : String,
        "dataframe" or "csv", the format to be returned. The default is "dataframe".

    Raises
    ------
    errors
        WrongFormatError: when returned is neither "dataframe" nor "csv".
        OtherResponseCodeError: when the code response from the API URL is not 200.

    Returns
    -------
    df : Returns a pandas DataFrame with the matching dataflows if you choose the dataframe.
    None : if you choose the csv ("requested_data.csv" is written in the
        working directory), if lang is not recognised, if the catalogue is
        empty or if nothing matches.

    """
    # `!= a or != b` is true for every value; membership is the intended test.
    if returned not in ("dataframe", "csv"):
        raise errors.WrongFormatError()
    # The function must accept either single words or lists
    if isinstance(search_term, str):
        search_term = [search_term]

    # Column of the catalogue searched for each supported lang value.
    column_by_lang = {"en": "name_en", "it": "name_it", "id": "id"}
    column = column_by_lang.get(lang)
    if column is None:
        print("Language not found.")
        return None

    df = get_all_dataflows()
    if df.empty:
        # An empty catalogue has no columns, so searching it would fail.
        print("Error: cannot retrieve dataflows from the ISTAT API. Open a request on Github.")
        return None

    matches = []
    for term in search_term:
        temp_df = df[df[column].str.contains(term, case=False, na=False)]
        if temp_df.empty:
            print(f"Warning: the dataflow {term} could not be found.")
        else:
            matches.append(temp_df)
    if not matches:
        return None

    # Drop the rows matched by more than one term; in deep mode every row
    # costs one more request.
    search_df = pd.concat(matches, ignore_index=True).drop_duplicates(ignore_index=True)

    if mode == "fast":
        result_df = search_df
    elif mode == "deep":
        result_df = deep_search(search_df)
    else:
        return None

    if returned == "dataframe":
        return result_df
    result_df.to_csv("requested_data.csv", index=False)
    return None
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def deep_search(obj, lang="en", get=False):
    """
    Fetch the available dimension values of one or more dataflows.

    This function is used by the search_dataflows function if the selected
    mode is "deep", and by get.get_data to check the dimensions of a request.

    Parameters
    ----------
    obj : Can be a string or a DataFrame.
        A dataflow id, or a DataFrame with an "id" column of dataflow ids.
        A DataFrame passed in is not modified.
    lang : String,
        accepted for compatibility; it is not used by this function. The default is "en".
    get : Bool,
        used only when called by the function in get.py. The default is False.

    Raises
    ------
    errors
        OtherResponseCodeError: when the code response from the API URL is not 200.

    Returns
    -------
    df : normal return when used by search_dataflows: a copy of the input
        with a 'Dimensions' column holding, for each dataflow, a string like
        "KEY: v1, v2; KEY2: v3".
    dict : return when get is True: maps each dimension id (TIME_PERIOD
        excluded) to the list of its values, for the last dataflow processed.

    """
    namespaces = {
        'message': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message',
        'structure': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/structure',
        'common': 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1/common'
    }
    if isinstance(obj, pd.DataFrame):
        # Work on a copy so the caller's DataFrame does not gain a column.
        df = obj.copy()
    else:
        df = pd.DataFrame({"id": [obj]})

    codelist_list = []
    codelist_full_dict = {}
    for dataflow_id in df["id"]:
        data_url = f"https://esploradati.istat.it/SDMXWS/rest/availableconstraint/{dataflow_id}/?references=all&detail=full"

        response = requests.get(data_url)
        response_code = response.status_code
        if response_code != 200:
            raise errors.OtherResponseCodeError(response_code)

        # 'utf-8-sig' drops a leading byte-order mark, if any, before parsing.
        tree = ET.ElementTree(ET.fromstring(response.content.decode('utf-8-sig')))
        codelist_dict = {}
        for key_value in tree.findall('.//common:KeyValue', namespaces):
            key_id = key_value.get('id')
            if key_id != "TIME_PERIOD":
                codelist_dict[key_id] = [value.text for value in key_value.findall('common:Value', namespaces)]

        codelist_list.append("; ".join(f"{key}: {', '.join(values)}" for key, values in codelist_dict.items()))
        codelist_full_dict = codelist_dict

    if get:
        return codelist_full_dict
    df['Dimensions'] = codelist_list
    return df
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "pyistat"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Pyistat is a friendly module made to easily allow anyone to use Python to search and get datasets from ISTAT APIs. There are two modules: the \"search\" module is used to find datasets and gives all the information needed to build a request URL. The \"get\" module is used to get data after helping you properly setup the dimensions (the keys, as called by ISTAT). This module was created because I found the lack of documentation by ISTAT frustrating."
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "Cosimo Di Martino",email = "derto.dimartino@gmail.com"}
|
|
7
|
+
]
|
|
8
|
+
license = {text = "Mozilla Public License 2.0"}
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"requests (>=2.32.3,<3.0.0)",
|
|
13
|
+
"pandas (>=2.2.3,<3.0.0)",
|
|
14
|
+
"datetime (>=5.5,<6.0)"
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
[build-system]
|
|
19
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
20
|
+
build-backend = "poetry.core.masonry.api"
|