pyreadstat 1.2.9__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyreadstat might be problematic. Click here for more details.

pyreadstat/__init__.py ADDED
@@ -0,0 +1,26 @@
1
+ # #############################################################################
2
+ # Copyright 2018 Hoffmann-La Roche
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # #############################################################################
16
+
17
+ from .pyreadstat import read_sas7bdat, read_xport, read_dta, read_sav, read_por, read_sas7bcat
18
+ from .pyreadstat import write_sav, write_dta, write_xport, write_por
19
+ #from .pyreadstat import set_value_labels, set_catalog_to_sas
20
+ #from .pyreadstat import set_catalog_to_sas
21
+ from .pyreadstat import read_file_in_chunks, read_file_multiprocessing
22
+ from ._readstat_parser import ReadstatError, metadata_container
23
+ from .pyfunctions import set_value_labels, set_catalog_to_sas
24
+
25
+ __version__ = "1.2.9"
26
+
@@ -0,0 +1,115 @@
1
+ """
2
+ Functions written in pure python
3
+ """
4
+ from copy import deepcopy
5
+
6
+ import pandas as pd
7
+
8
+ # Functions to deal with value labels
9
+
10
def set_value_labels(dataframe, metadata, formats_as_category=True, formats_as_ordered_category=False):
    """
    Changes the values in the dataframe according to the value formats in the metadata.
    It will return a copy of the dataframe. If no appropriate formats were found, the result will be an
    unchanged copy of the original dataframe.

    Parameters
    ----------
        dataframe : pandas dataframe
            resulting from parsing a file
        metadata : metadata object
            resulting from parsing a file. Only its ``value_labels`` (label set name -> {value: label})
            and ``variable_to_label`` (column name -> label set name) attributes are read.
        formats_as_category : bool, optional
            defaults to True. If True the variables having formats will be transformed into pandas categories.
        formats_as_ordered_category : bool, optional
            defaults to False. If True the variables having formats will be transformed into pandas ordered
            categories, ordered by the smallest original value carrying each label.
            it has precedence over formats_as_category, meaning if this is True, it will take effect
            irrespective of the value of formats_as_category.

    Returns
    -------
        df_copy : pandas dataframe
            a copy of the original dataframe with the values changed, if appropriate formats were found,
            unaltered otherwise
    """

    df_copy = dataframe.copy()

    # Nothing to translate unless both the label sets and the column mapping exist.
    if not (metadata.value_labels and metadata.variable_to_label):
        return df_copy

    for var_name, label_name in metadata.variable_to_label.items():
        labels = metadata.value_labels.get(label_name)
        if not labels or var_name not in df_copy.columns:
            continue

        # Values without a label are passed through unchanged.
        df_copy[var_name] = df_copy[var_name].apply(lambda x: labels.get(x, x))

        if formats_as_ordered_category:
            # Remember, for every label, the smallest original value mapped to it.
            # setdefault keeps the first hit even when that value is falsy (e.g. 0);
            # a truthiness check here would let a later, larger value overwrite it.
            first_value = dict()
            for orival in sorted(labels):
                first_value.setdefault(labels[orival], orival)
            # Keys were inserted while walking the values in ascending order, so the
            # dict's insertion order already is the category order.
            categories = list(first_value)
            df_copy[var_name] = pd.Categorical(
                df_copy[var_name],
                ordered=True,
                categories=categories,
            )
        elif formats_as_category:
            df_copy[var_name] = df_copy[var_name].astype("category")

    return df_copy
64
+
65
def set_catalog_to_sas(sas_dataframe, sas_metadata, catalog_metadata, formats_as_category=True,
                       formats_as_ordered_category=False):
    """
    Apply the formats stored in a catalog to a dataframe and to its metadata.
    Neither input is modified: a copy of the dataframe and a deep copy of the metadata are returned.
    When the catalog has no value labels, or the metadata maps no variable to a label set, both
    copies are returned without further changes.

    Parameters
    ----------
        sas_dataframe : pandas dataframe
            resulting from parsing a sas7bdat file
        sas_metadata : pyreadstat metadata object
            resulting from parsing a sas7bdat file
        catalog_metadata : pyreadstat metadata object
            resulting from parsing a sas7bcat (catalog) file
        formats_as_category : bool, optional
            defaults to True. Forwarded to set_value_labels: formatted variables become pandas categories.
        formats_as_ordered_category : bool, optional
            defaults to False. Forwarded to set_value_labels: formatted variables become pandas ordered
            categories; takes precedence over formats_as_category.

    Returns
    -------
        df_copy : pandas dataframe
            a copy of the original dataframe with the values replaced by their labels where formats
            were found, unaltered otherwise
        metadata : pyreadstat metadata object
            a deep copy of sas_metadata; when formats were found its value_labels come from the catalog
            and variable_value_labels maps each formatted variable to its labels
    """

    enriched = deepcopy(sas_metadata)

    # Guard clause: without catalog labels or a variable mapping there is nothing to apply.
    if not (catalog_metadata.value_labels and sas_metadata.variable_to_label):
        return sas_dataframe.copy(), enriched

    catalog_labels = deepcopy(catalog_metadata).value_labels
    enriched.value_labels = catalog_labels
    relabelled = set_value_labels(
        sas_dataframe,
        enriched,
        formats_as_category=formats_as_category,
        formats_as_ordered_category=formats_as_ordered_category,
    )

    # Per-variable view of the catalog: keep only variables whose label set is non-empty.
    enriched.variable_value_labels = {
        column: catalog_labels[format_name]
        for column, format_name in enriched.variable_to_label.items()
        if catalog_labels.get(format_name)
    }

    return relabelled, enriched
115
+
pyreadstat/worker.py ADDED
@@ -0,0 +1,25 @@
1
+
2
+ # #############################################################################
3
+ # Copyright 2018 Hoffmann-La Roche
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ # #############################################################################
17
+
18
+ """
19
+ Functions to work with multiprocessing
20
+ """
21
+
22
def worker(inpt):
    """
    Read one slice of a file and return only the dataframe.

    Parameters
    ----------
        inpt : tuple
            five items, in order: the read function to call, the path handed to it, the row offset,
            the row limit, and a dict of extra keyword arguments forwarded to the read function.

    Returns
    -------
        the first element of the pair returned by the read function (the data); the second element
        (the metadata) is discarded.
    """
    reader, file_path, offset, limit, extra_kwargs = inpt
    frame, _ = reader(file_path, row_offset=offset, row_limit=limit, **extra_kwargs)
    return frame
@@ -0,0 +1,36 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyreadstat
3
+ Version: 1.2.9
4
+ Summary: Reads and Writes SAS, SPSS and Stata files into/from pandas data frames.
5
+ Home-page: https://github.com/Roche/pyreadstat
6
+ Download-URL: https://github.com/Roche/pyreadstat/dist
7
+ Author: Otto Fajardo
8
+ Author-email: pleasecontactviagithub@notvalid.com
9
+ License: Apache License Version 2.0
10
+ Classifier: Programming Language :: Python
11
+ Classifier: Programming Language :: Cython
12
+ Classifier: Programming Language :: C
13
+ Classifier: License :: OSI Approved :: Apache Software License
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Topic :: Scientific/Engineering
16
+ Classifier: Environment :: Console
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: pandas>=1.2.0
20
+ Dynamic: author
21
+ Dynamic: author-email
22
+ Dynamic: classifier
23
+ Dynamic: description
24
+ Dynamic: description-content-type
25
+ Dynamic: download-url
26
+ Dynamic: home-page
27
+ Dynamic: license
28
+ Dynamic: license-file
29
+ Dynamic: requires-dist
30
+ Dynamic: summary
31
+
32
+ A Python package to read and write SAS
33
+ (sas7bdat, sas7bcat, xport/xpt), SPSS (sav, zsav, por) and Stata (dta) files into/from pandas data frames. It is a wrapper
34
+ around the C library readstat.<br>
35
+ Please visit our project home page for more information:<br>
36
+ https://github.com/Roche/pyreadstat
@@ -0,0 +1,11 @@
1
+ pyreadstat/worker.py,sha256=DHA7KXzZ3KSUSiYrepD7RpABPSLCYDq3-hOcoaFNBaI,972
2
+ pyreadstat/_readstat_writer.cpython-312-darwin.so,sha256=0Vhc_F_SkJrWA0-UKkHTSr74K8ca2n2EjQ6WQsKYMZY,694168
3
+ pyreadstat/__init__.py,sha256=vtiUNptlyFWXtCbTNZGI4CCY3scZ6BnXgcDkLDRxCyc,1232
4
+ pyreadstat/pyreadstat.cpython-312-darwin.so,sha256=KvbGmnl4D5ZDsoKL6z8tU-U5H3xIzcrOZiY3RordCMw,635144
5
+ pyreadstat/_readstat_parser.cpython-312-darwin.so,sha256=st_hJ0jet2BCA6x1rPDrHmQ7AqXLdx3-8v9ppbg5BTg,603336
6
+ pyreadstat/pyfunctions.py,sha256=wnlWbD5o1knLWX28s9ve8jWWv_MFDUTQ7vQUiyNFYmk,5172
7
+ pyreadstat-1.2.9.dist-info/RECORD,,
8
+ pyreadstat-1.2.9.dist-info/WHEEL,sha256=BFdb_k0h28XLyTJbDhyfENAtM6pUDO59IS0eJF9X2OA,138
9
+ pyreadstat-1.2.9.dist-info/top_level.txt,sha256=7LlluhR4SADp00dJTEVpKMet_Jki7JHA6abJ-wu831E,11
10
+ pyreadstat-1.2.9.dist-info/METADATA,sha256=ST2Ynct8Vku0UNOoE9NE2EaR6M-a3bwniAU17gqmYhA,1281
11
+ pyreadstat-1.2.9.dist-info/licenses/LICENSE,sha256=Tjohfl1RlkuDoTF5ctnLvkGnr8TU27PEy7PhOHjRz5c,12903
@@ -0,0 +1,6 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.7.1)
3
+ Root-Is-Purelib: false
4
+ Tag: cp312-cp312-macosx_10_13_x86_64
5
+ Generator: delocate 0.13.0
6
+
@@ -0,0 +1,236 @@
1
+ The pyreadstat package as a whole is distributed under Apache License
2
+ Version 2 (see below).
3
+
4
+ The pyreadstat package also includes the following open source software
5
+ components:
6
+
7
+ - ReadStat; https://github.com/WizardMac/ReadStat, distributed under MIT license
8
+
9
+
10
+ ReadStat license:
11
+ ---------------------------------------------------------------------------
12
+
13
+ Copyright (c) 2013-2016 Evan Miller (except where otherwise noted)
14
+
15
+ Permission is hereby granted, free of charge, to any person obtaining a copy
16
+ of this software and associated documentation files (the "Software"), to deal
17
+ in the Software without restriction, including without limitation the rights
18
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
19
+ copies of the Software, and to permit persons to whom the Software is
20
+ furnished to do so, subject to the following conditions:
21
+
22
+ The above copyright notice and this permission notice shall be included in
23
+ all copies or substantial portions of the Software.
24
+
25
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
31
+ THE SOFTWARE.
32
+
33
+
34
+ pyreadstat license:
35
+ ---------------------------------------------------------------------------
36
+ Apache License
37
+ Version 2.0, January 2004
38
+ http://www.apache.org/licenses/
39
+
40
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
41
+
42
+ 1. Definitions.
43
+
44
+ "License" shall mean the terms and conditions for use, reproduction,
45
+ and distribution as defined by Sections 1 through 9 of this document.
46
+
47
+ "Licensor" shall mean the copyright owner or entity authorized by
48
+ the copyright owner that is granting the License.
49
+
50
+ "Legal Entity" shall mean the union of the acting entity and all
51
+ other entities that control, are controlled by, or are under common
52
+ control with that entity. For the purposes of this definition,
53
+ "control" means (i) the power, direct or indirect, to cause the
54
+ direction or management of such entity, whether by contract or
55
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
56
+ outstanding shares, or (iii) beneficial ownership of such entity.
57
+
58
+ "You" (or "Your") shall mean an individual or Legal Entity
59
+ exercising permissions granted by this License.
60
+
61
+ "Source" form shall mean the preferred form for making modifications,
62
+ including but not limited to software source code, documentation
63
+ source, and configuration files.
64
+
65
+ "Object" form shall mean any form resulting from mechanical
66
+ transformation or translation of a Source form, including but
67
+ not limited to compiled object code, generated documentation,
68
+ and conversions to other media types.
69
+
70
+ "Work" shall mean the work of authorship, whether in Source or
71
+ Object form, made available under the License, as indicated by a
72
+ copyright notice that is included in or attached to the work
73
+ (an example is provided in the Appendix below).
74
+
75
+ "Derivative Works" shall mean any work, whether in Source or Object
76
+ form, that is based on (or derived from) the Work and for which the
77
+ editorial revisions, annotations, elaborations, or other modifications
78
+ represent, as a whole, an original work of authorship. For the purposes
79
+ of this License, Derivative Works shall not include works that remain
80
+ separable from, or merely link (or bind by name) to the interfaces of,
81
+ the Work and Derivative Works thereof.
82
+
83
+ "Contribution" shall mean any work of authorship, including
84
+ the original version of the Work and any modifications or additions
85
+ to that Work or Derivative Works thereof, that is intentionally
86
+ submitted to Licensor for inclusion in the Work by the copyright owner
87
+ or by an individual or Legal Entity authorized to submit on behalf of
88
+ the copyright owner. For the purposes of this definition, "submitted"
89
+ means any form of electronic, verbal, or written communication sent
90
+ to the Licensor or its representatives, including but not limited to
91
+ communication on electronic mailing lists, source code control systems,
92
+ and issue tracking systems that are managed by, or on behalf of, the
93
+ Licensor for the purpose of discussing and improving the Work, but
94
+ excluding communication that is conspicuously marked or otherwise
95
+ designated in writing by the copyright owner as "Not a Contribution."
96
+
97
+ "Contributor" shall mean Licensor and any individual or Legal Entity
98
+ on behalf of whom a Contribution has been received by Licensor and
99
+ subsequently incorporated within the Work.
100
+
101
+ 2. Grant of Copyright License. Subject to the terms and conditions of
102
+ this License, each Contributor hereby grants to You a perpetual,
103
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
104
+ copyright license to reproduce, prepare Derivative Works of,
105
+ publicly display, publicly perform, sublicense, and distribute the
106
+ Work and such Derivative Works in Source or Object form.
107
+
108
+ 3. Grant of Patent License. Subject to the terms and conditions of
109
+ this License, each Contributor hereby grants to You a perpetual,
110
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
111
+ (except as stated in this section) patent license to make, have made,
112
+ use, offer to sell, sell, import, and otherwise transfer the Work,
113
+ where such license applies only to those patent claims licensable
114
+ by such Contributor that are necessarily infringed by their
115
+ Contribution(s) alone or by combination of their Contribution(s)
116
+ with the Work to which such Contribution(s) was submitted. If You
117
+ institute patent litigation against any entity (including a
118
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
119
+ or a Contribution incorporated within the Work constitutes direct
120
+ or contributory patent infringement, then any patent licenses
121
+ granted to You under this License for that Work shall terminate
122
+ as of the date such litigation is filed.
123
+
124
+ 4. Redistribution. You may reproduce and distribute copies of the
125
+ Work or Derivative Works thereof in any medium, with or without
126
+ modifications, and in Source or Object form, provided that You
127
+ meet the following conditions:
128
+
129
+ (a) You must give any other recipients of the Work or
130
+ Derivative Works a copy of this License; and
131
+
132
+ (b) You must cause any modified files to carry prominent notices
133
+ stating that You changed the files; and
134
+
135
+ (c) You must retain, in the Source form of any Derivative Works
136
+ that You distribute, all copyright, patent, trademark, and
137
+ attribution notices from the Source form of the Work,
138
+ excluding those notices that do not pertain to any part of
139
+ the Derivative Works; and
140
+
141
+ (d) If the Work includes a "NOTICE" text file as part of its
142
+ distribution, then any Derivative Works that You distribute must
143
+ include a readable copy of the attribution notices contained
144
+ within such NOTICE file, excluding those notices that do not
145
+ pertain to any part of the Derivative Works, in at least one
146
+ of the following places: within a NOTICE text file distributed
147
+ as part of the Derivative Works; within the Source form or
148
+ documentation, if provided along with the Derivative Works; or,
149
+ within a display generated by the Derivative Works, if and
150
+ wherever such third-party notices normally appear. The contents
151
+ of the NOTICE file are for informational purposes only and
152
+ do not modify the License. You may add Your own attribution
153
+ notices within Derivative Works that You distribute, alongside
154
+ or as an addendum to the NOTICE text from the Work, provided
155
+ that such additional attribution notices cannot be construed
156
+ as modifying the License.
157
+
158
+ You may add Your own copyright statement to Your modifications and
159
+ may provide additional or different license terms and conditions
160
+ for use, reproduction, or distribution of Your modifications, or
161
+ for any such Derivative Works as a whole, provided Your use,
162
+ reproduction, and distribution of the Work otherwise complies with
163
+ the conditions stated in this License.
164
+
165
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
166
+ any Contribution intentionally submitted for inclusion in the Work
167
+ by You to the Licensor shall be under the terms and conditions of
168
+ this License, without any additional terms or conditions.
169
+ Notwithstanding the above, nothing herein shall supersede or modify
170
+ the terms of any separate license agreement you may have executed
171
+ with Licensor regarding such Contributions.
172
+
173
+ 6. Trademarks. This License does not grant permission to use the trade
174
+ names, trademarks, service marks, or product names of the Licensor,
175
+ except as required for reasonable and customary use in describing the
176
+ origin of the Work and reproducing the content of the NOTICE file.
177
+
178
+ 7. Disclaimer of Warranty. Unless required by applicable law or
179
+ agreed to in writing, Licensor provides the Work (and each
180
+ Contributor provides its Contributions) on an "AS IS" BASIS,
181
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
182
+ implied, including, without limitation, any warranties or conditions
183
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
184
+ PARTICULAR PURPOSE. You are solely responsible for determining the
185
+ appropriateness of using or redistributing the Work and assume any
186
+ risks associated with Your exercise of permissions under this License.
187
+
188
+ 8. Limitation of Liability. In no event and under no legal theory,
189
+ whether in tort (including negligence), contract, or otherwise,
190
+ unless required by applicable law (such as deliberate and grossly
191
+ negligent acts) or agreed to in writing, shall any Contributor be
192
+ liable to You for damages, including any direct, indirect, special,
193
+ incidental, or consequential damages of any character arising as a
194
+ result of this License or out of the use or inability to use the
195
+ Work (including but not limited to damages for loss of goodwill,
196
+ work stoppage, computer failure or malfunction, or any and all
197
+ other commercial damages or losses), even if such Contributor
198
+ has been advised of the possibility of such damages.
199
+
200
+ 9. Accepting Warranty or Additional Liability. While redistributing
201
+ the Work or Derivative Works thereof, You may choose to offer,
202
+ and charge a fee for, acceptance of support, warranty, indemnity,
203
+ or other liability obligations and/or rights consistent with this
204
+ License. However, in accepting such obligations, You may act only
205
+ on Your own behalf and on Your sole responsibility, not on behalf
206
+ of any other Contributor, and only if You agree to indemnify,
207
+ defend, and hold each Contributor harmless for any liability
208
+ incurred by, or claims asserted against, such Contributor by reason
209
+ of your accepting any such warranty or additional liability.
210
+
211
+ END OF TERMS AND CONDITIONS
212
+
213
+ APPENDIX: How to apply the Apache License to your work.
214
+
215
+ To apply the Apache License to your work, attach the following
216
+ boilerplate notice, with the fields enclosed by brackets "[]"
217
+ replaced with your own identifying information. (Don't include
218
+ the brackets!) The text should be enclosed in the appropriate
219
+ comment syntax for the file format. We also recommend that a
220
+ file or class name and description of purpose be included on the
221
+ same "printed page" as the copyright notice for easier
222
+ identification within third-party archives.
223
+
224
+ Copyright [yyyy] [name of copyright owner]
225
+
226
+ Licensed under the Apache License, Version 2.0 (the "License");
227
+ you may not use this file except in compliance with the License.
228
+ You may obtain a copy of the License at
229
+
230
+ http://www.apache.org/licenses/LICENSE-2.0
231
+
232
+ Unless required by applicable law or agreed to in writing, software
233
+ distributed under the License is distributed on an "AS IS" BASIS,
234
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
235
+ See the License for the specific language governing permissions and
236
+ limitations under the License.
@@ -0,0 +1 @@
1
+ pyreadstat