pyreadstat 1.2.9__cp313-cp313-macosx_10_13_x86_64.whl → 1.3.1__cp313-cp313-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyreadstat might be problematic. Click here for more details.
- pyreadstat/__init__.py +1 -1
- pyreadstat/_readstat_parser.cpython-313-darwin.so +0 -0
- pyreadstat/_readstat_writer.cpython-313-darwin.so +0 -0
- pyreadstat/pyfunctions.py +38 -12
- pyreadstat/pyreadstat.cpython-313-darwin.so +0 -0
- {pyreadstat-1.2.9.dist-info → pyreadstat-1.3.1.dist-info}/METADATA +6 -6
- pyreadstat-1.3.1.dist-info/RECORD +11 -0
- {pyreadstat-1.2.9.dist-info → pyreadstat-1.3.1.dist-info}/WHEEL +1 -1
- pyreadstat-1.2.9.dist-info/RECORD +0 -11
- {pyreadstat-1.2.9.dist-info → pyreadstat-1.3.1.dist-info}/licenses/LICENSE +0 -0
- {pyreadstat-1.2.9.dist-info → pyreadstat-1.3.1.dist-info}/top_level.txt +0 -0
pyreadstat/__init__.py
CHANGED
|
Binary file
|
|
Binary file
|
pyreadstat/pyfunctions.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Functions written in pure python
|
|
3
3
|
"""
|
|
4
|
-
from copy import deepcopy
|
|
4
|
+
from copy import deepcopy, copy
|
|
5
|
+
import warnings
|
|
5
6
|
|
|
6
|
-
import
|
|
7
|
+
import narwhals.stable.v2 as nw
|
|
7
8
|
|
|
8
9
|
# Functions to deal with value labels
|
|
9
10
|
|
|
@@ -33,14 +34,42 @@ def set_value_labels(dataframe, metadata, formats_as_category=True, formats_as_o
|
|
|
33
34
|
otherwise
|
|
34
35
|
"""
|
|
35
36
|
|
|
36
|
-
df_copy = dataframe.
|
|
37
|
+
df_copy = nw.from_native(dataframe).clone()
|
|
37
38
|
|
|
38
39
|
if metadata.value_labels and metadata.variable_to_label:
|
|
39
40
|
for var_name, label_name in metadata.variable_to_label.items():
|
|
40
41
|
labels = metadata.value_labels.get(label_name)
|
|
41
42
|
if labels:
|
|
43
|
+
labels = deepcopy(labels)
|
|
42
44
|
if var_name in df_copy.columns:
|
|
43
|
-
|
|
45
|
+
# unique does not work for polars Object
|
|
46
|
+
if not df_copy.implementation.is_pandas() and df_copy[var_name].dtype == nw.Object:
|
|
47
|
+
unvals = list(set(df_copy[var_name].to_list()))
|
|
48
|
+
else:
|
|
49
|
+
unvals = df_copy[var_name].unique()
|
|
50
|
+
for uval in unvals:
|
|
51
|
+
if uval not in labels:
|
|
52
|
+
labels[uval] = uval
|
|
53
|
+
# if all values are null, there will be nothing to replace. However we cannot do replace_strict on null dtype, it raises an error
|
|
54
|
+
if not df_copy.implementation.is_pandas() and (len(df_copy[var_name])==df_copy[var_name].null_count()):
|
|
55
|
+
continue
|
|
56
|
+
# replace_strict requires that all the values are in the map. Could not get map_batches or when/then/otherwise to work
|
|
57
|
+
elif not df_copy.implementation.is_pandas() and (df_copy[var_name].dtype==nw.Object or not all([type(v)==type(list(labels.values())[0]) for v in labels.values() if v is not None])):
|
|
58
|
+
# polars is very difficult to convince to mix strings and numbers, so we have to do it this way
|
|
59
|
+
temp = [labels[x] for x in df_copy[var_name]]
|
|
60
|
+
newser = nw.new_series(name=var_name, values= temp, dtype=nw.Object, backend=df_copy.implementation)
|
|
61
|
+
df_copy = df_copy.with_columns(newser.alias(var_name))
|
|
62
|
+
if formats_as_category or formats_as_ordered_category:
|
|
63
|
+
msg = f"You requested formats_as_category=True or formats_as_ordered_category=True, but it was not possible to cast variable '{var_name}' to category"
|
|
64
|
+
warnings.warn(msg, RuntimeWarning)
|
|
65
|
+
continue
|
|
66
|
+
# not sure if we get into this situation ever or what would exactly happen, maybe this is not needed?
|
|
67
|
+
elif not df_copy.implementation.is_pandas() and df_copy[var_name].dtype==nw.Unknown:
|
|
68
|
+
msg = f"It was not possible to apply value formats to variable '{var_name}' due to unknown/not supported data type"
|
|
69
|
+
warnings.warn(msg, RuntimeWarning)
|
|
70
|
+
continue
|
|
71
|
+
else:
|
|
72
|
+
df_copy = df_copy.with_columns(nw.col(var_name).replace_strict(labels))
|
|
44
73
|
if formats_as_ordered_category:
|
|
45
74
|
categories = list(set(labels.values()))
|
|
46
75
|
original_values = list(labels.keys())
|
|
@@ -51,16 +80,12 @@ def set_value_labels(dataframe, metadata, formats_as_category=True, formats_as_o
|
|
|
51
80
|
if not revdict.get(curcat):
|
|
52
81
|
revdict[curcat] = orival
|
|
53
82
|
categories.sort(key=revdict.get)
|
|
54
|
-
df_copy
|
|
55
|
-
df_copy[var_name],
|
|
56
|
-
ordered = True,
|
|
57
|
-
categories = categories
|
|
58
|
-
)
|
|
83
|
+
df_copy = df_copy.with_columns(nw.col(var_name).cast(nw.Enum(categories)))
|
|
59
84
|
elif formats_as_category:
|
|
60
|
-
df_copy
|
|
85
|
+
df_copy = df_copy.with_columns(nw.col(var_name).cast(nw.Categorical))
|
|
61
86
|
|
|
62
87
|
|
|
63
|
-
return df_copy
|
|
88
|
+
return df_copy.to_native()
|
|
64
89
|
|
|
65
90
|
def set_catalog_to_sas(sas_dataframe, sas_metadata, catalog_metadata, formats_as_category=True,
|
|
66
91
|
formats_as_ordered_category=False):
|
|
@@ -108,7 +133,8 @@ def set_catalog_to_sas(sas_dataframe, sas_metadata, catalog_metadata, formats_as
|
|
|
108
133
|
metadata.variable_value_labels = variable_value_labels
|
|
109
134
|
|
|
110
135
|
else:
|
|
111
|
-
df_copy = sas_dataframe.copy()
|
|
136
|
+
#df_copy = sas_dataframe.copy()
|
|
137
|
+
df_copy = nw.from_native(sas_dataframe).clone().to_native()
|
|
112
138
|
metadata = deepcopy(sas_metadata)
|
|
113
139
|
|
|
114
140
|
return df_copy, metadata
|
|
Binary file
|
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyreadstat
|
|
3
|
-
Version: 1.2.9
|
|
4
|
-
Summary: Reads and Writes SAS, SPSS and Stata files into/from pandas data frames.
|
|
3
|
+
Version: 1.3.1
|
|
4
|
+
Summary: Reads and Writes SAS, SPSS and Stata files into/from pandas and polars data frames.
|
|
5
5
|
Home-page: https://github.com/Roche/pyreadstat
|
|
6
6
|
Download-URL: https://github.com/Roche/pyreadstat/dist
|
|
7
7
|
Author: Otto Fajardo
|
|
8
8
|
Author-email: pleasecontactviagithub@notvalid.com
|
|
9
|
-
License: Apache
|
|
9
|
+
License: Apache-2.0
|
|
10
10
|
Classifier: Programming Language :: Python
|
|
11
11
|
Classifier: Programming Language :: Cython
|
|
12
12
|
Classifier: Programming Language :: C
|
|
13
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
13
|
Classifier: Intended Audience :: Science/Research
|
|
15
14
|
Classifier: Topic :: Scientific/Engineering
|
|
16
15
|
Classifier: Environment :: Console
|
|
17
16
|
Description-Content-Type: text/markdown
|
|
18
17
|
License-File: LICENSE
|
|
19
|
-
Requires-Dist:
|
|
18
|
+
Requires-Dist: narwhals>=2.0
|
|
19
|
+
Requires-Dist: numpy
|
|
20
20
|
Dynamic: author
|
|
21
21
|
Dynamic: author-email
|
|
22
22
|
Dynamic: classifier
|
|
@@ -30,7 +30,7 @@ Dynamic: requires-dist
|
|
|
30
30
|
Dynamic: summary
|
|
31
31
|
|
|
32
32
|
A Python package to read and write SAS
|
|
33
|
-
(sas7bdat, sas7bcat, xport/xpt), SPSS (sav, zsav, por) and Stata (dta) files into/from pandas data frames. It is a wrapper
|
|
33
|
+
(sas7bdat, sas7bcat, xport/xpt), SPSS (sav, zsav, por) and Stata (dta) files into/from pandas and polars data frames. It is a wrapper
|
|
34
34
|
around the C library readstat.<br>
|
|
35
35
|
Please visit our project home page for more information:<br>
|
|
36
36
|
https://github.com/Roche/pyreadstat
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
pyreadstat/worker.py,sha256=DHA7KXzZ3KSUSiYrepD7RpABPSLCYDq3-hOcoaFNBaI,972
|
|
2
|
+
pyreadstat/_readstat_writer.cpython-313-darwin.so,sha256=z1XAOAGYInglsdLawSIYRPkAHWacMcguZ7wq6UhK-HM,677112
|
|
3
|
+
pyreadstat/pyreadstat.cpython-313-darwin.so,sha256=S-MQFiktODhSOHu5DPEWWUsqadZmchXwk4UEXGJCk1k,636128
|
|
4
|
+
pyreadstat/__init__.py,sha256=v8Bii3pv9gaU6zX6BDQgQHtPLhPxu0RGTJDUtInmPKg,1232
|
|
5
|
+
pyreadstat/pyfunctions.py,sha256=JkCsGUT71iLU5onnIhTXzMbzjCjGcAz2HMDB0b3ptIU,7577
|
|
6
|
+
pyreadstat/_readstat_parser.cpython-313-darwin.so,sha256=3gBmrNmT0tz0jjM98Mbiz4R3XbqS-Qfl24y0PBWklOM,637872
|
|
7
|
+
pyreadstat-1.3.1.dist-info/RECORD,,
|
|
8
|
+
pyreadstat-1.3.1.dist-info/WHEEL,sha256=0rn5ODYhsjI3KEHrk9RXNWLZT7Rowo6wA0Jh2iAIJLk,138
|
|
9
|
+
pyreadstat-1.3.1.dist-info/top_level.txt,sha256=7LlluhR4SADp00dJTEVpKMet_Jki7JHA6abJ-wu831E,11
|
|
10
|
+
pyreadstat-1.3.1.dist-info/METADATA,sha256=SnLBSBGd78hVp0WsWeG787hT5FReEHJmJlqPQjL7ZpQ,1245
|
|
11
|
+
pyreadstat-1.3.1.dist-info/licenses/LICENSE,sha256=Tjohfl1RlkuDoTF5ctnLvkGnr8TU27PEy7PhOHjRz5c,12903
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
pyreadstat/worker.py,sha256=DHA7KXzZ3KSUSiYrepD7RpABPSLCYDq3-hOcoaFNBaI,972
|
|
2
|
-
pyreadstat/_readstat_writer.cpython-313-darwin.so,sha256=XBvkLU2g2VKfF7UPi9p0U03t7E3NCorhvsYOZ94nCww,694120
|
|
3
|
-
pyreadstat/pyreadstat.cpython-313-darwin.so,sha256=nNAgTLiJklQs3JSYpM5-hN8tIllCg8ZMF8oLKqm1Yqo,635120
|
|
4
|
-
pyreadstat/__init__.py,sha256=vtiUNptlyFWXtCbTNZGI4CCY3scZ6BnXgcDkLDRxCyc,1232
|
|
5
|
-
pyreadstat/pyfunctions.py,sha256=wnlWbD5o1knLWX28s9ve8jWWv_MFDUTQ7vQUiyNFYmk,5172
|
|
6
|
-
pyreadstat/_readstat_parser.cpython-313-darwin.so,sha256=urrz5rpElwJTyRes0_gRmxammpxSllRTpZFz3poPERc,603280
|
|
7
|
-
pyreadstat-1.2.9.dist-info/RECORD,,
|
|
8
|
-
pyreadstat-1.2.9.dist-info/WHEEL,sha256=7serL2wuoHsjl6sqYTxAO9RZ3KPeoUy7VZm2_MBalZ8,138
|
|
9
|
-
pyreadstat-1.2.9.dist-info/top_level.txt,sha256=7LlluhR4SADp00dJTEVpKMet_Jki7JHA6abJ-wu831E,11
|
|
10
|
-
pyreadstat-1.2.9.dist-info/METADATA,sha256=ST2Ynct8Vku0UNOoE9NE2EaR6M-a3bwniAU17gqmYhA,1281
|
|
11
|
-
pyreadstat-1.2.9.dist-info/licenses/LICENSE,sha256=Tjohfl1RlkuDoTF5ctnLvkGnr8TU27PEy7PhOHjRz5c,12903
|
|
File without changes
|
|
File without changes
|