pyreadstat 1.2.6__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 1.2.8__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyreadstat might be problematic. Click here for more details.
- pyreadstat/__init__.py +4 -2
- pyreadstat/_readstat_parser.cpython-312-aarch64-linux-gnu.so +0 -0
- pyreadstat/_readstat_writer.cpython-312-aarch64-linux-gnu.so +0 -0
- pyreadstat/pyfunctions.py +115 -0
- pyreadstat/pyreadstat.cpython-312-aarch64-linux-gnu.so +0 -0
- {pyreadstat-1.2.6.dist-info → pyreadstat-1.2.8.dist-info}/METADATA +2 -2
- pyreadstat-1.2.8.dist-info/RECORD +11 -0
- {pyreadstat-1.2.6.dist-info → pyreadstat-1.2.8.dist-info}/WHEEL +1 -1
- pyreadstat-1.2.6.dist-info/RECORD +0 -10
- {pyreadstat-1.2.6.dist-info → pyreadstat-1.2.8.dist-info}/LICENSE +0 -0
- {pyreadstat-1.2.6.dist-info → pyreadstat-1.2.8.dist-info}/top_level.txt +0 -0
pyreadstat/__init__.py
CHANGED
|
@@ -16,9 +16,11 @@
|
|
|
16
16
|
|
|
17
17
|
from .pyreadstat import read_sas7bdat, read_xport, read_dta, read_sav, read_por, read_sas7bcat
|
|
18
18
|
from .pyreadstat import write_sav, write_dta, write_xport, write_por
|
|
19
|
-
from .pyreadstat import set_value_labels, set_catalog_to_sas
|
|
19
|
+
#from .pyreadstat import set_value_labels, set_catalog_to_sas
|
|
20
|
+
#from .pyreadstat import set_catalog_to_sas
|
|
20
21
|
from .pyreadstat import read_file_in_chunks, read_file_multiprocessing
|
|
21
22
|
from ._readstat_parser import ReadstatError, metadata_container
|
|
23
|
+
from .pyfunctions import set_value_labels, set_catalog_to_sas
|
|
22
24
|
|
|
23
|
-
__version__ = "1.2.6"
|
|
25
|
+
__version__ = "1.2.8"
|
|
24
26
|
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Functions written in pure python
|
|
3
|
+
"""
|
|
4
|
+
from copy import deepcopy
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
# Functions to deal with value labels
|
|
9
|
+
|
|
10
|
+
def set_value_labels(dataframe, metadata, formats_as_category=True, formats_as_ordered_category=False):
|
|
11
|
+
"""
|
|
12
|
+
Changes the values in the dataframe according to the value formats in the metadata.
|
|
13
|
+
It will return a copy of the dataframe. If no appropiate formats were found, the result will be an unchanged copy
|
|
14
|
+
of the original dataframe.
|
|
15
|
+
|
|
16
|
+
Parameters
|
|
17
|
+
----------
|
|
18
|
+
dataframe : pandas dataframe
|
|
19
|
+
resulting from parsing a file
|
|
20
|
+
metadata : dictionary
|
|
21
|
+
resulting from parsing a file
|
|
22
|
+
formats_as_category : bool, optional
|
|
23
|
+
defaults to True. If True the variables having formats will be transformed into pandas categories.
|
|
24
|
+
formats_as_ordered_category : bool, optional
|
|
25
|
+
defaults to False. If True the variables having formats will be transformed into pandas ordered categories.
|
|
26
|
+
it has precedence over formats_as_category, meaning if this is True, it will take effect irrespective of
|
|
27
|
+
the value of formats_as_category.
|
|
28
|
+
|
|
29
|
+
Returns
|
|
30
|
+
-------
|
|
31
|
+
df_copy : pandas dataframe
|
|
32
|
+
a copy of the original dataframe with the values changed, if appropiate formats were found, unaltered
|
|
33
|
+
otherwise
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
df_copy = dataframe.copy()
|
|
37
|
+
|
|
38
|
+
if metadata.value_labels and metadata.variable_to_label:
|
|
39
|
+
for var_name, label_name in metadata.variable_to_label.items():
|
|
40
|
+
labels = metadata.value_labels.get(label_name)
|
|
41
|
+
if labels:
|
|
42
|
+
if var_name in df_copy.columns:
|
|
43
|
+
df_copy[var_name] = df_copy[var_name].apply(lambda x: labels.get(x, x))
|
|
44
|
+
if formats_as_ordered_category:
|
|
45
|
+
categories = list(set(labels.values()))
|
|
46
|
+
original_values = list(labels.keys())
|
|
47
|
+
original_values.sort()
|
|
48
|
+
revdict= dict()
|
|
49
|
+
for orival in original_values:
|
|
50
|
+
curcat = labels.get(orival)
|
|
51
|
+
if not revdict.get(curcat):
|
|
52
|
+
revdict[curcat] = orival
|
|
53
|
+
categories.sort(key=revdict.get)
|
|
54
|
+
df_copy[var_name] = pd.Categorical(
|
|
55
|
+
df_copy[var_name],
|
|
56
|
+
ordered = True,
|
|
57
|
+
categories = categories
|
|
58
|
+
)
|
|
59
|
+
elif formats_as_category:
|
|
60
|
+
df_copy[var_name] = df_copy[var_name].astype("category")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
return df_copy
|
|
64
|
+
|
|
65
|
+
def set_catalog_to_sas(sas_dataframe, sas_metadata, catalog_metadata, formats_as_category=True,
|
|
66
|
+
formats_as_ordered_category=False):
|
|
67
|
+
"""
|
|
68
|
+
Changes the values in the dataframe and sas_metadata according to the formats in the catalog.
|
|
69
|
+
It will return a copy of the dataframe and metadata. If no appropriate formats were found, the result will
|
|
70
|
+
be an unchanged copy of the original dataframe.
|
|
71
|
+
|
|
72
|
+
Parameters
|
|
73
|
+
----------
|
|
74
|
+
sas_dataframe : pandas dataframe
|
|
75
|
+
resulting from parsing a sas7bdat file
|
|
76
|
+
sas_metadata : pyreadstat metadata object
|
|
77
|
+
resulting from parsing a sas7bdat file
|
|
78
|
+
catalog_metadata : pyreadstat metadata object
|
|
79
|
+
resulting from parsing a sas7bcat (catalog) file
|
|
80
|
+
formats_as_category : bool, optional
|
|
81
|
+
defaults to True. If True the variables having formats will be transformed into pandas categories.
|
|
82
|
+
formats_as_ordered_category : bool, optional
|
|
83
|
+
defaults to False. If True the variables having formats will be transformed into pandas ordered categories.
|
|
84
|
+
it has precedence over formats_as_category, meaning if this is True, it will take effect irrespective of
|
|
85
|
+
the value of formats_as_category.
|
|
86
|
+
|
|
87
|
+
Returns
|
|
88
|
+
-------
|
|
89
|
+
df_copy : pandas dataframe
|
|
90
|
+
a copy of the original dataframe with the values changed, if appropriate formats were found, unaltered
|
|
91
|
+
otherwise
|
|
92
|
+
metadata : dict
|
|
93
|
+
a copy of the original sas_metadata enriched with catalog information if found, otherwise unaltered
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
if catalog_metadata.value_labels and sas_metadata.variable_to_label:
|
|
97
|
+
catalog_metadata_copy = deepcopy(catalog_metadata)
|
|
98
|
+
metadata = deepcopy(sas_metadata)
|
|
99
|
+
metadata.value_labels = catalog_metadata_copy.value_labels
|
|
100
|
+
df_copy = set_value_labels(sas_dataframe, metadata, formats_as_category=formats_as_category,
|
|
101
|
+
formats_as_ordered_category=formats_as_ordered_category)
|
|
102
|
+
|
|
103
|
+
variable_value_labels = dict()
|
|
104
|
+
for var_name, var_label in metadata.variable_to_label.items():
|
|
105
|
+
current_labels = catalog_metadata_copy.value_labels.get(var_label)
|
|
106
|
+
if current_labels:
|
|
107
|
+
variable_value_labels[var_name] = current_labels
|
|
108
|
+
metadata.variable_value_labels = variable_value_labels
|
|
109
|
+
|
|
110
|
+
else:
|
|
111
|
+
df_copy = sas_dataframe.copy()
|
|
112
|
+
metadata = deepcopy(sas_metadata)
|
|
113
|
+
|
|
114
|
+
return df_copy, metadata
|
|
115
|
+
|
|
Binary file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pyreadstat
|
|
3
|
-
Version: 1.2.6
|
|
3
|
+
Version: 1.2.8
|
|
4
4
|
Summary: Reads and Writes SAS, SPSS and Stata files into/from pandas data frames.
|
|
5
5
|
Home-page: https://github.com/Roche/pyreadstat
|
|
6
6
|
Download-URL: https://github.com/Roche/pyreadstat/dist
|
|
@@ -16,7 +16,7 @@ Classifier: Topic :: Scientific/Engineering
|
|
|
16
16
|
Classifier: Environment :: Console
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
|
-
Requires-Dist: pandas
|
|
19
|
+
Requires-Dist: pandas>=1.2.0
|
|
20
20
|
|
|
21
21
|
A Python package to read and write SAS
|
|
22
22
|
(sas7bdat, sas7bcat, xport/xpt), SPSS (sav, zsav, por) and Stata (dta) files into/from pandas data frames. It is a wrapper
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
pyreadstat-1.2.8.dist-info/WHEEL,sha256=Z868N0_Fq1ssfDKgnQWj75ig0pzypFewyov-H4g6Btc,153
|
|
2
|
+
pyreadstat-1.2.8.dist-info/LICENSE,sha256=Tjohfl1RlkuDoTF5ctnLvkGnr8TU27PEy7PhOHjRz5c,12903
|
|
3
|
+
pyreadstat-1.2.8.dist-info/RECORD,,
|
|
4
|
+
pyreadstat-1.2.8.dist-info/METADATA,sha256=HPwSGb59xY_-hzL38nXadw7IvwfWi8VrodMRVe88jsc,1048
|
|
5
|
+
pyreadstat-1.2.8.dist-info/top_level.txt,sha256=7LlluhR4SADp00dJTEVpKMet_Jki7JHA6abJ-wu831E,11
|
|
6
|
+
pyreadstat/_readstat_parser.cpython-312-aarch64-linux-gnu.so,sha256=ePJw-u8VWPsTVTAYhzZExI-cQOeXFavevYIZ5JxOai4,2974848
|
|
7
|
+
pyreadstat/_readstat_writer.cpython-312-aarch64-linux-gnu.so,sha256=OpNKBCve9P7jJZBGu_aO7oDc1O6CbNvrhK1wbQLdjWY,3593024
|
|
8
|
+
pyreadstat/pyreadstat.cpython-312-aarch64-linux-gnu.so,sha256=SjB1ZjKIdHHfNtUqME9BsUaBNkSxt1MHjgvMwhK-BKo,3153016
|
|
9
|
+
pyreadstat/worker.py,sha256=DHA7KXzZ3KSUSiYrepD7RpABPSLCYDq3-hOcoaFNBaI,972
|
|
10
|
+
pyreadstat/__init__.py,sha256=hXfWCvrlQQ6KwBgT4b33_1_YgUw0PBmJu-sRmZZGrJo,1232
|
|
11
|
+
pyreadstat/pyfunctions.py,sha256=wnlWbD5o1knLWX28s9ve8jWWv_MFDUTQ7vQUiyNFYmk,5172
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
pyreadstat/_readstat_parser.cpython-312-aarch64-linux-gnu.so,sha256=HHdK_vJU-9z6dQnmeQwwc8dOh9JUEUgBpsA1JAGGK7M,2931584
|
|
2
|
-
pyreadstat/_readstat_writer.cpython-312-aarch64-linux-gnu.so,sha256=GqUbCqddvUz5PTLrT05HSlJv7NTvrJva3TuyQgibnCw,3510304
|
|
3
|
-
pyreadstat/__init__.py,sha256=71erHFOhrvYUKI3X8RA1l_bRBclqHTIEEb__WM1GS4w,1124
|
|
4
|
-
pyreadstat/pyreadstat.cpython-312-aarch64-linux-gnu.so,sha256=x1fv4H1T2zhgnqvy_T62RAKzzShJbfW1wHWGKgSLEXY,3331464
|
|
5
|
-
pyreadstat/worker.py,sha256=DHA7KXzZ3KSUSiYrepD7RpABPSLCYDq3-hOcoaFNBaI,972
|
|
6
|
-
pyreadstat-1.2.6.dist-info/WHEEL,sha256=dSOw0vIE7tyToMwjLRGc34v_dTietGsEoaHI_vCu11U,154
|
|
7
|
-
pyreadstat-1.2.6.dist-info/top_level.txt,sha256=7LlluhR4SADp00dJTEVpKMet_Jki7JHA6abJ-wu831E,11
|
|
8
|
-
pyreadstat-1.2.6.dist-info/LICENSE,sha256=Tjohfl1RlkuDoTF5ctnLvkGnr8TU27PEy7PhOHjRz5c,12903
|
|
9
|
-
pyreadstat-1.2.6.dist-info/RECORD,,
|
|
10
|
-
pyreadstat-1.2.6.dist-info/METADATA,sha256=wFNERS2Kh28D6cZzrKlzSjsY5eh1zRwwEj_Tx6KD4Hg,1049
|
|
File without changes
|
|
File without changes
|