pyreadstat 1.2.6__cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 1.2.8__cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyreadstat might be problematic. Click here for more details.

pyreadstat/__init__.py CHANGED
@@ -16,9 +16,11 @@
16
16
 
17
17
  from .pyreadstat import read_sas7bdat, read_xport, read_dta, read_sav, read_por, read_sas7bcat
18
18
  from .pyreadstat import write_sav, write_dta, write_xport, write_por
19
- from .pyreadstat import set_value_labels, set_catalog_to_sas
19
+ #from .pyreadstat import set_value_labels, set_catalog_to_sas
20
+ #from .pyreadstat import set_catalog_to_sas
20
21
  from .pyreadstat import read_file_in_chunks, read_file_multiprocessing
21
22
  from ._readstat_parser import ReadstatError, metadata_container
23
+ from .pyfunctions import set_value_labels, set_catalog_to_sas
22
24
 
23
- __version__ = "1.2.6"
25
+ __version__ = "1.2.8"
24
26
 
@@ -0,0 +1,115 @@
1
+ """
2
+ Functions written in pure python
3
+ """
4
+ from copy import deepcopy
5
+
6
+ import pandas as pd
7
+
8
+ # Functions to deal with value labels
9
+
10
def set_value_labels(dataframe, metadata, formats_as_category=True, formats_as_ordered_category=False):
    """
    Replace the values in the dataframe by their labels according to the value formats in the metadata.
    It will return a copy of the dataframe. If no appropriate formats were found, the result will be an
    unchanged copy of the original dataframe.

    Parameters
    ----------
        dataframe : pandas dataframe
            resulting from parsing a file
        metadata : pyreadstat metadata object
            resulting from parsing a file. Only the attributes value_labels (label set name -> dict of
            value -> label) and variable_to_label (column name -> label set name) are read.
        formats_as_category : bool, optional
            defaults to True. If True the variables having formats will be transformed into pandas categories.
        formats_as_ordered_category : bool, optional
            defaults to False. If True the variables having formats will be transformed into pandas ordered
            categories, ordered by the smallest original value carrying each label.
            It has precedence over formats_as_category, meaning if this is True, it will take effect
            irrespective of the value of formats_as_category.

    Returns
    -------
        df_copy : pandas dataframe
            a copy of the original dataframe with the values changed, if appropriate formats were found,
            unaltered otherwise
    """

    df_copy = dataframe.copy()

    if not (metadata.value_labels and metadata.variable_to_label):
        return df_copy

    for var_name, label_name in metadata.variable_to_label.items():
        labels = metadata.value_labels.get(label_name)
        if not labels or var_name not in df_copy.columns:
            continue

        # Values that have no label are kept as they are.
        df_copy[var_name] = df_copy[var_name].apply(lambda x: labels.get(x, x))

        if formats_as_ordered_category:
            # Map every label to the smallest original value carrying it. A membership
            # test (setdefault) is required here: testing the stored value for truthiness
            # would treat a legitimate original value of 0 (or "") as "not seen yet" and
            # let a larger value sharing the same label overwrite it, breaking the order.
            first_value_by_label = dict()
            for original_value in sorted(labels.keys()):
                first_value_by_label.setdefault(labels[original_value], original_value)
            # Keys were inserted in ascending order of their smallest original value,
            # so the dict's own order already is the category order.
            categories = list(first_value_by_label)
            df_copy[var_name] = pd.Categorical(
                df_copy[var_name],
                ordered=True,
                categories=categories,
            )
        elif formats_as_category:
            df_copy[var_name] = df_copy[var_name].astype("category")

    return df_copy
64
+
65
def set_catalog_to_sas(sas_dataframe, sas_metadata, catalog_metadata, formats_as_category=True,
                       formats_as_ordered_category=False):
    """
    Apply the formats of a SAS catalog to a dataframe and to its metadata.
    Both the dataframe and the metadata are returned as copies; the inputs are not modified. If the
    catalog has no value labels or the sas_metadata has no variable to label mapping, the copies
    are returned unchanged.

    Parameters
    ----------
        sas_dataframe : pandas dataframe
            resulting from parsing a sas7bdat file
        sas_metadata : pyreadstat metadata object
            resulting from parsing a sas7bdat file
        catalog_metadata : pyreadstat metadata object
            resulting from parsing a sas7bcat (catalog) file
        formats_as_category : bool, optional
            defaults to True. If True the variables having formats will be transformed into pandas categories.
        formats_as_ordered_category : bool, optional
            defaults to False. If True the variables having formats will be transformed into pandas ordered
            categories. It has precedence over formats_as_category, meaning if this is True, it will take
            effect irrespective of the value of formats_as_category.

    Returns
    -------
        df_copy : pandas dataframe
            a copy of the original dataframe with the values changed, if appropriate formats were found,
            unaltered otherwise
        metadata : pyreadstat metadata object
            a copy of the original sas_metadata enriched with catalog information if found, otherwise
            unaltered
    """

    has_formats = catalog_metadata.value_labels and sas_metadata.variable_to_label
    metadata = deepcopy(sas_metadata)

    # Nothing to apply: hand back plain copies.
    if not has_formats:
        return sas_dataframe.copy(), metadata

    # Work on a copy of the catalog so the returned metadata never shares state with the input.
    catalog_labels = deepcopy(catalog_metadata).value_labels
    metadata.value_labels = catalog_labels

    df_copy = set_value_labels(
        sas_dataframe,
        metadata,
        formats_as_category=formats_as_category,
        formats_as_ordered_category=formats_as_ordered_category,
    )

    # Per-variable view of the label sets; variables whose label set is missing or empty are skipped.
    metadata.variable_value_labels = {
        var_name: catalog_labels[label_name]
        for var_name, label_name in metadata.variable_to_label.items()
        if catalog_labels.get(label_name)
    }

    return df_copy, metadata
115
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pyreadstat
3
- Version: 1.2.6
3
+ Version: 1.2.8
4
4
  Summary: Reads and Writes SAS, SPSS and Stata files into/from pandas data frames.
5
5
  Home-page: https://github.com/Roche/pyreadstat
6
6
  Download-URL: https://github.com/Roche/pyreadstat/dist
@@ -16,7 +16,7 @@ Classifier: Topic :: Scientific/Engineering
16
16
  Classifier: Environment :: Console
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: pandas >=1.2.0
19
+ Requires-Dist: pandas>=1.2.0
20
20
 
21
21
  A Python package to read and write SAS
22
22
  (sas7bdat, sas7bcat, xport/xpt), SPSS (sav, zsav, por) and Stata (dta) files into/from pandas data frames. It is a wrapper
@@ -0,0 +1,11 @@
1
+ pyreadstat-1.2.8.dist-info/WHEEL,sha256=IjQRalpktecrnjQ0mlgISGBVe9z7Fv6GdQT0dw4Gf_A,153
2
+ pyreadstat-1.2.8.dist-info/LICENSE,sha256=Tjohfl1RlkuDoTF5ctnLvkGnr8TU27PEy7PhOHjRz5c,12903
3
+ pyreadstat-1.2.8.dist-info/RECORD,,
4
+ pyreadstat-1.2.8.dist-info/METADATA,sha256=HPwSGb59xY_-hzL38nXadw7IvwfWi8VrodMRVe88jsc,1048
5
+ pyreadstat-1.2.8.dist-info/top_level.txt,sha256=7LlluhR4SADp00dJTEVpKMet_Jki7JHA6abJ-wu831E,11
6
+ pyreadstat/pyreadstat.cpython-310-aarch64-linux-gnu.so,sha256=vKjm8joR164qzEFXTWM6UUyZ36G9WI5Q9HZdVc7pOIE,2895824
7
+ pyreadstat/_readstat_parser.cpython-310-aarch64-linux-gnu.so,sha256=Ckb1iUbAri1IwhJSGHr7eBGhJK5_1IJZrGV1xndrc54,2869752
8
+ pyreadstat/worker.py,sha256=DHA7KXzZ3KSUSiYrepD7RpABPSLCYDq3-hOcoaFNBaI,972
9
+ pyreadstat/__init__.py,sha256=hXfWCvrlQQ6KwBgT4b33_1_YgUw0PBmJu-sRmZZGrJo,1232
10
+ pyreadstat/pyfunctions.py,sha256=wnlWbD5o1knLWX28s9ve8jWWv_MFDUTQ7vQUiyNFYmk,5172
11
+ pyreadstat/_readstat_writer.cpython-310-aarch64-linux-gnu.so,sha256=1SuxUbJbBmpjKYqdZeYjPPRfB1WhL8yCdAKJ2bHwr8k,3340704
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.42.0)
2
+ Generator: setuptools (75.1.0)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp310-cp310-manylinux_2_17_aarch64
5
5
  Tag: cp310-cp310-manylinux2014_aarch64
@@ -1,10 +0,0 @@
1
- pyreadstat/_readstat_writer.cpython-310-aarch64-linux-gnu.so,sha256=rfOrQ51Plgo8ci4ZoupMfB6oVspixXMVMx3vXZb1Wy4,3266328
2
- pyreadstat/__init__.py,sha256=71erHFOhrvYUKI3X8RA1l_bRBclqHTIEEb__WM1GS4w,1124
3
- pyreadstat/_readstat_parser.cpython-310-aarch64-linux-gnu.so,sha256=kq7tlapATc3zSf11GHw6mPwN1nMgzHBqQ1pCG_snTB0,2821816
4
- pyreadstat/pyreadstat.cpython-310-aarch64-linux-gnu.so,sha256=RQ4E-OsY73JnhIzwYoQLRzovoj5FGJiF1Oa99qPw-bs,3058024
5
- pyreadstat/worker.py,sha256=DHA7KXzZ3KSUSiYrepD7RpABPSLCYDq3-hOcoaFNBaI,972
6
- pyreadstat-1.2.6.dist-info/WHEEL,sha256=7-IJbjbL0nWUFP_cSrTULDrfI4l8JN0GBqGHPqza7ao,154
7
- pyreadstat-1.2.6.dist-info/top_level.txt,sha256=7LlluhR4SADp00dJTEVpKMet_Jki7JHA6abJ-wu831E,11
8
- pyreadstat-1.2.6.dist-info/LICENSE,sha256=Tjohfl1RlkuDoTF5ctnLvkGnr8TU27PEy7PhOHjRz5c,12903
9
- pyreadstat-1.2.6.dist-info/RECORD,,
10
- pyreadstat-1.2.6.dist-info/METADATA,sha256=wFNERS2Kh28D6cZzrKlzSjsY5eh1zRwwEj_Tx6KD4Hg,1049