pyreadstat 1.3.0__cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl → 1.3.1__cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyreadstat might be problematic. Click here for more details.

pyreadstat/__init__.py CHANGED
@@ -22,5 +22,5 @@ from .pyreadstat import read_file_in_chunks, read_file_multiprocessing
22
22
  from ._readstat_parser import ReadstatError, metadata_container
23
23
  from .pyfunctions import set_value_labels, set_catalog_to_sas
24
24
 
25
- __version__ = "1.3.0"
25
+ __version__ = "1.3.1"
26
26
 
pyreadstat/pyfunctions.py CHANGED
@@ -1,9 +1,10 @@
1
1
  """
2
2
  Functions written in pure python
3
3
  """
4
- from copy import deepcopy
4
+ from copy import deepcopy, copy
5
+ import warnings
5
6
 
6
- import pandas as pd
7
+ import narwhals.stable.v2 as nw
7
8
 
8
9
  # Functions to deal with value labels
9
10
 
@@ -33,14 +34,42 @@ def set_value_labels(dataframe, metadata, formats_as_category=True, formats_as_o
33
34
  otherwise
34
35
  """
35
36
 
36
- df_copy = dataframe.copy()
37
+ df_copy = nw.from_native(dataframe).clone()
37
38
 
38
39
  if metadata.value_labels and metadata.variable_to_label:
39
40
  for var_name, label_name in metadata.variable_to_label.items():
40
41
  labels = metadata.value_labels.get(label_name)
41
42
  if labels:
43
+ labels = deepcopy(labels)
42
44
  if var_name in df_copy.columns:
43
- df_copy[var_name] = df_copy[var_name].apply(lambda x: labels.get(x, x))
45
+ # unique does not work for polars Object
46
+ if not df_copy.implementation.is_pandas() and df_copy[var_name].dtype == nw.Object:
47
+ unvals = list(set(df_copy[var_name].to_list()))
48
+ else:
49
+ unvals = df_copy[var_name].unique()
50
+ for uval in unvals:
51
+ if uval not in labels:
52
+ labels[uval] = uval
53
+ # if all values are null, there will be nothing to replace. However we cannot do replace_strict on null dtype, it raises an error
54
+ if not df_copy.implementation.is_pandas() and (len(df_copy[var_name])==df_copy[var_name].null_count()):
55
+ continue
56
+ # replace_strict requires that all the values are in the map. Could not get map_batches or when/then/otherwise to work
57
+ elif not df_copy.implementation.is_pandas() and (df_copy[var_name].dtype==nw.Object or not all([type(v)==type(list(labels.values())[0]) for v in labels.values() if v is not None])):
58
+ # polars is very difficult to convince to mix strings and numbers, so we have to do it this way
59
+ temp = [labels[x] for x in df_copy[var_name]]
60
+ newser = nw.new_series(name=var_name, values= temp, dtype=nw.Object, backend=df_copy.implementation)
61
+ df_copy = df_copy.with_columns(newser.alias(var_name))
62
+ if formats_as_category or formats_as_ordered_category:
63
+ msg = f"You requested formats_as_category=True or formats_as_ordered_category=True, but it was not possible to cast variable '{var_name}' to category"
64
+ warnings.warn(msg, RuntimeWarning)
65
+ continue
66
+ # not sure if we get into this situation ever or what would exactly happen, maybe this is not needed?
67
+ elif not df_copy.implementation.is_pandas() and df_copy[var_name].dtype==nw.Unknown:
68
+ msg = f"It was not possible to apply value formats to variable '{var_name}' due to unknown/not supported data type"
69
+ warnings.warn(msg, RuntimeWarning)
70
+ continue
71
+ else:
72
+ df_copy = df_copy.with_columns(nw.col(var_name).replace_strict(labels))
44
73
  if formats_as_ordered_category:
45
74
  categories = list(set(labels.values()))
46
75
  original_values = list(labels.keys())
@@ -51,16 +80,12 @@ def set_value_labels(dataframe, metadata, formats_as_category=True, formats_as_o
51
80
  if not revdict.get(curcat):
52
81
  revdict[curcat] = orival
53
82
  categories.sort(key=revdict.get)
54
- df_copy[var_name] = pd.Categorical(
55
- df_copy[var_name],
56
- ordered = True,
57
- categories = categories
58
- )
83
+ df_copy = df_copy.with_columns(nw.col(var_name).cast(nw.Enum(categories)))
59
84
  elif formats_as_category:
60
- df_copy[var_name] = df_copy[var_name].astype("category")
85
+ df_copy = df_copy.with_columns(nw.col(var_name).cast(nw.Categorical))
61
86
 
62
87
 
63
- return df_copy
88
+ return df_copy.to_native()
64
89
 
65
90
  def set_catalog_to_sas(sas_dataframe, sas_metadata, catalog_metadata, formats_as_category=True,
66
91
  formats_as_ordered_category=False):
@@ -108,7 +133,8 @@ def set_catalog_to_sas(sas_dataframe, sas_metadata, catalog_metadata, formats_as
108
133
  metadata.variable_value_labels = variable_value_labels
109
134
 
110
135
  else:
111
- df_copy = sas_dataframe.copy()
136
+ #df_copy = sas_dataframe.copy()
137
+ df_copy = nw.from_native(sas_dataframe).clone().to_native()
112
138
  metadata = deepcopy(sas_metadata)
113
139
 
114
140
  return df_copy, metadata
@@ -1,22 +1,22 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyreadstat
3
- Version: 1.3.0
4
- Summary: Reads and Writes SAS, SPSS and Stata files into/from pandas data frames.
3
+ Version: 1.3.1
4
+ Summary: Reads and Writes SAS, SPSS and Stata files into/from pandas and polars data frames.
5
5
  Home-page: https://github.com/Roche/pyreadstat
6
6
  Download-URL: https://github.com/Roche/pyreadstat/dist
7
7
  Author: Otto Fajardo
8
8
  Author-email: pleasecontactviagithub@notvalid.com
9
- License: Apache License Version 2.0
9
+ License: Apache-2.0
10
10
  Classifier: Programming Language :: Python
11
11
  Classifier: Programming Language :: Cython
12
12
  Classifier: Programming Language :: C
13
- Classifier: License :: OSI Approved :: Apache Software License
14
13
  Classifier: Intended Audience :: Science/Research
15
14
  Classifier: Topic :: Scientific/Engineering
16
15
  Classifier: Environment :: Console
17
16
  Description-Content-Type: text/markdown
18
17
  License-File: LICENSE
19
- Requires-Dist: pandas>=1.2.0
18
+ Requires-Dist: narwhals>=2.0
19
+ Requires-Dist: numpy
20
20
  Dynamic: author
21
21
  Dynamic: author-email
22
22
  Dynamic: classifier
@@ -30,7 +30,7 @@ Dynamic: requires-dist
30
30
  Dynamic: summary
31
31
 
32
32
  A Python package to read and write SAS
33
- (sas7bdat, sas7bcat, xport/xpt), SPSS (sav, zsav, por) and Stata (dta) files into/from pandas data frames. It is a wrapper
33
+ (sas7bdat, sas7bcat, xport/xpt), SPSS (sav, zsav, por) and Stata (dta) files into/from pandas and polars data frames. It is a wrapper
34
34
  around the C library readstat.<br>
35
35
  Please visit our project home page for more information:<br>
36
36
  https://github.com/Roche/pyreadstat
@@ -0,0 +1,11 @@
1
+ pyreadstat/__init__.py,sha256=v8Bii3pv9gaU6zX6BDQgQHtPLhPxu0RGTJDUtInmPKg,1232
2
+ pyreadstat/_readstat_parser.cpython-311-aarch64-linux-gnu.so,sha256=4cb8f1xPOJqeRJ1NB-kF1sruJYtEzKXXwObX0TDw-ZQ,632152
3
+ pyreadstat/_readstat_writer.cpython-311-aarch64-linux-gnu.so,sha256=KpevgVSi8UiyEsVnGh1pvXNsKWA0iGS3KKeMN9qDpVY,625920
4
+ pyreadstat/pyfunctions.py,sha256=JkCsGUT71iLU5onnIhTXzMbzjCjGcAz2HMDB0b3ptIU,7577
5
+ pyreadstat/pyreadstat.cpython-311-aarch64-linux-gnu.so,sha256=Kj_Me2tOmzRPjFvzbeLgR1GJCvD61zO4wuPIaTj37cc,627392
6
+ pyreadstat/worker.py,sha256=DHA7KXzZ3KSUSiYrepD7RpABPSLCYDq3-hOcoaFNBaI,972
7
+ pyreadstat-1.3.1.dist-info/METADATA,sha256=SnLBSBGd78hVp0WsWeG787hT5FReEHJmJlqPQjL7ZpQ,1245
8
+ pyreadstat-1.3.1.dist-info/WHEEL,sha256=QJg38rE8f0PT7_ZWlFpvwOoUFGenUbSJhXM-6SbDiao,153
9
+ pyreadstat-1.3.1.dist-info/top_level.txt,sha256=7LlluhR4SADp00dJTEVpKMet_Jki7JHA6abJ-wu831E,11
10
+ pyreadstat-1.3.1.dist-info/RECORD,,
11
+ pyreadstat-1.3.1.dist-info/licenses/LICENSE,sha256=Tjohfl1RlkuDoTF5ctnLvkGnr8TU27PEy7PhOHjRz5c,12903
@@ -1,11 +0,0 @@
1
- pyreadstat/__init__.py,sha256=wIENGF3x1VBer_Tp0r5e8PxGlcz5jDU_Wu2T0qHKya4,1232
2
- pyreadstat/_readstat_parser.cpython-311-aarch64-linux-gnu.so,sha256=s9Yv7IalNPuMtvG7d-Eu8lqFewTS23JDY79xxO-sg5s,630336
3
- pyreadstat/_readstat_writer.cpython-311-aarch64-linux-gnu.so,sha256=3ZkVQWom4D2E0d5EJFnkzSW92RsjYuBX3MEiVpyoJts,692800
4
- pyreadstat/pyfunctions.py,sha256=wnlWbD5o1knLWX28s9ve8jWWv_MFDUTQ7vQUiyNFYmk,5172
5
- pyreadstat/pyreadstat.cpython-311-aarch64-linux-gnu.so,sha256=qHeQzIKzdLfP4MdLRZ4gVLLDP9YU3z_oxmPjrMcNoo8,626928
6
- pyreadstat/worker.py,sha256=DHA7KXzZ3KSUSiYrepD7RpABPSLCYDq3-hOcoaFNBaI,972
7
- pyreadstat-1.3.0.dist-info/METADATA,sha256=kqbzYyVqeHb4XpJzpmbwa0YIyvftxZtdcYt0uUEVMCo,1281
8
- pyreadstat-1.3.0.dist-info/WHEEL,sha256=QJg38rE8f0PT7_ZWlFpvwOoUFGenUbSJhXM-6SbDiao,153
9
- pyreadstat-1.3.0.dist-info/top_level.txt,sha256=7LlluhR4SADp00dJTEVpKMet_Jki7JHA6abJ-wu831E,11
10
- pyreadstat-1.3.0.dist-info/RECORD,,
11
- pyreadstat-1.3.0.dist-info/licenses/LICENSE,sha256=Tjohfl1RlkuDoTF5ctnLvkGnr8TU27PEy7PhOHjRz5c,12903