pyreadstat 1.2.9__tar.gz → 1.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyreadstat might be problematic. Click here for more details.

Files changed (103) hide show
  1. {pyreadstat-1.2.9/pyreadstat.egg-info → pyreadstat-1.3.1}/PKG-INFO +18 -7
  2. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/README.md +42 -35
  3. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/__init__.py +1 -1
  4. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/_readstat_parser.c +11089 -10246
  5. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/_readstat_parser.pxd +5 -2
  6. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/_readstat_parser.pyx +157 -51
  7. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/_readstat_writer.c +15418 -15802
  8. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/_readstat_writer.pxd +12 -5
  9. pyreadstat-1.3.1/pyreadstat/_readstat_writer.pyx +986 -0
  10. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/pyfunctions.py +38 -12
  11. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/pyreadstat.c +9281 -8804
  12. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/pyreadstat.pyx +71 -70
  13. {pyreadstat-1.2.9 → pyreadstat-1.3.1/pyreadstat.egg-info}/PKG-INFO +18 -7
  14. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat.egg-info/SOURCES.txt +44 -0
  15. pyreadstat-1.3.1/pyreadstat.egg-info/requires.txt +2 -0
  16. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/setup.py +30 -8
  17. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_sas.c +7 -2
  18. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_sas7bcat_read.c +12 -2
  19. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_sas7bcat_write.c +8 -2
  20. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_sas7bdat_read.c +13 -5
  21. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_por_read.c +1 -2
  22. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_por_write.c +2 -3
  23. pyreadstat-1.3.1/src/spss/readstat_sav_parse.c +843 -0
  24. pyreadstat-1.3.1/src/spss/readstat_sav_parse_mr_name.c +546 -0
  25. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_sav_read.c +9 -12
  26. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/tests/test_basic.py +3 -3
  27. pyreadstat-1.3.1/tests/test_narwhalified.py +1323 -0
  28. pyreadstat-1.2.9/pyreadstat.egg-info/requires.txt +0 -1
  29. pyreadstat-1.2.9/src/spss/readstat_sav_parse.c +0 -872
  30. pyreadstat-1.2.9/src/spss/readstat_sav_parse_mr_name.c +0 -468
  31. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/LICENSE +0 -0
  32. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/MANIFEST.in +0 -0
  33. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyproject.toml +0 -0
  34. /pyreadstat-1.2.9/pyreadstat/_readstat_writer.pyx → /pyreadstat-1.3.1/pyreadstat/_readstat_writer_pandas.pyx +0 -0
  35. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/conditional_includes.h +0 -0
  36. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/pyreadstat.pxd +0 -0
  37. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/readstat_api.pxd +0 -0
  38. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat/worker.py +0 -0
  39. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat.egg-info/dependency_links.txt +0 -0
  40. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/pyreadstat.egg-info/top_level.txt +0 -0
  41. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/setup.cfg +0 -0
  42. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/CKHashTable.c +0 -0
  43. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/CKHashTable.h +0 -0
  44. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat.h +0 -0
  45. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_bits.c +0 -0
  46. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_bits.h +0 -0
  47. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_convert.c +0 -0
  48. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_convert.h +0 -0
  49. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_error.c +0 -0
  50. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_iconv.h +0 -0
  51. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_io_unistd.c +0 -0
  52. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_io_unistd.h +0 -0
  53. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_malloc.c +0 -0
  54. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_malloc.h +0 -0
  55. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_metadata.c +0 -0
  56. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_parser.c +0 -0
  57. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_strings.h +0 -0
  58. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_value.c +0 -0
  59. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_variable.c +0 -0
  60. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_writer.c +0 -0
  61. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/readstat_writer.h +0 -0
  62. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/ieee.c +0 -0
  63. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/ieee.h +0 -0
  64. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_sas.h +0 -0
  65. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_sas7bdat_write.c +0 -0
  66. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_sas_rle.c +0 -0
  67. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_sas_rle.h +0 -0
  68. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_xport.c +0 -0
  69. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_xport.h +0 -0
  70. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_xport_parse_format.c +0 -0
  71. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_xport_parse_format.h +0 -0
  72. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_xport_read.c +0 -0
  73. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/sas/readstat_xport_write.c +0 -0
  74. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_por.c +0 -0
  75. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_por.h +0 -0
  76. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_por_parse.c +0 -0
  77. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_por_parse.h +0 -0
  78. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_sav.c +0 -0
  79. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_sav.h +0 -0
  80. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_sav_compress.c +0 -0
  81. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_sav_compress.h +0 -0
  82. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_sav_parse.h +0 -0
  83. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_sav_parse_mr_name.h +0 -0
  84. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_sav_parse_timestamp.c +0 -0
  85. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_sav_parse_timestamp.h +0 -0
  86. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_sav_write.c +0 -0
  87. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_spss.c +0 -0
  88. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_spss.h +0 -0
  89. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_spss_parse.c +0 -0
  90. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_spss_parse.h +0 -0
  91. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_zsav_compress.c +0 -0
  92. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_zsav_compress.h +0 -0
  93. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_zsav_read.c +0 -0
  94. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_zsav_read.h +0 -0
  95. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_zsav_write.c +0 -0
  96. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/spss/readstat_zsav_write.h +0 -0
  97. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/stata/readstat_dta.c +0 -0
  98. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/stata/readstat_dta.h +0 -0
  99. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/stata/readstat_dta_parse_timestamp.c +0 -0
  100. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/stata/readstat_dta_parse_timestamp.h +0 -0
  101. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/stata/readstat_dta_read.c +0 -0
  102. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/src/stata/readstat_dta_write.c +0 -0
  103. {pyreadstat-1.2.9 → pyreadstat-1.3.1}/tests/test_version.py +0 -0
@@ -1,25 +1,36 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: pyreadstat
3
- Version: 1.2.9
4
- Summary: Reads and Writes SAS, SPSS and Stata files into/from pandas data frames.
3
+ Version: 1.3.1
4
+ Summary: Reads and Writes SAS, SPSS and Stata files into/from pandas and polars data frames.
5
5
  Home-page: https://github.com/Roche/pyreadstat
6
6
  Download-URL: https://github.com/Roche/pyreadstat/dist
7
7
  Author: Otto Fajardo
8
8
  Author-email: pleasecontactviagithub@notvalid.com
9
- License: Apache License Version 2.0
9
+ License: Apache-2.0
10
10
  Classifier: Programming Language :: Python
11
11
  Classifier: Programming Language :: Cython
12
12
  Classifier: Programming Language :: C
13
- Classifier: License :: OSI Approved :: Apache Software License
14
13
  Classifier: Intended Audience :: Science/Research
15
14
  Classifier: Topic :: Scientific/Engineering
16
15
  Classifier: Environment :: Console
17
16
  Description-Content-Type: text/markdown
18
17
  License-File: LICENSE
19
- Requires-Dist: pandas>=1.2.0
18
+ Requires-Dist: narwhals>=2.0
19
+ Requires-Dist: numpy
20
+ Dynamic: author
21
+ Dynamic: author-email
22
+ Dynamic: classifier
23
+ Dynamic: description
24
+ Dynamic: description-content-type
25
+ Dynamic: download-url
26
+ Dynamic: home-page
27
+ Dynamic: license
28
+ Dynamic: license-file
29
+ Dynamic: requires-dist
30
+ Dynamic: summary
20
31
 
21
32
  A Python package to read and write SAS
22
- (sas7bdat, sas7bcat, xport/xpt), SPSS (sav, zsav, por) and Stata (dta) files into/from pandas data frames. It is a wrapper
33
+ (sas7bdat, sas7bcat, xport/xpt), SPSS (sav, zsav, por) and Stata (dta) files into/from pandas and polars data frames. It is a wrapper
23
34
  around the C library readstat.<br>
24
35
  Please visit our project home page for more information:<br>
25
36
  https://github.com/Roche/pyreadstat
@@ -1,7 +1,7 @@
1
1
  # pyreadstat
2
2
 
3
3
  A python package to read and write sas (sas7bdat, sas7bcat, xport), spss (sav, zsav, por) and stata (dta) data files
4
- into/from pandas dataframes.
4
+ into/from pandas and polars dataframes.
5
5
  <br>
6
6
 
7
7
  This module is a wrapper around the excellent [Readstat](https://github.com/WizardMac/ReadStat) C library by
@@ -133,7 +133,8 @@ brings a big hit in performance. The situation can be improved tough by reading
133
133
 
134
134
  ## Dependencies
135
135
 
136
- The module depends on pandas, which you normally have installed if you got Anaconda (highly recommended.)
136
+ The module depends on numpy and narwhals, a package to interface with pandas and polars. In addition you will need to have installed
137
+ either pandas or polars.
137
138
 
138
139
  In order to compile from source you will need a C compiler (see installation).
139
140
  Only if you want to make changes to the Cython source code, you will need Cython (normally not necessary).
@@ -222,7 +223,7 @@ the folder build, otherwise you may be installing the old compilation again).
222
223
 
223
224
  #### Reading files
224
225
 
225
- Pass the path to a file to any of the functions provided by pyreadstat. It will return a pandas data frame and a metadata
226
+ Pass the path to a file to any of the functions provided by pyreadstat. It will return a pandas or polars data frame and a metadata
226
227
  object. <br>
227
228
  The dataframe uses the column names. The metadata object contains the column names, column labels, number_rows,
228
229
  number_columns, file label
@@ -234,7 +235,8 @@ For example, in order to read a sas7bdat file:
234
235
  ```python
235
236
  import pyreadstat
236
237
 
237
- df, meta = pyreadstat.read_sas7bdat('/path/to/a/file.sas7bdat')
238
+ # output format by default is pandas. You can use polars to get a polars dataframe.
239
+ df, meta = pyreadstat.read_sas7bdat('/path/to/a/file.sas7bdat', output_format="pandas")
238
240
 
239
241
  # done! let's see what we got
240
242
  print(df.head())
@@ -257,25 +259,38 @@ df.columns = meta.column_labels
257
259
  df.columns = meta.column_names
258
260
  ```
259
261
 
262
+ As mentioned before you can very easily read into a polars dataframe by using the output_format argument:
263
+
264
+ ```python
265
+ import pyreadstat
266
+
267
+ # this time df will be polars
268
+ df, meta = pyreadstat.read_sas7bdat('/path/to/a/file.sas7bdat', output_format="polars")
269
+
270
+ # done! let's see what we got
271
+ print(df.head())
272
+ ```
273
+
260
274
  #### Writing files
261
275
 
262
276
  Pyreadstat can write STATA (dta), SPSS (sav and zsav, por currently not supported) and SAS (Xport, sas7bdat and sas7bcat
263
- currently not supported) files from pandas data frames.
277
+ currently not supported) files from pandas or polars dataframes.
264
278
 
265
- write functions take as first argument a pandas data frame (other data structures are not supported), as a second argument
279
+ write functions take as first argument a pandas or polars dataframe (other data structures are not supported), as a second argument
266
280
  the path to the destination file. Optionally you can also pass a file label and a list with column labels.
267
281
 
268
282
  ```python
269
283
  import pandas as pd
270
284
  import pyreadstat
271
285
 
286
+ # this would work the same for a polars dataframe
272
287
  df = pd.DataFrame([[1,2.0,"a"],[3,4.0,"b"]], columns=["v1", "v2", "v3"])
273
288
  # column_labels can also be a dictionary with variable name as key and label as value
274
289
  column_labels = ["Variable 1", "Variable 2", "Variable 3"]
275
290
  pyreadstat.write_sav(df, "path/to/destination.sav", file_label="test", column_labels=column_labels)
276
291
  ```
277
292
 
278
- Some special arguments are available depending on the function. write_sav can take also notes as string, wheter to
293
+ Some special arguments are available depending on the function. write_sav can also take notes as a string or list of strings, whether to
279
294
  compress or not as zsav or apply row compression, variable display widths and variable measures. write_dta can take a stata version.
280
295
  write_xport a name for the dataset. User defined missing values and value labels are also supported. See the
281
296
  [Module documentation](https://ofajardo.github.io/pyreadstat_documentation/_build/html/index.html) for more details.
@@ -434,7 +449,7 @@ function. The original values will be replaced by the values in the catalog.
434
449
  ```python
435
450
  import pyreadstat
436
451
 
437
- # formats_as_category is by default True, and it means the replaced values will be transformed to a pandas category column. There is also formats_as_ordered_category to get an ordered category, this by default is False.
452
+ # formats_as_category is by default True, and it means the replaced values will be transformed to a pandas/polars category column. There is also formats_as_ordered_category to get an ordered category, this by default is False.
438
453
  df, meta = pyreadstat.read_sas7bdat('/path/to/a/file.sas7bdat', catalog_file='/path/to/a/file.sas7bcat', formats_as_category=True, formats_as_ordered_category=False)
439
454
  ```
440
455
 
@@ -449,7 +464,7 @@ df, meta = pyreadstat.read_sas7bdat('/path/to/a/file.sas7bdat')
449
464
  # read_sas7bdat returns an empty data frame and the catalog
450
465
  df_empty, catalog = pyreadstat.read_sas7bdat('/path/to/a/file.sas7bcat')
451
466
  # enrich the dataframe with the catalog
452
- # formats_as_category is by default True, and it means the replaced values will be transformed to a pandas category column. formats_as_ordered_category is by default False meaning by default categories are not ordered.
467
+ # formats_as_category is by default True, and it means the replaced values will be transformed to a pandas/polars category column. formats_as_ordered_category is by default False meaning by default categories are not ordered.
453
468
  df_enriched, meta_enriched = pyreadstat.set_catalog_to_sas(df, meta, catalog,
454
469
  formats_as_category=True, formats_as_ordered_category=False)
455
470
  ```
@@ -461,7 +476,7 @@ when reading the file using the option apply_value_formats, ...
461
476
  import pyreadstat
462
477
 
463
478
  # apply_value_formats is by default False, so you have to set it to True manually if you want the labels
464
- # formats_as_category is by default True, and it means the replaced values will be transformed to a pandas category column. formats_as_ordered_category is by default False meaning by default categories are not ordered.
479
+ # formats_as_category is by default True, and it means the replaced values will be transformed to a pandas/polars category column. formats_as_ordered_category is by default False meaning by default categories are not ordered.
465
480
  df, meta = pyreadstat.read_sav("/path/to/sav/file.sav", apply_value_formats=True,
466
481
  formats_as_category=True, formats_as_ordered_category=False)
467
482
  ```
@@ -530,9 +545,9 @@ example if one has a categorical variable representing if the person passed a te
530
545
  1 for pass, and as user defined missing variables 2 for did not show up for the test, 3 for unable to process the results,
531
546
  etc.
532
547
 
533
- **By default both cases are represented by NaN when
548
+ **By default both cases are represented by NaN in pandas and null in polars when
534
549
  read with pyreadstat**. Notice that the only possible missing value in pandas is NaN (Not a Number) for both string and numeric
535
- variables, date, datetime and time variables have NaT (Not a Time).
550
+ variables, date, datetime and time variables have NaT (Not a Time). Polars uses null for all datatypes.
536
551
 
537
552
  ##### SPSS
538
553
 
@@ -599,16 +614,16 @@ translated as NaN by default and to the correspoding string value if
599
614
  user_missing is set to True. meta.missing_ranges will show the string
600
615
  value as well.
601
616
 
602
- When writing a pandas dataframe to a sav file, if user defined missing values are not set, NaNs are translated to
617
+ When writing a dataframe to a sav file, if user defined missing values are not set, NaNs are translated to
603
618
  empty strings, as there is no other possibility to represent those missing values and user defined missing values
604
619
  are not set automatically.
605
620
 
606
- When reading a sav into a pandas dataframe, if the value in
607
- a character variable is an empty string (''), it will not be translated to NaN, but will stay as an empty string. This
621
+ When reading a sav into a dataframe, if the value in
622
+ a character variable is an empty string (''), it will not be translated to NaN/null, but will stay as an empty string. This
608
623
  is because the empty string is a valid character value in SPSS and pyreadstat preserves that property.
609
624
 
610
625
  This behaviour generates an asymmetrical situation that has to be managed by the user. You can convert
611
- empty strings to nan very easily with pandas if you think it is appropiate
626
+ empty strings to nan very easily if you think it is appropriate
612
627
  for your dataset, or you can use defined missing values as described before.
613
628
 
614
629
 
@@ -700,7 +715,7 @@ df, meta = pyreadstat.read_sas7bdat('/path/to/a/file.sas7bdat', encoding="LATIN1
700
715
  ```
701
716
 
702
717
  You can preserve the original pandas behavior regarding dates (meaning dates are converted to pandas datetime) with the
703
- dates_as_pandas_datetime option
718
+ dates_as_pandas_datetime option. This option is effective for pandas only, not for polars.
704
719
 
705
720
  ```python
706
721
  import pyreadstat
@@ -708,18 +723,10 @@ import pyreadstat
708
723
  df, meta = pyreadstat.read_sas7bdat('/path/to/a/file.sas7bdat', dates_as_pandas_datetime=True)
709
724
  ```
710
725
 
711
- You can get a dictionary of numpy arrays instead of a pandas dataframe when reading any file format.
712
- In order to do that, set the parameter output_format='dict' (default is 'pandas'). This is useful if
713
- you want to transform the data to some other format different to pandas, as transforming the data to pandas is a costly
714
- process both in terms of speed and memory. Here for example an efficient way to transform the data to a polars dataframe:
715
-
716
- ```python
717
- import pyreadstat
718
- import polars
719
-
720
- dicdata, meta = pyreadstat.read_sav('/path/to/a/file.sav', output_format='dict')
721
- df = polars.DataFrame(dicdata)
722
- ```
726
+ You can get a dictionary of numpy arrays instead of a pandas or polars dataframe when reading any file format.
727
+ In order to do that, set the parameter output_format='dict' (default is 'pandas', the other option is 'polars'). This is useful if
728
+ you want to transform the data to some other format different to pandas/polars, as transforming the data to pandas is a costly
729
+ process both in terms of speed and memory.
723
730
 
724
731
  For more information, please check the [Module documentation](https://ofajardo.github.io/pyreadstat_documentation/_build/html/index.html).
725
732
 
@@ -727,7 +734,7 @@ For more information, please check the [Module documentation](https://ofajardo.g
727
734
 
728
735
  #### File specific options
729
736
 
730
- Some special arguments are available depending on the function. write_sav can take also notes as string, wheter to
737
+ Some special arguments are available depending on the function. write_sav can also take notes as a string or list of strings, whether to
731
738
  compress or not as zsav or apply row compression, variable display widths and variable measures. write_dta can take a stata version.
732
739
  write_xport a name for the dataset. See the
733
740
  [Module documentation](https://ofajardo.github.io/pyreadstat_documentation/_build/html/index.html) for more details.
@@ -735,7 +742,7 @@ write_xport a name for the dataset. See the
735
742
  #### Writing value labels
736
743
 
737
744
  The argument variable_value_labels can be passed to write_sav and write_dta to write value labels. This argument must be a
738
- dictionary where keys are variable names (names must match column names in the pandas data frame). Values are another dictionary where
745
+ dictionary where keys are variable names (names must match column names in the dataframe). Values are another dictionary where
739
746
  keys are the value present in the dataframe and values are the labels (strings).
740
747
 
741
748
  ```python
@@ -812,7 +819,7 @@ for the documentation of the original application.
812
819
  In the case of SPSS we have some presets for some formats:
813
820
  * restricted_integer: with leading zeros, equivalent to N + variable width (e.g N4)
814
821
  * integer: Numeric with no decimal places, equivalent to F + variable width + ".0" (0 decimal positions). A
815
- pandas column of type integer will also be translated into this format automatically.
822
+ column of type integer will also be translated into this format automatically.
816
823
 
817
824
  ```python
818
825
  import pandas as pd
@@ -828,12 +835,12 @@ There is some information about the possible formats [here](https://www.gnu.org/
828
835
 
829
836
  #### Variable type conversion
830
837
 
831
- The following rules are used in order to convert from pandas/numpy/python types to the target file types:
838
+ The following rules are used in order to convert from pandas/polars/numpy/python types to the target file types:
832
839
 
833
840
  | Python Type | Converted Type |
834
841
  | ------------------- | --------- |
835
- | np.int32 or lower | integer (stata), numeric (spss, sas) |
836
- | int, np.int64, np.float | double (stata), numeric (spss, sas) |
842
+ | np.int32/pl.Int32 or lower | integer (stata), numeric (spss, sas) |
843
+ | int, np.int64, pl.Int64, np.float64, pl.Float64 | double (stata), numeric (spss, sas) |
837
844
  | str | character |
838
845
  | bool | integer (stata), numeric (spss, sas) |
839
846
  | datetime, date, time | numeric with datetime/date/time formatting |
@@ -22,5 +22,5 @@ from .pyreadstat import read_file_in_chunks, read_file_multiprocessing
22
22
  from ._readstat_parser import ReadstatError, metadata_container
23
23
  from .pyfunctions import set_value_labels, set_catalog_to_sas
24
24
 
25
- __version__ = "1.2.9"
25
+ __version__ = "1.3.1"
26
26