pybcsv 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pybcsv-1.0.1/LICENSE +21 -0
- pybcsv-1.0.1/MANIFEST.in +14 -0
- pybcsv-1.0.1/PKG-INFO +278 -0
- pybcsv-1.0.1/README.md +231 -0
- pybcsv-1.0.1/include/bcsv/bcsv.h +49 -0
- pybcsv-1.0.1/include/bcsv/bcsv.hpp +68 -0
- pybcsv-1.0.1/include/bcsv/bitset.hpp +1107 -0
- pybcsv-1.0.1/include/bcsv/bitset_dynamic.hpp +964 -0
- pybcsv-1.0.1/include/bcsv/byte_buffer.h +85 -0
- pybcsv-1.0.1/include/bcsv/definitions.h +407 -0
- pybcsv-1.0.1/include/bcsv/file_header.h +235 -0
- pybcsv-1.0.1/include/bcsv/file_header.hpp +273 -0
- pybcsv-1.0.1/include/bcsv/layout.h +202 -0
- pybcsv-1.0.1/include/bcsv/layout.hpp +463 -0
- pybcsv-1.0.1/include/bcsv/packet_header.h +86 -0
- pybcsv-1.0.1/include/bcsv/packet_header.hpp +89 -0
- pybcsv-1.0.1/include/bcsv/reader.h +88 -0
- pybcsv-1.0.1/include/bcsv/reader.hpp +323 -0
- pybcsv-1.0.1/include/bcsv/row.h +602 -0
- pybcsv-1.0.1/include/bcsv/row.hpp +1050 -0
- pybcsv-1.0.1/include/bcsv/string_addr.h +54 -0
- pybcsv-1.0.1/include/bcsv/writer.h +69 -0
- pybcsv-1.0.1/include/bcsv/writer.hpp +256 -0
- pybcsv-1.0.1/include/boost-1.89.0/boost/crc.hpp +2353 -0
- pybcsv-1.0.1/include/lz4-1.10.0/lz4.c +2829 -0
- pybcsv-1.0.1/include/lz4-1.10.0/lz4.h +884 -0
- pybcsv-1.0.1/include/lz4-1.10.0/lz4file.c +341 -0
- pybcsv-1.0.1/include/lz4-1.10.0/lz4file.h +93 -0
- pybcsv-1.0.1/include/lz4-1.10.0/lz4frame.c +2136 -0
- pybcsv-1.0.1/include/lz4-1.10.0/lz4frame.h +751 -0
- pybcsv-1.0.1/include/lz4-1.10.0/lz4frame_static.h +47 -0
- pybcsv-1.0.1/include/lz4-1.10.0/lz4hc.c +2192 -0
- pybcsv-1.0.1/include/lz4-1.10.0/lz4hc.h +414 -0
- pybcsv-1.0.1/include/lz4-1.10.0/xxhash.c +1030 -0
- pybcsv-1.0.1/include/lz4-1.10.0/xxhash.h +328 -0
- pybcsv-1.0.1/pybcsv/__init__.py +63 -0
- pybcsv-1.0.1/pybcsv/__version__.py +13 -0
- pybcsv-1.0.1/pybcsv/bindings.cpp +448 -0
- pybcsv-1.0.1/pybcsv/pandas_utils.py +436 -0
- pybcsv-1.0.1/pybcsv/pandas_utils_original.py +275 -0
- pybcsv-1.0.1/pybcsv/py.typed +5 -0
- pybcsv-1.0.1/pybcsv.egg-info/PKG-INFO +278 -0
- pybcsv-1.0.1/pybcsv.egg-info/SOURCES.txt +48 -0
- pybcsv-1.0.1/pybcsv.egg-info/dependency_links.txt +1 -0
- pybcsv-1.0.1/pybcsv.egg-info/not-zip-safe +1 -0
- pybcsv-1.0.1/pybcsv.egg-info/requires.txt +14 -0
- pybcsv-1.0.1/pybcsv.egg-info/top_level.txt +2 -0
- pybcsv-1.0.1/pyproject.toml +68 -0
- pybcsv-1.0.1/setup.cfg +4 -0
- pybcsv-1.0.1/setup.py +227 -0
pybcsv-1.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Tobias Weber
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pybcsv-1.0.1/MANIFEST.in
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include LICENSE
|
|
3
|
+
include pybcsv/py.typed
|
|
4
|
+
recursive-include include *.h
|
|
5
|
+
recursive-include include *.hpp
|
|
6
|
+
recursive-include include *.c
|
|
7
|
+
recursive-include pybcsv *.h
|
|
8
|
+
recursive-include pybcsv *.hpp
|
|
9
|
+
recursive-include pybcsv *.cpp
|
|
10
|
+
recursive-exclude tests *
|
|
11
|
+
recursive-exclude examples *
|
|
12
|
+
global-exclude *.pyc
|
|
13
|
+
global-exclude __pycache__
|
|
14
|
+
global-exclude .DS_Store
|
pybcsv-1.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pybcsv
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: High-performance Python bindings for the BCSV (Binary CSV) library with pandas integration
|
|
5
|
+
Author: Tobias Weber
|
|
6
|
+
Author-email: Tobias Weber <weber.tobias.md@gmail.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/weber-tobias/bcsv
|
|
9
|
+
Project-URL: Documentation, https://github.com/weber-tobias/bcsv#readme
|
|
10
|
+
Project-URL: Repository, https://github.com/weber-tobias/bcsv
|
|
11
|
+
Project-URL: Bug Tracker, https://github.com/weber-tobias/bcsv/issues
|
|
12
|
+
Project-URL: Source Code, https://github.com/weber-tobias/bcsv
|
|
13
|
+
Keywords: csv,binary,data-science,compression,performance,pandas,numpy,big-data,data-processing
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: C++
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
25
|
+
Classifier: Topic :: Scientific/Engineering
|
|
26
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
|
+
Classifier: Topic :: System :: Archiving :: Compression
|
|
28
|
+
Classifier: Topic :: Database
|
|
29
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
30
|
+
Requires-Python: >=3.7
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
License-File: LICENSE
|
|
33
|
+
Requires-Dist: numpy>=1.19.0
|
|
34
|
+
Provides-Extra: pandas
|
|
35
|
+
Requires-Dist: pandas>=1.0.0; extra == "pandas"
|
|
36
|
+
Provides-Extra: test
|
|
37
|
+
Requires-Dist: pytest>=6.0.0; extra == "test"
|
|
38
|
+
Requires-Dist: pytest-cov; extra == "test"
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: pytest>=6.0.0; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
42
|
+
Requires-Dist: black; extra == "dev"
|
|
43
|
+
Requires-Dist: flake8; extra == "dev"
|
|
44
|
+
Dynamic: author
|
|
45
|
+
Dynamic: license-file
|
|
46
|
+
Dynamic: requires-python
|
|
47
|
+
|
|
48
|
+
# PyBCSV - Python Bindings for BCSV Library
|
|
49
|
+
|
|
50
|
+
PyBCSV provides Python bindings for the high-performance BCSV (Binary CSV) library, enabling efficient binary CSV file handling with pandas integration.
|
|
51
|
+
|
|
52
|
+
## Features
|
|
53
|
+
|
|
54
|
+
- **High Performance**: Binary format with optional LZ4 compression
|
|
55
|
+
- **Pandas Integration**: Direct DataFrame read/write support
|
|
56
|
+
- **Type Safety**: Preserves column types and data integrity
|
|
57
|
+
- **Cross-platform**: Works on Linux, macOS, and Windows
|
|
58
|
+
- **Memory Efficient**: Streaming support for large datasets
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
To build and install the Python wrapper from a local checkout of the BCSV repository:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
cd bcsv/python  # the python/ directory inside your BCSV checkout
|
|
66
|
+
source venv/bin/activate
|
|
67
|
+
pip install .
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Basic Usage
|
|
71
|
+
|
|
72
|
+
### Core BCSV Operations
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
import pybcsv
|
|
76
|
+
|
|
77
|
+
# Create a layout
|
|
78
|
+
layout = pybcsv.Layout()
|
|
79
|
+
layout.add_column("id", pybcsv.INT32)
|
|
80
|
+
layout.add_column("name", pybcsv.STRING)
|
|
81
|
+
layout.add_column("value", pybcsv.DOUBLE)
|
|
82
|
+
|
|
83
|
+
# Write data
|
|
84
|
+
writer = pybcsv.Writer(layout)
|
|
85
|
+
writer.open("data.bcsv")
|
|
86
|
+
writer.write_row([1, "Alice", 123.45])
|
|
87
|
+
writer.write_row([2, "Bob", 678.90])
|
|
88
|
+
writer.close()
|
|
89
|
+
|
|
90
|
+
# Read data
|
|
91
|
+
reader = pybcsv.Reader()
|
|
92
|
+
reader.open("data.bcsv")
|
|
93
|
+
all_rows = reader.read_all()
|
|
94
|
+
reader.close()
|
|
95
|
+
|
|
96
|
+
print(all_rows)
|
|
97
|
+
# Output: [[1, 'Alice', 123.45], [2, 'Bob', 678.9]]
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Pandas Integration
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
import pybcsv
|
|
104
|
+
import pandas as pd
|
|
105
|
+
|
|
106
|
+
# Create a DataFrame
|
|
107
|
+
df = pd.DataFrame({
|
|
108
|
+
'id': [1, 2, 3],
|
|
109
|
+
'name': ['Alice', 'Bob', 'Charlie'],
|
|
110
|
+
'value': [123.45, 678.90, 111.22]
|
|
111
|
+
})
|
|
112
|
+
|
|
113
|
+
# Write DataFrame to BCSV
|
|
114
|
+
pybcsv.write_dataframe(df, "data.bcsv")
|
|
115
|
+
|
|
116
|
+
# Read back as DataFrame
|
|
117
|
+
df_read = pybcsv.read_dataframe("data.bcsv")
|
|
118
|
+
print(df_read.equals(df)) # True
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### CSV Conversion
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
import pybcsv
|
|
125
|
+
|
|
126
|
+
# Convert CSV to BCSV
|
|
127
|
+
pybcsv.from_csv("input.csv", "output.bcsv")
|
|
128
|
+
|
|
129
|
+
# Convert BCSV to CSV
|
|
130
|
+
pybcsv.to_csv("output.bcsv", "output.csv")
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Available Types
|
|
134
|
+
|
|
135
|
+
- `pybcsv.BOOL` - Boolean values
|
|
136
|
+
- `pybcsv.INT8` / `pybcsv.UINT8` - 8-bit integers
|
|
137
|
+
- `pybcsv.INT16` / `pybcsv.UINT16` - 16-bit integers
|
|
138
|
+
- `pybcsv.INT32` / `pybcsv.UINT32` - 32-bit integers
|
|
139
|
+
- `pybcsv.INT64` / `pybcsv.UINT64` - 64-bit integers
|
|
140
|
+
- `pybcsv.FLOAT` - 32-bit floating point
|
|
141
|
+
- `pybcsv.DOUBLE` - 64-bit floating point
|
|
142
|
+
- `pybcsv.STRING` - Variable-length strings
|
|
143
|
+
|
|
144
|
+
## API Reference
|
|
145
|
+
|
|
146
|
+
### Layout Class
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
layout = pybcsv.Layout()
|
|
150
|
+
layout.add_column(name: str, column_type: ColumnType)
|
|
151
|
+
layout.column_count() -> int
|
|
152
|
+
layout.column_name(index: int) -> str
|
|
153
|
+
layout.column_type(index: int) -> ColumnType
|
|
154
|
+
layout.has_column(name: str) -> bool
|
|
155
|
+
layout.column_index(name: str) -> int
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Writer Class
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
writer = pybcsv.Writer(layout: Layout)
|
|
162
|
+
writer.open(filename: str) -> bool
|
|
163
|
+
writer.write_row(values: list) -> None
|
|
164
|
+
writer.flush() -> None
|
|
165
|
+
writer.close() -> None
|
|
166
|
+
writer.is_open() -> bool
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Reader Class
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
reader = pybcsv.Reader()
|
|
173
|
+
reader.open(filename: str) -> bool
|
|
174
|
+
reader.read_next() -> bool
|
|
175
|
+
reader.read_all() -> list[list]
|
|
176
|
+
reader.close() -> None
|
|
177
|
+
reader.is_open() -> bool
|
|
178
|
+
reader.layout() -> Layout
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Utility Functions
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
# Pandas integration
|
|
185
|
+
pybcsv.write_dataframe(df: pd.DataFrame, filename: str, compression: bool = True)
|
|
186
|
+
pybcsv.read_dataframe(filename: str) -> pd.DataFrame
|
|
187
|
+
|
|
188
|
+
# CSV conversion
|
|
189
|
+
pybcsv.from_csv(csv_filename: str, bcsv_filename: str, compression: bool = True)
|
|
190
|
+
pybcsv.to_csv(bcsv_filename: str, csv_filename: str)
|
|
191
|
+
|
|
192
|
+
# Type utilities
|
|
193
|
+
pybcsv.type_to_string(column_type: ColumnType) -> str
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
## Performance Benefits
|
|
197
|
+
|
|
198
|
+
The binary format provides significant advantages:
|
|
199
|
+
|
|
200
|
+
1. **Faster I/O**: Binary format is faster to read/write than text CSV
|
|
201
|
+
2. **Type Safety**: Preserves exact data types without parsing
|
|
202
|
+
3. **Compression**: Optional LZ4 compression reduces file size
|
|
203
|
+
4. **Memory Efficiency**: Streaming support for large datasets
|
|
204
|
+
|
|
205
|
+
## Testing
|
|
206
|
+
|
|
207
|
+
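The test scripts live in the repository's `tests/` directory (they are not shipped in the source distribution). Run them from that directory to verify functionality: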
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
python test_basic.py # Basic BCSV operations
|
|
211
|
+
python test_pandas.py # Pandas integration tests
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## File Structure
|
|
215
|
+
|
|
216
|
+
```text
|
|
217
|
+
python/
|
|
218
|
+
├── pybcsv/
|
|
219
|
+
│ ├── __init__.py # Main module interface
|
|
220
|
+
│ ├── __version__.py # Version information
|
|
221
|
+
│ ├── bindings.cpp # C++ pybind11 bindings
|
|
222
|
+
│ └── pandas_utils.py # Pandas integration utilities
|
|
223
|
+
├── examples/
|
|
224
|
+
│ ├── basic_example.py # Basic usage examples
|
|
225
|
+
│ └── pandas_example.py # Pandas integration examples
|
|
226
|
+
├── tests/
|
|
227
|
+
│ ├── test_basic.py # Basic functionality tests
|
|
228
|
+
│ └── test_pandas.py # Pandas integration tests
|
|
229
|
+
├── setup.py # Package build configuration
|
|
230
|
+
├── pyproject.toml # Modern Python packaging config
|
|
231
|
+
└── README.md # This documentation
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## Compatibility
|
|
235
|
+
|
|
236
|
+
- **Python**: 3.7+ (tested with 3.12)
|
|
237
|
+
- **Dependencies**:
|
|
238
|
+
- numpy >= 1.19.0 (required)
|
|
239
|
+
- pandas >= 1.0.0 (optional, for DataFrame integration; install with `pip install pybcsv[pandas]`)
|
|
240
|
+
- **Platforms**: Linux, macOS, Windows
|
|
241
|
+
- **Compilers**: GCC 7+, Clang 8+, MSVC 2019+
|
|
242
|
+
|
|
243
|
+
## Performance Results
|
|
244
|
+
|
|
245
|
+
Based on testing with sample data:
|
|
246
|
+
|
|
247
|
+
- **DataFrame I/O**: Perfect data fidelity with type preservation
|
|
248
|
+
- **File Size**: Efficient binary encoding (varies by data and compression)
|
|
249
|
+
- **Speed**: Significantly faster than CSV for repeated I/O operations
|
|
250
|
+
- **Memory**: Streaming support for large datasets
|
|
251
|
+
|
|
252
|
+
The Python wrapper successfully bridges the high-performance C++ BCSV library with Python's data science ecosystem, providing both convenience and performance for data processing workflows.
|
|
253
|
+
|
|
254
|
+
## License
|
|
255
|
+
|
|
256
|
+
MIT License
|
|
257
|
+
|
|
258
|
+
Copyright (c) 2025 Tobias Weber <weber.tobias.md@gmail.com>
|
|
259
|
+
|
|
260
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
261
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
262
|
+
in the Software without restriction, including without limitation the rights
|
|
263
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
264
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
265
|
+
furnished to do so, subject to the following conditions:
|
|
266
|
+
|
|
267
|
+
The above copyright notice and this permission notice shall be included in all
|
|
268
|
+
copies or substantial portions of the Software.
|
|
269
|
+
|
|
270
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
271
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
272
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
273
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
274
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
275
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
276
|
+
SOFTWARE.
|
|
277
|
+
|
|
278
|
+
See the [LICENSE](LICENSE) file for full details.
|
pybcsv-1.0.1/README.md
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
# PyBCSV - Python Bindings for BCSV Library
|
|
2
|
+
|
|
3
|
+
PyBCSV provides Python bindings for the high-performance BCSV (Binary CSV) library, enabling efficient binary CSV file handling with pandas integration.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **High Performance**: Binary format with optional LZ4 compression
|
|
8
|
+
- **Pandas Integration**: Direct DataFrame read/write support
|
|
9
|
+
- **Type Safety**: Preserves column types and data integrity
|
|
10
|
+
- **Cross-platform**: Works on Linux, macOS, and Windows
|
|
11
|
+
- **Memory Efficient**: Streaming support for large datasets
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
To build and install the Python wrapper from a local checkout of the BCSV repository:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
cd bcsv/python  # the python/ directory inside your BCSV checkout
|
|
19
|
+
source venv/bin/activate
|
|
20
|
+
pip install .
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Basic Usage
|
|
24
|
+
|
|
25
|
+
### Core BCSV Operations
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
import pybcsv
|
|
29
|
+
|
|
30
|
+
# Create a layout
|
|
31
|
+
layout = pybcsv.Layout()
|
|
32
|
+
layout.add_column("id", pybcsv.INT32)
|
|
33
|
+
layout.add_column("name", pybcsv.STRING)
|
|
34
|
+
layout.add_column("value", pybcsv.DOUBLE)
|
|
35
|
+
|
|
36
|
+
# Write data
|
|
37
|
+
writer = pybcsv.Writer(layout)
|
|
38
|
+
writer.open("data.bcsv")
|
|
39
|
+
writer.write_row([1, "Alice", 123.45])
|
|
40
|
+
writer.write_row([2, "Bob", 678.90])
|
|
41
|
+
writer.close()
|
|
42
|
+
|
|
43
|
+
# Read data
|
|
44
|
+
reader = pybcsv.Reader()
|
|
45
|
+
reader.open("data.bcsv")
|
|
46
|
+
all_rows = reader.read_all()
|
|
47
|
+
reader.close()
|
|
48
|
+
|
|
49
|
+
print(all_rows)
|
|
50
|
+
# Output: [[1, 'Alice', 123.45], [2, 'Bob', 678.9]]
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Pandas Integration
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
import pybcsv
|
|
57
|
+
import pandas as pd
|
|
58
|
+
|
|
59
|
+
# Create a DataFrame
|
|
60
|
+
df = pd.DataFrame({
|
|
61
|
+
'id': [1, 2, 3],
|
|
62
|
+
'name': ['Alice', 'Bob', 'Charlie'],
|
|
63
|
+
'value': [123.45, 678.90, 111.22]
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
# Write DataFrame to BCSV
|
|
67
|
+
pybcsv.write_dataframe(df, "data.bcsv")
|
|
68
|
+
|
|
69
|
+
# Read back as DataFrame
|
|
70
|
+
df_read = pybcsv.read_dataframe("data.bcsv")
|
|
71
|
+
print(df_read.equals(df)) # True
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### CSV Conversion
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import pybcsv
|
|
78
|
+
|
|
79
|
+
# Convert CSV to BCSV
|
|
80
|
+
pybcsv.from_csv("input.csv", "output.bcsv")
|
|
81
|
+
|
|
82
|
+
# Convert BCSV to CSV
|
|
83
|
+
pybcsv.to_csv("output.bcsv", "output.csv")
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Available Types
|
|
87
|
+
|
|
88
|
+
- `pybcsv.BOOL` - Boolean values
|
|
89
|
+
- `pybcsv.INT8` / `pybcsv.UINT8` - 8-bit integers
|
|
90
|
+
- `pybcsv.INT16` / `pybcsv.UINT16` - 16-bit integers
|
|
91
|
+
- `pybcsv.INT32` / `pybcsv.UINT32` - 32-bit integers
|
|
92
|
+
- `pybcsv.INT64` / `pybcsv.UINT64` - 64-bit integers
|
|
93
|
+
- `pybcsv.FLOAT` - 32-bit floating point
|
|
94
|
+
- `pybcsv.DOUBLE` - 64-bit floating point
|
|
95
|
+
- `pybcsv.STRING` - Variable-length strings
|
|
96
|
+
|
|
97
|
+
## API Reference
|
|
98
|
+
|
|
99
|
+
### Layout Class
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
layout = pybcsv.Layout()
|
|
103
|
+
layout.add_column(name: str, column_type: ColumnType)
|
|
104
|
+
layout.column_count() -> int
|
|
105
|
+
layout.column_name(index: int) -> str
|
|
106
|
+
layout.column_type(index: int) -> ColumnType
|
|
107
|
+
layout.has_column(name: str) -> bool
|
|
108
|
+
layout.column_index(name: str) -> int
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Writer Class
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
writer = pybcsv.Writer(layout: Layout)
|
|
115
|
+
writer.open(filename: str) -> bool
|
|
116
|
+
writer.write_row(values: list) -> None
|
|
117
|
+
writer.flush() -> None
|
|
118
|
+
writer.close() -> None
|
|
119
|
+
writer.is_open() -> bool
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Reader Class
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
reader = pybcsv.Reader()
|
|
126
|
+
reader.open(filename: str) -> bool
|
|
127
|
+
reader.read_next() -> bool
|
|
128
|
+
reader.read_all() -> list[list]
|
|
129
|
+
reader.close() -> None
|
|
130
|
+
reader.is_open() -> bool
|
|
131
|
+
reader.layout() -> Layout
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Utility Functions
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
# Pandas integration
|
|
138
|
+
pybcsv.write_dataframe(df: pd.DataFrame, filename: str, compression: bool = True)
|
|
139
|
+
pybcsv.read_dataframe(filename: str) -> pd.DataFrame
|
|
140
|
+
|
|
141
|
+
# CSV conversion
|
|
142
|
+
pybcsv.from_csv(csv_filename: str, bcsv_filename: str, compression: bool = True)
|
|
143
|
+
pybcsv.to_csv(bcsv_filename: str, csv_filename: str)
|
|
144
|
+
|
|
145
|
+
# Type utilities
|
|
146
|
+
pybcsv.type_to_string(column_type: ColumnType) -> str
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## Performance Benefits
|
|
150
|
+
|
|
151
|
+
The binary format provides significant advantages:
|
|
152
|
+
|
|
153
|
+
1. **Faster I/O**: Binary format is faster to read/write than text CSV
|
|
154
|
+
2. **Type Safety**: Preserves exact data types without parsing
|
|
155
|
+
3. **Compression**: Optional LZ4 compression reduces file size
|
|
156
|
+
4. **Memory Efficiency**: Streaming support for large datasets
|
|
157
|
+
|
|
158
|
+
## Testing
|
|
159
|
+
|
|
160
|
+
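The test scripts live in the repository's `tests/` directory (they are not shipped in the source distribution). Run them from that directory to verify functionality: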
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
python test_basic.py # Basic BCSV operations
|
|
164
|
+
python test_pandas.py # Pandas integration tests
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## File Structure
|
|
168
|
+
|
|
169
|
+
```text
|
|
170
|
+
python/
|
|
171
|
+
├── pybcsv/
|
|
172
|
+
│ ├── __init__.py # Main module interface
|
|
173
|
+
│ ├── __version__.py # Version information
|
|
174
|
+
│ ├── bindings.cpp # C++ pybind11 bindings
|
|
175
|
+
│ └── pandas_utils.py # Pandas integration utilities
|
|
176
|
+
├── examples/
|
|
177
|
+
│ ├── basic_example.py # Basic usage examples
|
|
178
|
+
│ └── pandas_example.py # Pandas integration examples
|
|
179
|
+
├── tests/
|
|
180
|
+
│ ├── test_basic.py # Basic functionality tests
|
|
181
|
+
│ └── test_pandas.py # Pandas integration tests
|
|
182
|
+
├── setup.py # Package build configuration
|
|
183
|
+
├── pyproject.toml # Modern Python packaging config
|
|
184
|
+
└── README.md # This documentation
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## Compatibility
|
|
188
|
+
|
|
189
|
+
- **Python**: 3.7+ (tested with 3.12)
|
|
190
|
+
- **Dependencies**:
|
|
191
|
+
- numpy >= 1.19.0 (required)
|
|
192
|
+
- pandas >= 1.0.0 (optional, for DataFrame integration; install with `pip install pybcsv[pandas]`)
|
|
193
|
+
- **Platforms**: Linux, macOS, Windows
|
|
194
|
+
- **Compilers**: GCC 7+, Clang 8+, MSVC 2019+
|
|
195
|
+
|
|
196
|
+
## Performance Results
|
|
197
|
+
|
|
198
|
+
Based on testing with sample data:
|
|
199
|
+
|
|
200
|
+
- **DataFrame I/O**: Perfect data fidelity with type preservation
|
|
201
|
+
- **File Size**: Efficient binary encoding (varies by data and compression)
|
|
202
|
+
- **Speed**: Significantly faster than CSV for repeated I/O operations
|
|
203
|
+
- **Memory**: Streaming support for large datasets
|
|
204
|
+
|
|
205
|
+
The Python wrapper successfully bridges the high-performance C++ BCSV library with Python's data science ecosystem, providing both convenience and performance for data processing workflows.
|
|
206
|
+
|
|
207
|
+
## License
|
|
208
|
+
|
|
209
|
+
MIT License
|
|
210
|
+
|
|
211
|
+
Copyright (c) 2025 Tobias Weber <weber.tobias.md@gmail.com>
|
|
212
|
+
|
|
213
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
214
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
215
|
+
in the Software without restriction, including without limitation the rights
|
|
216
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
217
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
218
|
+
furnished to do so, subject to the following conditions:
|
|
219
|
+
|
|
220
|
+
The above copyright notice and this permission notice shall be included in all
|
|
221
|
+
copies or substantial portions of the Software.
|
|
222
|
+
|
|
223
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
224
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
225
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
226
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
227
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
228
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
229
|
+
SOFTWARE.
|
|
230
|
+
|
|
231
|
+
See the [LICENSE](LICENSE) file for full details.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) 2025 Tobias Weber <weber.tobias.md@gmail.com>
|
|
3
|
+
*
|
|
4
|
+
* This file is part of the BCSV library.
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the MIT License. See LICENSE file in the project root
|
|
7
|
+
* for full license information.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
#pragma once
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* @file bcsv.h
|
|
14
|
+
* @brief Binary CSV (BCSV) Library - Main Header with Declarations
|
|
15
|
+
*
|
|
16
|
+
* A C++17 header-only library for reading and writing binary CSV files
|
|
17
|
+
* with type safety, compression support, and efficient I/O operations.
|
|
18
|
+
*
|
|
19
|
+
* This header includes all BCSV component declarations:
|
|
20
|
+
* - FileHeader: Binary file header management
|
|
21
|
+
* - ColumnLayout: Column definitions and metadata
|
|
22
|
+
* - Row: Individual data rows
|
|
23
|
+
* - Packet: Data packet abstraction
|
|
24
|
+
* - Reader: Template-based file reading
|
|
25
|
+
* - Writer: Template-based file writing
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
#include <iostream>
|
|
29
|
+
#include <string>
|
|
30
|
+
|
|
31
|
+
// Core definitions first
|
|
32
|
+
#include "definitions.h"
|
|
33
|
+
|
|
34
|
+
// Core component declarations
|
|
35
|
+
#include "file_header.h"
|
|
36
|
+
#include "layout.h"
|
|
37
|
+
#include "packet_header.h"
|
|
38
|
+
#include "reader.h"
|
|
39
|
+
#include "row.h"
|
|
40
|
+
#include "writer.h"
|
|
41
|
+
|
|
42
|
+
// Include implementations
|
|
43
|
+
#include "bcsv.hpp"
|
|
44
|
+
#include "file_header.hpp"
|
|
45
|
+
#include "layout.hpp"
|
|
46
|
+
#include "packet_header.hpp"
|
|
47
|
+
#include "reader.hpp"
|
|
48
|
+
#include "row.hpp"
|
|
49
|
+
#include "writer.hpp"
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) 2025 Tobias Weber <weber.tobias.md@gmail.com>
|
|
3
|
+
*
|
|
4
|
+
* This file is part of the BCSV library.
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the MIT License. See LICENSE file in the project root
|
|
7
|
+
* for full license information.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
#pragma once
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* @file bcsv.hpp
|
|
14
|
+
* @brief Binary CSV (BCSV) Library - Shared implementations and utilities
|
|
15
|
+
*
|
|
16
|
+
* This file contains implementations that require multiple headers to be included first.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#include "file_header.h"
|
|
20
|
+
#include "layout.h"
|
|
21
|
+
|
|
22
|
+
namespace bcsv {
|
|
23
|
+
// Type traits to check if StreamType supports file operations
|
|
24
|
+
/**
 * @brief Compile-time check: is T std::fstream or a class derived from it?
 *
 * Derives from std::bool_constant so the trait provides both `value` and
 * `type`, like the standard library traits. std::is_base_of already yields
 * true when both arguments name the same class, so no separate std::is_same
 * test is required.
 *
 * @tparam T Stream type to inspect.
 */
template<typename T>
struct is_fstream : std::bool_constant<std::is_base_of_v<std::fstream, T>> {};
|
|
28
|
+
/**
 * @brief Compile-time check: can T be used as an input file stream?
 *
 * True when T is (or derives from) std::ifstream or std::fstream.
 * std::fstream is tested separately because it does not derive from
 * std::ifstream. std::is_base_of already covers the "same class" case, so
 * no std::is_same test is needed. Deriving from std::bool_constant provides
 * both `value` and `type`.
 *
 * @tparam T Stream type to inspect.
 */
template<typename T>
struct is_ifstream
    : std::bool_constant<std::is_base_of_v<std::ifstream, T> ||
                         std::is_base_of_v<std::fstream, T>> {};
|
|
32
|
+
|
|
33
|
+
/**
 * @brief Compile-time check: can T be used as an output file stream?
 *
 * True when T is (or derives from) std::ofstream or std::fstream.
 * std::fstream is tested separately because it does not derive from
 * std::ofstream. std::is_base_of already covers the "same class" case, so
 * no std::is_same test is needed. Deriving from std::bool_constant provides
 * both `value` and `type`.
 *
 * @tparam T Stream type to inspect.
 */
template<typename T>
struct is_ofstream
    : std::bool_constant<std::is_base_of_v<std::ofstream, T> ||
                         std::is_base_of_v<std::fstream, T>> {};
|
|
37
|
+
|
|
38
|
+
/**
 * @brief Detects whether T has an `open` member callable with a std::string.
 *
 * `type` is std::true_type when `std::declval<T>().open(std::string{})` is a
 * well-formed expression and std::false_type otherwise; `value` mirrors it.
 *
 * @tparam T Type to inspect.
 */
template<typename T>
struct has_open_method {
    // Preferred overload (int beats the ellipsis). It only participates when
    // the open() call is well-formed. The call is cast to void so that a
    // return type with an overloaded comma operator cannot change the deduced
    // type away from std::true_type.
    template<typename U>
    static auto test(int) -> decltype(void(std::declval<U>().open(std::string{})), std::true_type{});
    // Fallback selected when the expression above is ill-formed (SFINAE).
    template<typename>
    static std::false_type test(...);
    using type = decltype(test<T>(0));
    static constexpr bool value = type::value;
};
|
|
47
|
+
|
|
48
|
+
/**
 * @brief Detects whether T has a `close` member callable without arguments.
 *
 * `type` is std::true_type when `std::declval<T>().close()` is a well-formed
 * expression and std::false_type otherwise; `value` mirrors it.
 *
 * @tparam T Type to inspect.
 */
template<typename T>
struct has_close_method {
    // Preferred overload (int beats the ellipsis). It only participates when
    // the close() call is well-formed. The call is cast to void so that a
    // return type with an overloaded comma operator cannot change the deduced
    // type away from std::true_type.
    template<typename U>
    static auto test(int) -> decltype(void(std::declval<U>().close()), std::true_type{});
    // Fallback selected when the expression above is ill-formed (SFINAE).
    template<typename>
    static std::false_type test(...);
    using type = decltype(test<T>(0));
    static constexpr bool value = type::value;
};
|
|
57
|
+
|
|
58
|
+
/**
 * @brief Detects whether T has an `is_open` member callable without arguments.
 *
 * `type` is std::true_type when `std::declval<T>().is_open()` is a
 * well-formed expression and std::false_type otherwise; `value` mirrors it.
 *
 * @tparam T Type to inspect.
 */
template<typename T>
struct has_is_open_method {
    // Preferred overload (int beats the ellipsis). It only participates when
    // the is_open() call is well-formed. The call is cast to void so that a
    // return type with an overloaded comma operator cannot change the deduced
    // type away from std::true_type.
    template<typename U>
    static auto test(int) -> decltype(void(std::declval<U>().is_open()), std::true_type{});
    // Fallback selected when the expression above is ill-formed (SFINAE).
    template<typename>
    static std::false_type test(...);
    using type = decltype(test<T>(0));
    static constexpr bool value = type::value;
};
|
|
67
|
+
|
|
68
|
+
} // namespace bcsv
|