iparq 0.1.7__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
iparq/py.typed ADDED
File without changes
iparq/source.py CHANGED
@@ -1,13 +1,27 @@
1
+ import json
2
+ from enum import Enum
3
+ from typing import List, Optional
4
+
1
5
  import pyarrow.parquet as pq
2
6
  import typer
3
7
  from pydantic import BaseModel
4
8
  from rich import print
5
9
  from rich.console import Console
10
+ from rich.table import Table
6
11
 
7
- app = typer.Typer()
12
+ app = typer.Typer(
13
+ help="Inspect Parquet files for metadata, compression, and bloom filters"
14
+ )
8
15
  console = Console()
9
16
 
10
17
 
18
+ class OutputFormat(str, Enum):
19
+ """Enum for output format options."""
20
+
21
+ RICH = "rich"
22
+ JSON = "json"
23
+
24
+
11
25
  class ParquetMetaModel(BaseModel):
12
26
  """
13
27
  ParquetMetaModel is a data model representing metadata for a Parquet file.
@@ -29,6 +43,36 @@ class ParquetMetaModel(BaseModel):
29
43
  serialized_size: int
30
44
 
31
45
 
46
+ class ColumnInfo(BaseModel):
47
+ """
48
+ ColumnInfo is a data model representing information about a column in a Parquet file.
49
+
50
+ Attributes:
51
+ row_group (int): The row group index.
52
+ column_name (str): The name of the column.
53
+ column_index (int): The index of the column.
54
+ compression_type (str): The compression type used for the column.
55
+ has_bloom_filter (bool): Whether the column has a bloom filter.
56
+ """
57
+
58
+ row_group: int
59
+ column_name: str
60
+ column_index: int
61
+ compression_type: str
62
+ has_bloom_filter: Optional[bool] = False
63
+
64
+
65
+ class ParquetColumnInfo(BaseModel):
66
+ """
67
+ ParquetColumnInfo is a data model representing information about all columns in a Parquet file.
68
+
69
+ Attributes:
70
+ columns (List[ColumnInfo]): List of column information.
71
+ """
72
+
73
+ columns: List[ColumnInfo] = []
74
+
75
+
32
76
  def read_parquet_metadata(filename: str):
33
77
  """
34
78
  Reads the metadata of a Parquet file and extracts the compression codecs used.
@@ -94,88 +138,188 @@ def print_parquet_metadata(parquet_metadata):
94
138
  pass
95
139
 
96
140
 
97
- def print_compression_types(parquet_metadata) -> None:
141
+ def print_compression_types(parquet_metadata, column_info: ParquetColumnInfo) -> None:
98
142
  """
99
- Prints the compression type for each column in each row group of the Parquet file.
143
+ Collects compression type information for each column and adds it to the column_info model.
144
+
145
+ Args:
146
+ parquet_metadata: The Parquet file metadata.
147
+ column_info: The ParquetColumnInfo model to update.
100
148
  """
101
149
  try:
102
150
  num_row_groups = parquet_metadata.num_row_groups
103
151
  num_columns = parquet_metadata.num_columns
104
- console.print("[bold underline]Column Compression Info:[/bold underline]")
152
+
105
153
  for i in range(num_row_groups):
106
- console.print(f"[bold]Row Group {i}:[/bold]")
154
+ row_group = parquet_metadata.row_group(i)
107
155
  for j in range(num_columns):
108
- column_chunk = parquet_metadata.row_group(i).column(j)
156
+ column_chunk = row_group.column(j)
109
157
  compression = column_chunk.compression
110
- column_name = parquet_metadata.schema.column(j).name
111
- console.print(
112
- f" Column '{column_name}' (Index {j}): [italic]{compression}[/italic]"
158
+ column_name = parquet_metadata.schema.names[j]
159
+
160
+ # Create or update column info
161
+ column_info.columns.append(
162
+ ColumnInfo(
163
+ row_group=i,
164
+ column_name=column_name,
165
+ column_index=j,
166
+ compression_type=compression,
167
+ )
113
168
  )
114
169
  except Exception as e:
115
170
  console.print(
116
- f"Error while printing compression types: {e}",
171
+ f"Error while collecting compression types: {e}",
117
172
  style="blink bold red underline on white",
118
173
  )
119
- finally:
120
- pass
121
174
 
122
175
 
123
- def print_bloom_filter_info(parquet_metadata) -> None:
176
+ def print_bloom_filter_info(parquet_metadata, column_info: ParquetColumnInfo) -> None:
124
177
  """
125
- Prints information about bloom filters for each column in each row group of the Parquet file.
178
+ Updates the column_info model with bloom filter information.
179
+
180
+ Args:
181
+ parquet_metadata: The Parquet file metadata.
182
+ column_info: The ParquetColumnInfo model to update.
126
183
  """
127
184
  try:
128
185
  num_row_groups = parquet_metadata.num_row_groups
129
186
  num_columns = parquet_metadata.num_columns
130
- has_bloom_filters = False
131
-
132
- console.print("[bold underline]Bloom Filter Info:[/bold underline]")
133
187
 
134
188
  for i in range(num_row_groups):
135
189
  row_group = parquet_metadata.row_group(i)
136
- bloom_filters_in_group = False
137
190
 
138
191
  for j in range(num_columns):
139
192
  column_chunk = row_group.column(j)
140
- column_name = parquet_metadata.schema.column(j).name
141
-
142
- # Check if this column has bloom filters using is_stats_set
143
- if hasattr(column_chunk, "is_stats_set") and column_chunk.is_stats_set:
144
- if not bloom_filters_in_group:
145
- console.print(f"[bold]Row Group {i}:[/bold]")
146
- bloom_filters_in_group = True
147
- has_bloom_filters = True
148
- console.print(
149
- f" Column '{column_name}' (Index {j}): [green]Has bloom filter[/green]"
150
- )
151
-
152
- if not has_bloom_filters:
153
- console.print(" [italic]No bloom filters found in any column[/italic]")
154
193
 
194
+ # Find the corresponding column in our model
195
+ for col in column_info.columns:
196
+ if col.row_group == i and col.column_index == j:
197
+ # Check if this column has bloom filters
198
+ has_bloom_filter = (
199
+ hasattr(column_chunk, "is_stats_set")
200
+ and column_chunk.is_stats_set
201
+ )
202
+ col.has_bloom_filter = has_bloom_filter
203
+ break
155
204
  except Exception as e:
156
205
  console.print(
157
- f"Error while printing bloom filter information: {e}",
206
+ f"Error while collecting bloom filter information: {e}",
158
207
  style="blink bold red underline on white",
159
208
  )
160
209
 
161
210
 
162
- @app.command()
163
- def main(filename: str):
211
+ def print_column_info_table(column_info: ParquetColumnInfo) -> None:
164
212
  """
165
- Main function to read and print Parquet file metadata.
213
+ Prints the column information using a Rich table.
166
214
 
167
215
  Args:
168
- filename (str): The path to the Parquet file.
216
+ column_info: The ParquetColumnInfo model to display.
217
+ """
218
+ table = Table(title="Parquet Column Information")
219
+
220
+ # Add table columns
221
+ table.add_column("Row Group", justify="center", style="cyan")
222
+ table.add_column("Column Name", style="green")
223
+ table.add_column("Index", justify="center")
224
+ table.add_column("Compression", style="magenta")
225
+ table.add_column("Bloom Filter", justify="center")
226
+
227
+ # Add rows to the table
228
+ for col in column_info.columns:
229
+ table.add_row(
230
+ str(col.row_group),
231
+ col.column_name,
232
+ str(col.column_index),
233
+ col.compression_type,
234
+ "✅" if col.has_bloom_filter else "❌",
235
+ )
169
236
 
170
- Returns:
171
- Metadata of the Parquet file and the compression codecs used.
237
+ # Print the table
238
+ console.print(table)
239
+
240
+
241
+ def output_json(
242
+ meta_model: ParquetMetaModel,
243
+ column_info: ParquetColumnInfo,
244
+ compression_codecs: set,
245
+ ) -> None:
246
+ """
247
+ Outputs the parquet information in JSON format.
248
+
249
+ Args:
250
+ meta_model: The Parquet metadata model
251
+ column_info: The column information model
252
+ compression_codecs: Set of compression codecs used
253
+ """
254
+ result = {
255
+ "metadata": meta_model.model_dump(),
256
+ "columns": [column.model_dump() for column in column_info.columns],
257
+ "compression_codecs": list(compression_codecs),
258
+ }
259
+
260
+ print(json.dumps(result, indent=2))
261
+
262
+
263
+ @app.command(name="")
264
+ @app.command(name="inspect")
265
+ def inspect(
266
+ filename: str = typer.Argument(..., help="Path to the Parquet file to inspect"),
267
+ format: OutputFormat = typer.Option(
268
+ OutputFormat.RICH, "--format", "-f", help="Output format (rich or json)"
269
+ ),
270
+ metadata_only: bool = typer.Option(
271
+ False,
272
+ "--metadata-only",
273
+ "-m",
274
+ help="Show only file metadata without column details",
275
+ ),
276
+ column_filter: Optional[str] = typer.Option(
277
+ None, "--column", "-c", help="Filter results to show only specific column"
278
+ ),
279
+ ):
280
+ """
281
+ Inspect a Parquet file and display its metadata, compression settings, and bloom filter information.
172
282
  """
173
283
  (parquet_metadata, compression) = read_parquet_metadata(filename)
174
284
 
175
- print_parquet_metadata(parquet_metadata)
176
- print_compression_types(parquet_metadata)
177
- print_bloom_filter_info(parquet_metadata)
178
- print(f"Compression codecs: {compression}")
285
+ # Create metadata model
286
+ meta_model = ParquetMetaModel(
287
+ created_by=parquet_metadata.created_by,
288
+ num_columns=parquet_metadata.num_columns,
289
+ num_rows=parquet_metadata.num_rows,
290
+ num_row_groups=parquet_metadata.num_row_groups,
291
+ format_version=str(parquet_metadata.format_version),
292
+ serialized_size=parquet_metadata.serialized_size,
293
+ )
294
+
295
+ # Create a model to store column information
296
+ column_info = ParquetColumnInfo()
297
+
298
+ # Collect information
299
+ print_compression_types(parquet_metadata, column_info)
300
+ print_bloom_filter_info(parquet_metadata, column_info)
301
+
302
+ # Filter columns if requested
303
+ if column_filter:
304
+ column_info.columns = [
305
+ col for col in column_info.columns if col.column_name == column_filter
306
+ ]
307
+ if not column_info.columns:
308
+ console.print(
309
+ f"No columns match the filter: {column_filter}", style="yellow"
310
+ )
311
+
312
+ # Output based on format selection
313
+ if format == OutputFormat.JSON:
314
+ output_json(meta_model, column_info, compression)
315
+ else: # Rich format
316
+ # Print the metadata
317
+ console.print(meta_model)
318
+
319
+ # Print column details if not metadata only
320
+ if not metadata_only:
321
+ print_column_info_table(column_info)
322
+ console.print(f"Compression codecs: {compression}")
179
323
 
180
324
 
181
325
  if __name__ == "__main__":
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.4
2
+ Name: iparq
3
+ Version: 0.2.5
4
+ Summary: Display version, compression, and bloom filter information about a parquet file
5
+ Author-email: MiguelElGallo <miguel.zurcher@gmail.com>
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.9
8
+ Requires-Dist: pyarrow
9
+ Requires-Dist: pydantic
10
+ Requires-Dist: rich
11
+ Requires-Dist: typer[all]
12
+ Provides-Extra: checks
13
+ Requires-Dist: mypy>=1.14.1; extra == 'checks'
14
+ Requires-Dist: ruff>=0.9.3; extra == 'checks'
15
+ Provides-Extra: test
16
+ Requires-Dist: pytest>=7.0; extra == 'test'
17
+ Description-Content-Type: text/markdown
18
+
19
+ # iparq
20
+
21
+ [![Python package](https://github.com/MiguelElGallo/iparq/actions/workflows/python-package.yml/badge.svg)](https://github.com/MiguelElGallo/iparq/actions/workflows/python-package.yml)
22
+
23
+ [![Dependabot Updates](https://github.com/MiguelElGallo/iparq/actions/workflows/dependabot/dependabot-updates/badge.svg)](https://github.com/MiguelElGallo/iparq/actions/workflows/dependabot/dependabot-updates)
24
+
25
+ [![Upload Python Package](https://github.com/MiguelElGallo/iparq/actions/workflows/python-publish.yml/badge.svg)](https://github.com/MiguelElGallo/iparq/actions/workflows/python-publish.yml)
26
+
27
+ ![alt text](media/iparq.png)
28
+ After reading [this blog](https://duckdb.org/2025/01/22/parquet-encodings.html), I began to wonder which Parquet version and compression methods the everyday tools we rely on actually use, only to find that there's no straightforward way to determine this. That curiosity and the difficulty of quickly discovering such details motivated me to create iparq (Information Parquet). My goal with iparq is to help users easily identify the specifics of the Parquet files generated by different engines, making it clear which features—like newer encodings or certain compression algorithms—the creator of the parquet is using.
29
+
30
+ ***New*** Bloom filters information: Displays if there are bloom filters.
31
+ Read more about bloom filters in this [great article](https://duckdb.org/2025/03/07/parquet-bloom-filters-in-duckdb.html).
32
+
33
+ ## Installation
34
+
35
+ ### Zero installation - Recommended
36
+
37
+ 1) Make sure to have Astral's UV installed by following the steps here:
38
+
39
+ <https://docs.astral.sh/uv/getting-started/installation/>
40
+
41
+ 2) Execute the following command:
42
+
43
+ ```sh
44
+ uvx --refresh iparq inspect yourparquet.parquet
45
+ ```
46
+
47
+ ### Using pip
48
+
49
+ 1) Install the package using pip:
50
+
51
+ ```sh
52
+ pip install iparq
53
+ ```
54
+
55
+ 2) Verify the installation by running:
56
+
57
+ ```sh
58
+ iparq --help
59
+ ```
60
+
61
+ ### Using uv
62
+
63
+ 1) Make sure to have Astral's UV installed by following the steps here:
64
+
65
+ <https://docs.astral.sh/uv/getting-started/installation/>
66
+
67
+ 2) Execute the following command:
68
+
69
+ ```sh
70
+ uv pip install iparq
71
+ ```
72
+
73
+ 3) Verify the installation by running:
74
+
75
+ ```sh
76
+ iparq --help
77
+ ```
78
+
79
+ ### Using Homebrew in a MAC
80
+
81
+ 1) Run the following:
82
+
83
+ ```sh
84
+ brew tap MiguelElGallo/tap https://github.com/MiguelElGallo/homebrew-iparq.git
85
+ brew install MiguelElGallo/tap/iparq
86
+ iparq --help
87
+ ```
88
+
89
+ ## Usage
90
+
91
+ iparq now supports additional options:
92
+
93
+ ```sh
94
+ iparq inspect <filename> [OPTIONS]
95
+ ```
96
+
97
+ Options include:
98
+
99
+ - `--format`, `-f`: Output format, either `rich` (default) or `json`
100
+ - `--metadata-only`, `-m`: Show only file metadata without column details
101
+ - `--column`, `-c`: Filter results to show only a specific column
102
+
103
+ Examples:
104
+
105
+ ```sh
106
+ # Output in JSON format
107
+ iparq inspect yourfile.parquet --format json
108
+
109
+ # Show only metadata
110
+ iparq inspect yourfile.parquet --metadata-only
111
+
112
+ # Filter to show only a specific column
113
+ iparq inspect yourfile.parquet --column column_name
114
+ ```
115
+
116
+ Replace `<filename>` with the path to your .parquet file. The utility will read the metadata of the file and print the compression codecs used in the parquet file.
117
+
118
+ ## Example output - Bloom Filters
119
+
120
+ ```log
121
+ ParquetMetaModel(
122
+ created_by='DuckDB version v1.2.1 (build 8e52ec4395)',
123
+ num_columns=1,
124
+ num_rows=100000000,
125
+ num_row_groups=10,
126
+ format_version='1.0',
127
+ serialized_size=1196
128
+ )
129
+ Parquet Column Information
130
+ ┏━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
131
+ ┃ Row Group ┃ Column Name ┃ Index ┃ Compression ┃ Bloom Filter ┃
132
+ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
133
+ │ 0 │ r │ 0 │ SNAPPY │ ✅ │
134
+ │ 1 │ r │ 0 │ SNAPPY │ ✅ │
135
+ │ 2 │ r │ 0 │ SNAPPY │ ✅ │
136
+ │ 3 │ r │ 0 │ SNAPPY │ ✅ │
137
+ │ 4 │ r │ 0 │ SNAPPY │ ✅ │
138
+ │ 5 │ r │ 0 │ SNAPPY │ ✅ │
139
+ │ 6 │ r │ 0 │ SNAPPY │ ✅ │
140
+ │ 7 │ r │ 0 │ SNAPPY │ ✅ │
141
+ │ 8 │ r │ 0 │ SNAPPY │ ✅ │
142
+ │ 9 │ r │ 0 │ SNAPPY │ ✅ │
143
+ └───────────┴─────────────┴───────┴─────────────┴──────────────┘
144
+ Compression codecs: {'SNAPPY'}
145
+ ```
@@ -0,0 +1,8 @@
1
+ iparq/__init__.py,sha256=sXLh7g3KC4QCFxcZGBTpG2scR7hmmBsMjq6LqRptkRg,22
2
+ iparq/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ iparq/source.py,sha256=huC6I0hqwyv4BZ5xjI6FMZs9KH60xVHEKbmX6X8hhiA,10721
4
+ iparq-0.2.5.dist-info/METADATA,sha256=QpkD25vwzqlQo9e2JQKFx9VHdyV0So9u20TtZHkf4LY,5501
5
+ iparq-0.2.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ iparq-0.2.5.dist-info/entry_points.txt,sha256=vrE2lwvuheySWTOJdr_gh9AT47ck02WCHo0muRq5HS8,43
7
+ iparq-0.2.5.dist-info/licenses/LICENSE,sha256=apqXCIYD_rrtbJVE-Ex1-1X7N0cBwZTOm4KL3TEFmYA,1067
8
+ iparq-0.2.5.dist-info/RECORD,,
@@ -1,216 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: iparq
3
- Version: 0.1.7
4
- Summary: Display version and compression information about a parquet file
5
- Author-email: MiguelElGallo <miguel.zurcher@gmail.com>
6
- License-File: LICENSE
7
- Requires-Python: >=3.9
8
- Requires-Dist: pyarrow>=19.0.0
9
- Requires-Dist: pydantic>=2.10.6
10
- Requires-Dist: typer>=0.15.1
11
- Provides-Extra: checks
12
- Requires-Dist: mypy>=1.14.1; extra == 'checks'
13
- Requires-Dist: ruff>=0.9.3; extra == 'checks'
14
- Provides-Extra: test
15
- Requires-Dist: pytest>=7.0; extra == 'test'
16
- Description-Content-Type: text/markdown
17
-
18
- # iparq
19
-
20
- [![Python package](https://github.com/MiguelElGallo/iparq/actions/workflows/python-package.yml/badge.svg)](https://github.com/MiguelElGallo/iparq/actions/workflows/python-package.yml)
21
-
22
- [![Dependabot Updates](https://github.com/MiguelElGallo/iparq/actions/workflows/dependabot/dependabot-updates/badge.svg)](https://github.com/MiguelElGallo/iparq/actions/workflows/dependabot/dependabot-updates)
23
-
24
- [![Upload Python Package](https://github.com/MiguelElGallo/iparq/actions/workflows/python-publish.yml/badge.svg)](https://github.com/MiguelElGallo/iparq/actions/workflows/python-publish.yml)
25
-
26
- ![alt text](media/iparq.png)
27
- After reading [this blog](https://duckdb.org/2025/01/22/parquet-encodings.html), I began to wonder which Parquet version and compression methods the everyday tools we rely on actually use, only to find that there’s no straightforward way to determine this. That curiosity and the difficulty of quickly discovering such details motivated me to create iparq (Information Parquet). My goal with iparq is to help users easily identify the specifics of the Parquet files generated by different engines, making it clear which features—like newer encodings or certain compression algorithms—the creator of the parquet is using.
28
-
29
- ***New*** Bloom filters information: Displays if there are bloom filters.
30
- Read more about bloom filters in this [great article](https://duckdb.org/2025/03/07/parquet-bloom-filters-in-duckdb.html).
31
-
32
-
33
- ## Installation
34
-
35
- ### Using pip
36
-
37
- 1) Install the package using pip:
38
-
39
- ```sh
40
- pip install iparq
41
- ```
42
-
43
- 2) Verify the installation by running:
44
-
45
- ```sh
46
- iparq --help
47
- ```
48
-
49
- ### Using uv
50
-
51
- 1) Make sure to have Astral’s UV installed by following the steps here:
52
-
53
- <https://docs.astral.sh/uv/getting-started/installation/>
54
-
55
- 2) Execute the following command:
56
-
57
- ```sh
58
- uv pip install iparq
59
- ```
60
-
61
- 3) Verify the installation by running:
62
-
63
- ```sh
64
- iparq --help
65
- ```
66
-
67
- ### Using Homebrew in a MAC
68
-
69
- 1) Run the following:
70
-
71
- ```sh
72
- brew tap MiguelElGallo/tap https://github.com/MiguelElGallo//homebrew-iparq.git
73
- brew install MiguelElGallo/tap/iparq
74
- iparq —help
75
- ```
76
-
77
- ## Usage
78
-
79
- Run
80
-
81
- ```sh
82
- iparq <filename>
83
- ```
84
-
85
- Replace `<filename>` with the path to your .parquet file. The utility will read the metadata of the file and print the compression codecs used in the parquet file.
86
-
87
- ## Example ouput - Bloom Filters
88
-
89
- ```log
90
- ParquetMetaModel(
91
- created_by='DuckDB version v1.2.1 (build 8e52ec4395)',
92
- num_columns=1,
93
- num_rows=100000000,
94
- num_row_groups=10,
95
- format_version='1.0',
96
- serialized_size=1196
97
- )
98
- Column Compression Info:
99
- Row Group 0:
100
- Column 'r' (Index 0): SNAPPY
101
- Row Group 1:
102
- Column 'r' (Index 0): SNAPPY
103
- Row Group 2:
104
- Column 'r' (Index 0): SNAPPY
105
- Row Group 3:
106
- Column 'r' (Index 0): SNAPPY
107
- Row Group 4:
108
- Column 'r' (Index 0): SNAPPY
109
- Row Group 5:
110
- Column 'r' (Index 0): SNAPPY
111
- Row Group 6:
112
- Column 'r' (Index 0): SNAPPY
113
- Row Group 7:
114
- Column 'r' (Index 0): SNAPPY
115
- Row Group 8:
116
- Column 'r' (Index 0): SNAPPY
117
- Row Group 9:
118
- Column 'r' (Index 0): SNAPPY
119
- Bloom Filter Info:
120
- Row Group 0:
121
- Column 'r' (Index 0): Has bloom filter
122
- Row Group 1:
123
- Column 'r' (Index 0): Has bloom filter
124
- Row Group 2:
125
- Column 'r' (Index 0): Has bloom filter
126
- Row Group 3:
127
- Column 'r' (Index 0): Has bloom filter
128
- Row Group 4:
129
- Column 'r' (Index 0): Has bloom filter
130
- Row Group 5:
131
- Column 'r' (Index 0): Has bloom filter
132
- Row Group 6:
133
- Column 'r' (Index 0): Has bloom filter
134
- Row Group 7:
135
- Column 'r' (Index 0): Has bloom filter
136
- Row Group 8:
137
- Column 'r' (Index 0): Has bloom filter
138
- Row Group 9:
139
- Column 'r' (Index 0): Has bloom filter
140
- Compression codecs: {'SNAPPY'}
141
- ```
142
-
143
- ## Example output
144
-
145
- ```log
146
- ParquetMetaModel(
147
- created_by='parquet-cpp-arrow version 14.0.2',
148
- num_columns=19,
149
- num_rows=2964624,
150
- num_row_groups=3,
151
- format_version='2.6',
152
- serialized_size=6357
153
- )
154
- Column Compression Info:
155
- Row Group 0:
156
- Column 'VendorID' (Index 0): ZSTD
157
- Column 'tpep_pickup_datetime' (Index 1): ZSTD
158
- Column 'tpep_dropoff_datetime' (Index 2): ZSTD
159
- Column 'passenger_count' (Index 3): ZSTD
160
- Column 'trip_distance' (Index 4): ZSTD
161
- Column 'RatecodeID' (Index 5): ZSTD
162
- Column 'store_and_fwd_flag' (Index 6): ZSTD
163
- Column 'PULocationID' (Index 7): ZSTD
164
- Column 'DOLocationID' (Index 8): ZSTD
165
- Column 'payment_type' (Index 9): ZSTD
166
- Column 'fare_amount' (Index 10): ZSTD
167
- Column 'extra' (Index 11): ZSTD
168
- Column 'mta_tax' (Index 12): ZSTD
169
- Column 'tip_amount' (Index 13): ZSTD
170
- Column 'tolls_amount' (Index 14): ZSTD
171
- Column 'improvement_surcharge' (Index 15): ZSTD
172
- Column 'total_amount' (Index 16): ZSTD
173
- Column 'congestion_surcharge' (Index 17): ZSTD
174
- Column 'Airport_fee' (Index 18): ZSTD
175
- Row Group 1:
176
- Column 'VendorID' (Index 0): ZSTD
177
- Column 'tpep_pickup_datetime' (Index 1): ZSTD
178
- Column 'tpep_dropoff_datetime' (Index 2): ZSTD
179
- Column 'passenger_count' (Index 3): ZSTD
180
- Column 'trip_distance' (Index 4): ZSTD
181
- Column 'RatecodeID' (Index 5): ZSTD
182
- Column 'store_and_fwd_flag' (Index 6): ZSTD
183
- Column 'PULocationID' (Index 7): ZSTD
184
- Column 'DOLocationID' (Index 8): ZSTD
185
- Column 'payment_type' (Index 9): ZSTD
186
- Column 'fare_amount' (Index 10): ZSTD
187
- Column 'extra' (Index 11): ZSTD
188
- Column 'mta_tax' (Index 12): ZSTD
189
- Column 'tip_amount' (Index 13): ZSTD
190
- Column 'tolls_amount' (Index 14): ZSTD
191
- Column 'improvement_surcharge' (Index 15): ZSTD
192
- Column 'total_amount' (Index 16): ZSTD
193
- Column 'congestion_surcharge' (Index 17): ZSTD
194
- Column 'Airport_fee' (Index 18): ZSTD
195
- Row Group 2:
196
- Column 'VendorID' (Index 0): ZSTD
197
- Column 'tpep_pickup_datetime' (Index 1): ZSTD
198
- Column 'tpep_dropoff_datetime' (Index 2): ZSTD
199
- Column 'passenger_count' (Index 3): ZSTD
200
- Column 'trip_distance' (Index 4): ZSTD
201
- Column 'RatecodeID' (Index 5): ZSTD
202
- Column 'store_and_fwd_flag' (Index 6): ZSTD
203
- Column 'PULocationID' (Index 7): ZSTD
204
- Column 'DOLocationID' (Index 8): ZSTD
205
- Column 'payment_type' (Index 9): ZSTD
206
- Column 'fare_amount' (Index 10): ZSTD
207
- Column 'extra' (Index 11): ZSTD
208
- Column 'mta_tax' (Index 12): ZSTD
209
- Column 'tip_amount' (Index 13): ZSTD
210
- Column 'tolls_amount' (Index 14): ZSTD
211
- Column 'improvement_surcharge' (Index 15): ZSTD
212
- Column 'total_amount' (Index 16): ZSTD
213
- Column 'congestion_surcharge' (Index 17): ZSTD
214
- Column 'Airport_fee' (Index 18): ZSTD
215
- Compression codecs: {'ZSTD'}
216
- ```
@@ -1,7 +0,0 @@
1
- iparq/__init__.py,sha256=sXLh7g3KC4QCFxcZGBTpG2scR7hmmBsMjq6LqRptkRg,22
2
- iparq/source.py,sha256=crKRTuZY6W2zEhFfAzb4XWopaVy9qnEkFqz4jbyGmeM,6439
3
- iparq-0.1.7.dist-info/METADATA,sha256=ku4ZsLQ1Iq2ovPzKqv8aGqBGBkn3nTviW6hFzFsP6bw,6884
4
- iparq-0.1.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
5
- iparq-0.1.7.dist-info/entry_points.txt,sha256=vrE2lwvuheySWTOJdr_gh9AT47ck02WCHo0muRq5HS8,43
6
- iparq-0.1.7.dist-info/licenses/LICENSE,sha256=apqXCIYD_rrtbJVE-Ex1-1X7N0cBwZTOm4KL3TEFmYA,1067
7
- iparq-0.1.7.dist-info/RECORD,,
File without changes