csvpredict 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. csvpredict-0.0.1/PKG-INFO +577 -0
  2. csvpredict-0.0.1/README.md +550 -0
  3. csvpredict-0.0.1/pyproject.toml +48 -0
  4. csvpredict-0.0.1/src/csvpredict/__init__.py +35 -0
  5. csvpredict-0.0.1/src/csvpredict/client/__init__.py +8 -0
  6. csvpredict-0.0.1/src/csvpredict/client/api/__init__.py +1 -0
  7. csvpredict-0.0.1/src/csvpredict/client/api/api_v1_generate_analysis_graphs/__init__.py +1 -0
  8. csvpredict-0.0.1/src/csvpredict/client/api/api_v1_generate_analysis_graphs/endpoint_generate_graphs_api_v1_graphs_post.py +248 -0
  9. csvpredict-0.0.1/src/csvpredict/client/api/api_v1_generate_analysis_graphs/endpoint_generate_graphs_batch_api_v1_graphs_batch_post.py +236 -0
  10. csvpredict-0.0.1/src/csvpredict/client/api/api_v1_generate_excel_files/__init__.py +1 -0
  11. csvpredict-0.0.1/src/csvpredict/client/api/api_v1_generate_excel_files/endpoint_generate_xlsx_api_v1_xlsx_post.py +236 -0
  12. csvpredict-0.0.1/src/csvpredict/client/api/api_v1_generate_excel_files/endpoint_generate_xlsx_batch_api_v1_xlsx_batch_post.py +236 -0
  13. csvpredict-0.0.1/src/csvpredict/client/api/api_v1_generate_power_point_files/__init__.py +1 -0
  14. csvpredict-0.0.1/src/csvpredict/client/api/api_v1_generate_power_point_files/endpoint_generate_pptx_api_v1_pptx_post.py +240 -0
  15. csvpredict-0.0.1/src/csvpredict/client/api/api_v1_generate_power_point_files/endpoint_generate_pptx_batch_api_v1_pptx_batch_post.py +240 -0
  16. csvpredict-0.0.1/src/csvpredict/client/api/api_v1_generate_visualizable_statistics/__init__.py +1 -0
  17. csvpredict-0.0.1/src/csvpredict/client/api/api_v1_generate_visualizable_statistics/endpoint_inspect_api_v1_inspect_post.py +222 -0
  18. csvpredict-0.0.1/src/csvpredict/client/api/api_v1_generate_visualizable_statistics/endpoint_inspect_batch_api_v1_inspect_batch_post.py +226 -0
  19. csvpredict-0.0.1/src/csvpredict/client/api/default/__init__.py +1 -0
  20. csvpredict-0.0.1/src/csvpredict/client/api/default/health_get.py +84 -0
  21. csvpredict-0.0.1/src/csvpredict/client/api/generate_analysis_graphs/__init__.py +1 -0
  22. csvpredict-0.0.1/src/csvpredict/client/api/generate_analysis_graphs/endpoint_generate_graphs_batch_graphs_batch_post.py +262 -0
  23. csvpredict-0.0.1/src/csvpredict/client/api/generate_analysis_graphs/endpoint_generate_graphs_graphs_post.py +270 -0
  24. csvpredict-0.0.1/src/csvpredict/client/api/generate_excel_files/__init__.py +1 -0
  25. csvpredict-0.0.1/src/csvpredict/client/api/generate_excel_files/endpoint_generate_xlsx_batch_xlsx_batch_post.py +258 -0
  26. csvpredict-0.0.1/src/csvpredict/client/api/generate_excel_files/endpoint_generate_xlsx_xlsx_post.py +258 -0
  27. csvpredict-0.0.1/src/csvpredict/client/api/generate_power_point_files/__init__.py +1 -0
  28. csvpredict-0.0.1/src/csvpredict/client/api/generate_power_point_files/endpoint_generate_pptx_batch_pptx_batch_post.py +262 -0
  29. csvpredict-0.0.1/src/csvpredict/client/api/generate_power_point_files/endpoint_generate_pptx_pptx_post.py +262 -0
  30. csvpredict-0.0.1/src/csvpredict/client/api/generate_visualizable_statistics/__init__.py +1 -0
  31. csvpredict-0.0.1/src/csvpredict/client/api/generate_visualizable_statistics/endpoint_inspect_batch_inspect_batch_post.py +248 -0
  32. csvpredict-0.0.1/src/csvpredict/client/api/generate_visualizable_statistics/endpoint_inspect_inspect_post.py +242 -0
  33. csvpredict-0.0.1/src/csvpredict/client/api/make_predictions_based_on_input_data/__init__.py +1 -0
  34. csvpredict-0.0.1/src/csvpredict/client/api/make_predictions_based_on_input_data/endpoint_predict_file_predict_file_post.py +254 -0
  35. csvpredict-0.0.1/src/csvpredict/client/api/make_predictions_based_on_input_data/endpoint_predict_predict_post.py +262 -0
  36. csvpredict-0.0.1/src/csvpredict/client/client.py +282 -0
  37. csvpredict-0.0.1/src/csvpredict/client/errors.py +16 -0
  38. csvpredict-0.0.1/src/csvpredict/client/models/__init__.py +205 -0
  39. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_graphs_api_v1_graphs_post.py +398 -0
  40. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_graphs_api_v1_graphs_post_extension.py +11 -0
  41. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_graphs_batch_api_v1_graphs_batch_post.py +394 -0
  42. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_graphs_batch_api_v1_graphs_batch_post_extension.py +11 -0
  43. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_graphs_batch_graphs_batch_post.py +394 -0
  44. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_graphs_batch_graphs_batch_post_extension.py +11 -0
  45. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_graphs_graphs_post.py +398 -0
  46. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_graphs_graphs_post_extension.py +11 -0
  47. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_pptx_api_v1_pptx_post.py +357 -0
  48. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_pptx_batch_api_v1_pptx_batch_post.py +351 -0
  49. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_pptx_batch_pptx_batch_post.py +351 -0
  50. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_pptx_pptx_post.py +357 -0
  51. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_xlsx_api_v1_xlsx_post.py +342 -0
  52. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_xlsx_batch_api_v1_xlsx_batch_post.py +336 -0
  53. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_xlsx_batch_xlsx_batch_post.py +336 -0
  54. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_generate_xlsx_xlsx_post.py +342 -0
  55. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_inspect_api_v1_inspect_post.py +205 -0
  56. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_inspect_batch_api_v1_inspect_batch_post.py +199 -0
  57. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_inspect_batch_inspect_batch_post.py +199 -0
  58. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_inspect_inspect_post.py +205 -0
  59. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_predict_file_predict_file_post.py +362 -0
  60. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_predict_file_predict_file_post_custom_seasonality_type_0.py +59 -0
  61. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_predict_predict_post.py +358 -0
  62. csvpredict-0.0.1/src/csvpredict/client/models/body_endpoint_predict_predict_post_custom_seasonality_type_0.py +59 -0
  63. csvpredict-0.0.1/src/csvpredict/client/models/color_palette.py +164 -0
  64. csvpredict-0.0.1/src/csvpredict/client/models/file_statistics.py +131 -0
  65. csvpredict-0.0.1/src/csvpredict/client/models/file_statistics_partitioned_statistics_type_0.py +63 -0
  66. csvpredict-0.0.1/src/csvpredict/client/models/font_family.py +17 -0
  67. csvpredict-0.0.1/src/csvpredict/client/models/frequency.py +12 -0
  68. csvpredict-0.0.1/src/csvpredict/client/models/http_validation_error.py +79 -0
  69. csvpredict-0.0.1/src/csvpredict/client/models/inspect_batch_response.py +71 -0
  70. csvpredict-0.0.1/src/csvpredict/client/models/inspect_batch_response_statistics.py +80 -0
  71. csvpredict-0.0.1/src/csvpredict/client/models/inspect_response.py +144 -0
  72. csvpredict-0.0.1/src/csvpredict/client/models/inspect_response_partitioned_statistics_type_0.py +63 -0
  73. csvpredict-0.0.1/src/csvpredict/client/models/language.py +14 -0
  74. csvpredict-0.0.1/src/csvpredict/client/models/overall_statistics_dict.py +164 -0
  75. csvpredict-0.0.1/src/csvpredict/client/models/predict_response.py +113 -0
  76. csvpredict-0.0.1/src/csvpredict/client/models/predict_response_historical_data_item.py +46 -0
  77. csvpredict-0.0.1/src/csvpredict/client/models/predict_response_predictions_item.py +46 -0
  78. csvpredict-0.0.1/src/csvpredict/client/models/statistics.py +95 -0
  79. csvpredict-0.0.1/src/csvpredict/client/models/statistics_correlation_statistics.py +71 -0
  80. csvpredict-0.0.1/src/csvpredict/client/models/statistics_correlation_statistics_additional_property.py +63 -0
  81. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict.py +209 -0
  82. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_boolean.py +81 -0
  83. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_boolean_additional_property_item.py +46 -0
  84. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_boolean_count.py +85 -0
  85. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_boolean_count_additional_property_item.py +46 -0
  86. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_datetime.py +81 -0
  87. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_datetime_additional_property_item.py +46 -0
  88. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_datetime_count.py +85 -0
  89. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_datetime_count_additional_property_item.py +46 -0
  90. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_duration.py +81 -0
  91. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_duration_additional_property_item.py +46 -0
  92. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_duration_count.py +85 -0
  93. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_duration_count_additional_property_item.py +46 -0
  94. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_numeric.py +81 -0
  95. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_numeric_additional_property_item.py +46 -0
  96. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_numeric_count.py +85 -0
  97. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_numeric_count_additional_property_item.py +46 -0
  98. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_numeric_rolling.py +85 -0
  99. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_numeric_rolling_additional_property_item.py +46 -0
  100. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_string.py +81 -0
  101. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_string_additional_property_item.py +46 -0
  102. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_string_count.py +85 -0
  103. csvpredict-0.0.1/src/csvpredict/client/models/summary_statistics_dict_string_count_additional_property_item.py +46 -0
  104. csvpredict-0.0.1/src/csvpredict/client/models/validation_error.py +90 -0
  105. csvpredict-0.0.1/src/csvpredict/client/types.py +54 -0
  106. csvpredict-0.0.1/src/csvpredict/helpers.py +17 -0
  107. csvpredict-0.0.1/src/csvpredict/html.py +1933 -0
  108. csvpredict-0.0.1/src/csvpredict/models.py +964 -0
  109. csvpredict-0.0.1/src/csvpredict/py.typed +0 -0
  110. csvpredict-0.0.1/src/csvpredict/sdk.py +1048 -0
  111. csvpredict-0.0.1/src/csvpredict/server.py +411 -0
@@ -0,0 +1,577 @@
1
+ Metadata-Version: 2.3
2
+ Name: csvpredict
3
+ Version: 0.0.1
4
+ Summary: SDK for the CSVPredict API - analyze and visualize CSV data
5
+ Author: Leon David Zipp
6
+ Author-email: Leon David Zipp <leondavidzipp@gmx.de>
7
+ Requires-Dist: attrs>=25.4.0
8
+ Requires-Dist: httpx>=0.28.1
9
+ Requires-Dist: ipython>=9.9.0
10
+ Requires-Dist: numpy>=2.4.1
11
+ Requires-Dist: pandas>=2.3.3
12
+ Requires-Dist: pillow>=12.1.0
13
+ Requires-Dist: polars>=1.37.1
14
+ Requires-Dist: pyarrow>=23.0.0
15
+ Requires-Dist: pydantic>=2.12.5
16
+ Requires-Dist: pyspark>=4.1.1
17
+ Requires-Dist: urllib3>=2.6.3
18
+ Requires-Dist: pandas>=2.0.0 ; extra == 'all'
19
+ Requires-Dist: polars>=0.20.0 ; extra == 'all'
20
+ Requires-Dist: pandas>=2.0.0 ; extra == 'pandas'
21
+ Requires-Dist: polars>=0.20.0 ; extra == 'polars'
22
+ Requires-Python: >=3.13
23
+ Provides-Extra: all
24
+ Provides-Extra: pandas
25
+ Provides-Extra: polars
26
+ Description-Content-Type: text/markdown
27
+
28
+ # CSVPredict SDK
29
+
30
+ A Python SDK for the CSVPredict API - analyze and visualize your tabular data with ease.
31
+
32
+ ## Features
33
+
34
+ - 📊 **Generate graphs** - Automatically create histograms, correlations, and statistical visualizations
35
+ - 🔍 **Inspect data** - Get comprehensive statistics for numeric, string, datetime, and boolean columns
36
+ - 🐼 **DataFrame support** - Works with pandas, Polars DataFrames, and Polars LazyFrames
37
+ - 📓 **Jupyter integration** - Display graphs directly in notebooks with customizable sizing
38
+ - 🔄 **Multiple output formats** - Export to Polars, pandas, numpy, or raw dictionaries
39
+ - 🎯 **Flexible filtering** - Case-insensitive substring matching for columns, datatypes, and partitions
40
+
41
+ ## Installation
42
+
43
+ ```bash
44
+ pip install csvpredict
45
+ ```
46
+
47
+ Or with uv:
48
+
49
+ ```bash
50
+ uv add csvpredict
51
+ ```
52
+
53
+ ## Quick Start
54
+
55
+ ```python
56
+ import polars as pl
57
+ from csvpredict import CSVPredict
58
+
59
+ # Initialize the SDK
60
+ sdk = CSVPredict(base_url="http://localhost:8000")
61
+
62
+ # Load your data
63
+ df = pl.read_csv("sales_data.csv")
64
+
65
+ # Generate graphs
66
+ graphs = sdk.generate_graphs(df)
67
+ graphs.display() # Show in Jupyter
68
+
69
+ # Get statistics
70
+ result = sdk.inspect(df)
71
+ print(result.stats.overall.height) # Number of rows
72
+ print(result.stats.summary.numeric["price"]) # Price statistics
73
+ ```
74
+
75
+ ## API Reference
76
+
77
+ ### Initializing the SDK
78
+
79
+ ```python
80
+ from csvpredict import CSVPredict
81
+
82
+ sdk = CSVPredict(
83
+     base_url="http://localhost:8000",  # API server URL
84
+     frontend_url="http://localhost:3000",  # Frontend URL (for browser inspection)
85
+     timeout=60.0,  # Request timeout in seconds
86
+ )
87
+ ```
88
+
89
+ ---
90
+
91
+ ## Generating Graphs
92
+
93
+ Generate statistical visualizations from your data.
94
+
95
+ ### Basic Usage
96
+
97
+ ```python
98
+ import polars as pl
99
+ from csvpredict import CSVPredict
100
+
101
+ sdk = CSVPredict()
102
+ df = pl.read_csv("data.csv")
103
+
104
+ # Generate graphs with default settings (SVG format)
105
+ graphs = sdk.generate_graphs(df)
106
+ ```
107
+
108
+ ### Options
109
+
110
+ ```python
111
+ graphs = sdk.generate_graphs(
112
+     df,
113
+     partition_by=["category"],  # Generate separate graphs per category
114
+     extension=".png",  # Output format: .svg, .png, .jpg, .jpeg
115
+     transparent=True,  # Transparent background
116
+     window_size=7,  # Rolling window size for time series
117
+     null_values=["N/A", "NULL"],  # Strings to treat as null
118
+     dpi=300,  # Resolution for raster formats
119
+     font="Arial",  # Font family
120
+     language="en",  # Language for labels
121
+ )
122
+ ```
123
+
124
+ ### Working with GraphResult
125
+
126
+ The `generate_graphs()` method returns a `GraphResult` object with many ways to access your graphs:
127
+
128
+ #### Display in Jupyter
129
+
130
+ ```python
131
+ # Display all graphs in a grid
132
+ graphs.display()
133
+
134
+ # Display with custom size
135
+ graphs.display(width=500, height=400)
136
+
137
+ # Display in 3 columns
138
+ graphs.display(columns=3)
139
+
140
+ # Filter by name (case-insensitive substring match)
141
+ graphs.display("histogram") # Show all histograms
142
+ graphs.display("price") # Show price-related graphs
143
+ graphs.display("correlation") # Show correlation matrices
144
+
145
+ # Filter by extension
146
+ graphs.display(extension=".png")
147
+
148
+ # Display a single graph
149
+ graphs.display_one("price_histogram.svg", width=600)
150
+ ```
151
+
152
+ #### Access Individual Graphs
153
+
154
+ ```python
155
+ # Get list of all graph names
156
+ print(graphs.names)
157
+ # ['price_histogram.svg', 'quantity_histogram.svg', 'correlation_matrix.svg', ...]
158
+
159
+ # Get raw bytes for a specific graph
160
+ svg_bytes = graphs["price_histogram.svg"]
161
+
162
+ # Check if a graph exists
163
+ if graphs.contains("price_histogram.svg"):
164
+     print("Found it!")
165
+
166
+ # Get with default
167
+ data = graphs.get("missing.svg", default=None)
168
+
169
+ # Number of graphs
170
+ print(graphs.count()) # 15
171
+ ```
172
+
173
+ #### Iterate and Filter
174
+
175
+ ```python
176
+ # Iterate over all graphs
177
+ for name, data in graphs:
178
+     print(f"{name}: {len(data)} bytes")
179
+
180
+ # Filter by pattern (case-insensitive)
181
+ for name, data in graphs.filter("histogram"):
182
+     print(name)
183
+
184
+ # Filter by extension
185
+ for name, data in graphs.filter(extension=".svg"):
186
+     print(name)
187
+ ```
188
+
189
+ #### Convert to PIL Image (Raster Only)
190
+
191
+ ```python
192
+ # Convert PNG/JPG to PIL Image for further processing
193
+ img = graphs.to_pil("price_histogram.png")
194
+ img.show()
195
+ img.save("modified.png")
196
+ ```
197
+
198
+ #### Save to Disk
199
+
200
+ ```python
201
+ # Save as ZIP file
202
+ graphs.save("graphs.zip")
203
+
204
+ # Extract all to a directory
205
+ extracted_files = graphs.extract("./output_graphs/")
206
+ print(extracted_files)
207
+ # [Path('output_graphs/price_histogram.svg'), ...]
208
+ ```
209
+
210
+ ---
211
+
212
+ ## Inspecting Data
213
+
214
+ Get comprehensive statistics about your dataset.
215
+
216
+ ### Basic Usage
217
+
218
+ ```python
219
+ result = sdk.inspect(df)
220
+
221
+ # Access overall statistics
222
+ print(result.stats.overall.height) # Number of rows
223
+ print(result.stats.overall.width) # Number of columns
224
+ print(result.stats.overall.null_count) # Total null values
225
+ print(result.stats.overall.duplicate_count) # Duplicate rows
226
+ print(result.stats.overall.column_names) # List of columns
227
+ ```
228
+
229
+ ### Options
230
+
231
+ ```python
232
+ result = sdk.inspect(
233
+     df,
234
+     partition_by=["category", "region"],  # Partition data
235
+     window_size=5,  # Rolling window size
236
+     round_digits=3,  # Decimal precision
237
+     null_values=["N/A", ""],  # Strings to treat as null
238
+ )
239
+ ```
240
+
241
+ ### Accessing Summary Statistics
242
+
243
+ Summary statistics are organized by data type:
244
+
245
+ ```python
246
+ # Numeric columns
247
+ result.stats.summary.numeric["price"] # Statistics for 'price' column
248
+ result.stats.summary.numeric_count["price"] # Value counts for 'price'
249
+ result.stats.summary.numeric_rolling["price"] # Rolling statistics (time series)
250
+
251
+ # String columns
252
+ result.stats.summary.string["product_name"]
253
+ result.stats.summary.string_count["product_name"]
254
+
255
+ # Datetime columns
256
+ result.stats.summary.datetime["created_at"]
257
+ result.stats.summary.datetime_count["created_at"]
258
+
259
+ # Boolean columns
260
+ result.stats.summary.boolean["is_active"]
261
+ result.stats.summary.boolean_count["is_active"]
262
+
263
+ # Duration columns
264
+ result.stats.summary.duration["processing_time"]
265
+ result.stats.summary.duration_count["processing_time"]
266
+ ```
267
+
268
+ ### Filtering Statistics
269
+
270
+ All filters use case-insensitive substring matching:
271
+
272
+ ```python
273
+ # Filter by column name
274
+ result.stats.summary.get_stats_for_column("price")
275
+ # Returns: {'price': DataFrame, 'unit_price': DataFrame, 'total_price': DataFrame}
276
+
277
+ # Filter by column name - get value counts
278
+ result.stats.summary.get_counts_for_column("status")
279
+ # Returns: {'status': DataFrame, 'order_status': DataFrame}
280
+
281
+ # Filter by column name - get rolling stats
282
+ result.stats.summary.get_rolling_for_column("price")
283
+
284
+ # Filter by datatype
285
+ result.stats.summary.filter(datatype="numeric")
286
+ # Returns: {'numeric': {'price': DataFrame, 'quantity': DataFrame, ...}}
287
+
288
+ # Filter by both column and datatype
289
+ result.stats.summary.filter(column="price", datatype="count")
290
+ # Returns: {'numeric_count': {'price': DataFrame, 'unit_price': DataFrame}}
291
+
292
+ # Access datatype directly
293
+ result.stats.summary["numeric"] # All numeric statistics
294
+ result.stats.summary["string_count"] # All string value counts
295
+ ```
296
+
297
+ ### Correlation Matrix
298
+
299
+ ```python
300
+ # Get the full correlation matrix
301
+ corr = result.stats.correlation.matrix
302
+ print(corr)
303
+
304
+ # Get correlation between two specific columns
305
+ corr_value = result.stats.correlation.get_correlation("price", "quantity")
306
+ print(f"Correlation: {corr_value}")
307
+
308
+ # Get column names in the correlation matrix
309
+ print(result.stats.correlation.columns)
310
+ ```
311
+
312
+ ### Working with Partitioned Data
313
+
314
+ When you use `partition_by`, data is split into groups:
315
+
316
+ ```python
317
+ result = sdk.inspect(df, partition_by=["category"])
318
+
319
+ # Check if data is partitioned
320
+ print(result.is_partitioned) # True
321
+
322
+ # List available partitions
323
+ print(result.partitions)
324
+ # ['_electronics', '_clothing', '_food']
325
+
326
+ # Access a specific partition
327
+ electronics_stats = result["_electronics"]
328
+ print(electronics_stats.overall.height)
329
+ print(electronics_stats.summary.numeric["price"])
330
+
331
+ # Iterate over all partitions
332
+ for partition_name, stats in result.items():
333
+     print(f"{partition_name}: {stats.overall.height} rows")
334
+
335
+ # Filter partitions by name
336
+ matching = result.filter_partitions("electronics")
337
+ # Returns: {'_electronics': Statistics(...)}
338
+
339
+ # Filter across partitions, columns, and datatypes
340
+ filtered = result.filter(
341
+     partition="electronics",
342
+     column="price",
343
+     datatype="numeric"
344
+ )
345
+ ```
346
+
347
+ ### Non-Partitioned Data
348
+
349
+ For non-partitioned data, use `.stats` directly:
350
+
351
+ ```python
352
+ result = sdk.inspect(df) # No partition_by
353
+
354
+ # Access statistics directly
355
+ stats = result.stats
356
+ print(stats.overall.height)
357
+ print(stats.summary.numeric["price"])
358
+ ```
359
+
360
+ ---
361
+
362
+ ## Output Formats
363
+
364
+ All statistics objects support multiple output formats:
365
+
366
+ ### Polars (Default)
367
+
368
+ ```python
369
+ # DataFrames are Polars by default
370
+ df = result.stats.summary.numeric["price"]
371
+ print(type(df)) # <class 'polars.DataFrame'>
372
+
373
+ # Explicitly convert to Polars
374
+ polars_data = result.stats.to_polars()
375
+ polars_lazy = result.stats.to_polars_lazy() # LazyFrame for large data
376
+ ```
377
+
378
+ ### Pandas
379
+
380
+ ```python
381
+ # Convert everything to pandas
382
+ pandas_data = result.stats.to_pandas()
383
+
384
+ # Convert specific statistics
385
+ pandas_df = result.stats.summary.get_stats_for_column_pandas("price")
386
+ pandas_counts = result.stats.summary.get_counts_for_column_pandas("status")
387
+
388
+ # Correlation matrix with index
389
+ corr_df = result.stats.correlation.to_pandas_indexed()
390
+ ```
391
+
392
+ ### NumPy
393
+
394
+ ```python
395
+ # Convert to numpy arrays
396
+ numpy_data = result.stats.to_numpy()
397
+
398
+ # Correlation matrix as numpy array
399
+ corr_array = result.stats.correlation.to_numpy()
400
+ ```
401
+
402
+ ### Dictionary
403
+
404
+ ```python
405
+ # Convert to plain Python dictionaries
406
+ dict_data = result.stats.to_dict()
407
+
408
+ # Useful for JSON serialization
409
+ import json
410
+ json.dumps(result.stats.overall.to_dict())
411
+ ```
412
+
413
+ ---
414
+
415
+ ## DataFrame Compatibility
416
+
417
+ The SDK works with multiple DataFrame types:
418
+
419
+ ### Polars DataFrame
420
+
421
+ ```python
422
+ import polars as pl
423
+
424
+ df = pl.read_csv("data.csv")
425
+ result = sdk.inspect(df)
426
+ graphs = sdk.generate_graphs(df)
427
+ ```
428
+
429
+ ### Polars LazyFrame
430
+
431
+ ```python
432
+ import polars as pl
433
+
434
+ # LazyFrames are collected automatically
435
+ lf = pl.scan_csv("data.csv")
436
+ result = sdk.inspect(lf)
437
+ graphs = sdk.generate_graphs(lf)
438
+ ```
439
+
440
+ ### Pandas DataFrame
441
+
442
+ ```python
443
+ import pandas as pd
444
+
445
+ df = pd.read_csv("data.csv")
446
+ result = sdk.inspect(df)
447
+ graphs = sdk.generate_graphs(df)
448
+ ```
449
+
450
+ ---
451
+
452
+ ## Browser Inspection
453
+
454
+ Open an interactive inspection view in your browser:
455
+
456
+ ```python
457
+ # Inspect and open browser
458
+ result = sdk.inspect_in_browser(df)
459
+
460
+ # With partitioning
461
+ result = sdk.inspect_in_browser(df, partition_by=["category"])
462
+ ```
463
+
464
+ ---
465
+
466
+ ## Complete Example
467
+
468
+ ```python
469
+ import polars as pl
470
+ from csvpredict import CSVPredict
471
+
472
+ # Initialize SDK
473
+ sdk = CSVPredict(base_url="http://localhost:8000")
474
+
475
+ # Load data
476
+ df = pl.read_csv("sales_data.csv")
477
+
478
+ # Generate and display graphs
479
+ graphs = sdk.generate_graphs(df, extension=".svg")
480
+ graphs.display(width=400, columns=2)
481
+
482
+ # Filter to show only histograms
483
+ graphs.display("histogram", width=300)
484
+
485
+ # Save graphs to disk
486
+ graphs.save("sales_graphs.zip")
487
+
488
+ # Inspect data
489
+ result = sdk.inspect(df)
490
+
491
+ # Overall statistics
492
+ print(f"Rows: {result.stats.overall.height}")
493
+ print(f"Columns: {result.stats.overall.width}")
494
+ print(f"Null values: {result.stats.overall.null_count}")
495
+
496
+ # Numeric statistics for price column
497
+ price_stats = result.stats.summary.numeric["price"]
498
+ print(price_stats)
499
+
500
+ # Get all price-related statistics
501
+ price_data = result.stats.summary.get_stats_for_column("price")
502
+ for col_name, stats_df in price_data.items():
503
+     print(f"\n{col_name}:")
504
+     print(stats_df)
505
+
506
+ # Correlation analysis
507
+ print("\nCorrelation Matrix:")
508
+ print(result.stats.correlation.matrix)
509
+
510
+ # Get specific correlation
511
+ corr = result.stats.correlation.get_correlation("price", "quantity")
512
+ print(f"\nPrice-Quantity correlation: {corr}")
513
+
514
+ # Export to different formats
515
+ pandas_stats = result.stats.to_pandas()
516
+ numpy_stats = result.stats.to_numpy()
517
+ dict_stats = result.stats.to_dict()
518
+
519
+ # Partitioned analysis
520
+ result = sdk.inspect(df, partition_by=["category"])
521
+ for partition, stats in result.items():
522
+     print(f"\n{partition}: {stats.overall.height} rows")
523
+     print(stats.summary.numeric["price"])
524
+ ```
525
+
526
+ ---
527
+
528
+ ## Jupyter Notebook Tips
529
+
530
+ ### Optimal Display Settings
531
+
532
+ ```python
533
+ # For high-DPI displays
534
+ graphs.display(width=600, height=450, columns=2)
535
+
536
+ # For presentations
537
+ graphs.display(width=800, height=600, columns=1)
538
+
539
+ # Quick overview
540
+ graphs.display(width=300, height=200, columns=4)
541
+ ```
542
+
543
+ ### Filtering in Notebooks
544
+
545
+ ```python
546
+ # Show only correlation graphs
547
+ graphs.display("correlation")
548
+
549
+ # Show only a specific column's graphs
550
+ graphs.display("price")
551
+
552
+ # Show histograms at smaller size
553
+ graphs.display("histogram", width=250, columns=3)
554
+ ```
555
+
556
+ ---
557
+
558
+ ## Error Handling
559
+
560
+ ```python
561
+ from csvpredict import CSVPredict
562
+
563
+ sdk = CSVPredict()
564
+
565
+ try:
566
+     result = sdk.inspect(df)
567
+ except ValueError as e:
568
+     print(f"API error: {e}")
569
+ except Exception as e:
570
+     print(f"Unexpected error: {e}")
571
+ ```
572
+
573
+ ---
574
+
575
+ ## License
576
+
577
+ MIT License - see LICENSE file for details.