datawash 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. datawash/__init__.py +9 -0
  2. datawash/adapters/__init__.py +12 -0
  3. datawash/adapters/base.py +66 -0
  4. datawash/adapters/csv_adapter.py +23 -0
  5. datawash/adapters/excel_adapter.py +36 -0
  6. datawash/adapters/json_adapter.py +21 -0
  7. datawash/adapters/parquet_adapter.py +34 -0
  8. datawash/cli/__init__.py +0 -0
  9. datawash/cli/formatters.py +110 -0
  10. datawash/cli/main.py +168 -0
  11. datawash/codegen/__init__.py +1 -0
  12. datawash/codegen/generator.py +72 -0
  13. datawash/core/__init__.py +1 -0
  14. datawash/core/cache.py +64 -0
  15. datawash/core/config.py +56 -0
  16. datawash/core/dtypes.py +24 -0
  17. datawash/core/exceptions.py +21 -0
  18. datawash/core/models.py +78 -0
  19. datawash/core/report.py +430 -0
  20. datawash/core/sampling.py +84 -0
  21. datawash/detectors/__init__.py +13 -0
  22. datawash/detectors/base.py +27 -0
  23. datawash/detectors/duplicate_detector.py +56 -0
  24. datawash/detectors/format_detector.py +130 -0
  25. datawash/detectors/missing_detector.py +78 -0
  26. datawash/detectors/outlier_detector.py +93 -0
  27. datawash/detectors/registry.py +64 -0
  28. datawash/detectors/similarity_detector.py +294 -0
  29. datawash/detectors/type_detector.py +100 -0
  30. datawash/profiler/__init__.py +1 -0
  31. datawash/profiler/engine.py +88 -0
  32. datawash/profiler/parallel.py +122 -0
  33. datawash/profiler/patterns.py +80 -0
  34. datawash/profiler/statistics.py +41 -0
  35. datawash/suggestors/__init__.py +1 -0
  36. datawash/suggestors/base.py +15 -0
  37. datawash/suggestors/engine.py +327 -0
  38. datawash/suggestors/prioritizer.py +23 -0
  39. datawash/transformers/__init__.py +13 -0
  40. datawash/transformers/base.py +27 -0
  41. datawash/transformers/categories.py +64 -0
  42. datawash/transformers/columns.py +72 -0
  43. datawash/transformers/duplicates.py +43 -0
  44. datawash/transformers/formats.py +95 -0
  45. datawash/transformers/missing.py +201 -0
  46. datawash/transformers/registry.py +30 -0
  47. datawash/transformers/types.py +95 -0
  48. datawash-0.2.0.dist-info/METADATA +353 -0
  49. datawash-0.2.0.dist-info/RECORD +53 -0
  50. datawash-0.2.0.dist-info/WHEEL +5 -0
  51. datawash-0.2.0.dist-info/entry_points.txt +2 -0
  52. datawash-0.2.0.dist-info/licenses/LICENSE +21 -0
  53. datawash-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,353 @@
1
+ Metadata-Version: 2.4
2
+ Name: datawash
3
+ Version: 0.2.0
4
+ Summary: Intelligent data cleaning and quality analysis
5
+ Author: Sai Pranav Krovvidi
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/Pranav1011/DataWash
8
+ Project-URL: Repository, https://github.com/Pranav1011/DataWash
9
+ Project-URL: Issues, https://github.com/Pranav1011/DataWash/issues
10
+ Keywords: data-cleaning,data-quality,pandas,etl,data-analysis
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: pandas>=1.5.0
22
+ Requires-Dist: numpy>=1.21.0
23
+ Requires-Dist: pydantic>=2.0.0
24
+ Requires-Dist: rich>=13.0.0
25
+ Requires-Dist: typer>=0.9.0
26
+ Provides-Extra: ml
27
+ Requires-Dist: sentence-transformers>=2.2.0; extra == "ml"
28
+ Requires-Dist: datasketch>=1.5.0; extra == "ml"
29
+ Requires-Dist: scikit-learn>=1.0.0; extra == "ml"
30
+ Requires-Dist: python-Levenshtein>=0.21.0; extra == "ml"
31
+ Provides-Extra: formats
32
+ Requires-Dist: pyarrow>=10.0.0; extra == "formats"
33
+ Requires-Dist: openpyxl>=3.0.0; extra == "formats"
34
+ Provides-Extra: all
35
+ Requires-Dist: sentence-transformers>=2.2.0; extra == "all"
36
+ Requires-Dist: datasketch>=1.5.0; extra == "all"
37
+ Requires-Dist: scikit-learn>=1.0.0; extra == "all"
38
+ Requires-Dist: python-Levenshtein>=0.21.0; extra == "all"
39
+ Requires-Dist: pyarrow>=10.0.0; extra == "all"
40
+ Requires-Dist: openpyxl>=3.0.0; extra == "all"
41
+ Provides-Extra: dev
42
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
43
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
44
+ Requires-Dist: black>=23.0.0; extra == "dev"
45
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
46
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
47
+ Dynamic: license-file
48
+
49
+ # DataWash
50
+
51
+ <p align="center">
52
+ <strong>Intelligent data cleaning and quality analysis for Python</strong>
53
+ </p>
54
+
55
+ <p align="center">
56
+ <a href="#installation">Installation</a> •
57
+ <a href="#quick-start">Quick Start</a> •
58
+ <a href="#features">Features</a> •
59
+ <a href="#documentation">Documentation</a> •
60
+ <a href="#examples">Examples</a>
61
+ </p>
62
+
63
+ <p align="center">
64
+ <img src="https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-blue" alt="Python">
65
+ <img src="https://img.shields.io/badge/coverage-92%25-brightgreen" alt="Coverage">
66
+ <img src="https://img.shields.io/badge/tests-114%20passing-brightgreen" alt="Tests">
67
+ <img src="https://img.shields.io/badge/license-MIT-green" alt="License">
68
+ </p>
69
+
70
+ ---
71
+
72
+ DataWash analyzes your tabular data, detects quality issues, suggests prioritized fixes, and generates reproducible Python code — all in a few lines of code.
73
+
74
+ ```python
75
+ from datawash import analyze
76
+
77
+ report = analyze("messy_data.csv")
78
+ print(f"Quality Score: {report.quality_score}/100")
79
+ clean_df = report.apply_all()
80
+ print(report.generate_code())
81
+ ```
82
+
83
+ ## Why DataWash?
84
+
85
+ | Problem | DataWash Solution |
86
+ |---------|-------------------|
87
+ | Missing values silently break ML models | Automatic detection + smart filling strategies |
88
+ | Inconsistent date formats cause parsing errors | Detects and standardizes to ISO format |
89
+ | Duplicate rows inflate statistics | Identifies and removes exact duplicates |
90
+ | Boolean values stored as "yes"/"no" strings | Converts to proper boolean type |
91
+ | Manual data cleaning is tedious and error-prone | Generates reproducible Python code |
92
+
93
+ ## Installation
94
+
95
+ ```bash
96
+ pip install datawash
97
+ ```
98
+
99
+ **Optional extras:**
100
+
101
+ ```bash
102
+ pip install "datawash[formats]" # Parquet + Excel support
103
+ pip install "datawash[ml]" # ML-powered detection (coming soon)
104
+ pip install "datawash[all]" # All optional dependencies
105
+ pip install "datawash[dev]" # Development tools
106
+ ```
107
+
108
+ ## Quick Start
109
+
110
+ ### Python API
111
+
112
+ ```python
113
+ from datawash import analyze
114
+
115
+ # 1. Analyze your data (sampling enabled by default for large datasets)
116
+ report = analyze("data.csv") # or pass a DataFrame
117
+
118
+ # 2. Check quality score
119
+ print(f"Quality Score: {report.quality_score}/100")
120
+ print(f"Issues Found: {len(report.issues)}")
121
+
122
+ # 3. Review suggestions
123
+ for s in report.suggestions:
124
+     print(f"[{s.id}] {s.action}")
125
+
126
+ # 4. Apply all fixes
127
+ clean_df = report.apply_all()
128
+
129
+ # 5. Or apply selectively
130
+ clean_df = report.apply([1, 3, 5]) # by suggestion ID
131
+
132
+ # 6. Generate reproducible code
133
+ print(report.generate_code())
134
+
135
+ # Disable sampling for exact results on large datasets
136
+ report = analyze("data.csv", sample=False)
137
+
138
+ # Disable parallel processing
139
+ report = analyze("data.csv", parallel=False)
140
+ ```
141
+
142
+ ### Command Line
143
+
144
+ ```bash
145
+ # Analyze and see quality report
146
+ datawash analyze data.csv
147
+
148
+ # Get prioritized suggestions
149
+ datawash suggest data.csv --use-case ml
150
+
151
+ # Clean and export
152
+ datawash clean data.csv -o clean.csv --apply-all
153
+
154
+ # Generate Python code
155
+ datawash codegen data.csv --apply-all
156
+ ```
157
+
158
+ ## Features
159
+
160
+ ### Data Quality Detection
161
+
162
+ | Detector | What It Finds |
163
+ |----------|---------------|
164
+ | **Missing** | Null values, empty strings, whitespace-only values |
165
+ | **Duplicates** | Exact duplicate rows |
166
+ | **Formats** | Mixed case, inconsistent dates, whitespace padding |
167
+ | **Outliers** | Statistical anomalies (IQR or Z-score) |
168
+ | **Types** | Numbers/booleans stored as strings |
169
+ | **Similarity** | Potentially duplicate columns |
170
+
171
+ ### Smart Transformations
172
+
173
+ | Transformer | Operations |
174
+ |-------------|------------|
175
+ | **Missing** | Drop rows, fill with median/mode/value, clean empty strings |
176
+ | **Duplicates** | Remove exact duplicates |
177
+ | **Types** | Convert to numeric, boolean, datetime |
178
+ | **Formats** | Standardize case, dates, strip whitespace |
179
+ | **Columns** | Drop, rename, merge columns |
180
+ | **Categories** | Normalize categorical values |
181
+
182
+ ### Intelligent Suggestion System
183
+
184
+ - **Conflict Resolution**: Automatically prevents conflicting transformations
185
+ - **Execution Ordering**: Applies fixes in optimal order (6 phases)
186
+ - **Use-Case Aware**: Priorities adjust for ML, analytics, or export workflows
187
+ - **Contextual Rationale**: Every suggestion explains why it's recommended
188
+
189
+ ### Code Generation
190
+
191
+ ```python
192
+ # Generate a reusable cleaning function
193
+ code = report.generate_code(style="function")
194
+
195
+ # Or a standalone script
196
+ code = report.generate_code(style="script")
197
+ ```
198
+
199
+ ## Performance
200
+
201
+ DataWash v0.2.0 is optimized for large datasets:
202
+
203
+ | Dataset | Time | Throughput |
204
+ |---------|------|------------|
205
+ | 1M rows x 10 cols | 0.72s | 1.4M rows/sec |
206
+ | 100K rows x 50 cols | 2.13s | 47K rows/sec |
207
+ | 10K rows x 100 cols | 4.35s | 2.3K rows/sec |
208
+ | 1M rows x 50 cols | 3.24s | 309K rows/sec |
209
+ | 50K rows x 250 cols | 9.99s | 5K rows/sec |
210
+
211
+ **Optimizations include:**
212
+ - Smart sampling for datasets >=50K rows (10-20x speedup)
213
+ - Parallel column profiling and detection
214
+ - 31% memory reduction via dtype optimization
215
+ - O(n) similarity detection with MinHash + LSH
216
+
217
+ ## Examples
218
+
219
+ We provide ready-to-run examples in the `examples/` directory:
220
+
221
+ | Example | Description |
222
+ |---------|-------------|
223
+ | [`quickstart.py`](examples/quickstart.py) | Basic workflow: analyze → suggest → apply → codegen |
224
+ | [`csv_cleaning.py`](examples/csv_cleaning.py) | Load CSV, clean, save with CLI equivalents |
225
+ | [`ml_preprocessing.py`](examples/ml_preprocessing.py) | ML-optimized cleaning workflow |
226
+ | [`jupyter_demo.ipynb`](examples/jupyter_demo.ipynb) | Interactive notebook with visualizations |
227
+
228
+ **Sample datasets** in `examples/sample_data/`:
229
+ - `customers_messy.csv` - Names, emails, phones with various issues
230
+ - `orders_messy.csv` - Dates, amounts, categories with inconsistencies
231
+ - `employees_messy.csv` - Mixed types, duplicates, outliers
232
+
233
+ ```bash
234
+ # Run an example
235
+ python examples/quickstart.py
236
+ ```
237
+
238
+ ## Documentation
239
+
240
+ | Document | Description |
241
+ |----------|-------------|
242
+ | [Getting Started](docs/getting-started.md) | Installation and first steps |
243
+ | [User Guide](docs/user-guide.md) | Complete feature walkthrough |
244
+ | [API Reference](docs/api-reference.md) | Detailed API documentation |
245
+ | [CLI Reference](docs/cli-reference.md) | Command-line interface guide |
246
+ | [Configuration](docs/configuration.md) | Customization options |
247
+ | [Contributing](docs/contributing.md) | How to contribute |
248
+
249
+ ## Use Cases
250
+
251
+ Choose a use case to get optimized suggestions:
252
+
253
+ ```python
254
+ report = analyze(df, use_case="ml") # or "general", "analytics", "export"
255
+ ```
256
+
257
+ | Use Case | Prioritizes |
258
+ |----------|-------------|
259
+ | `general` | Balanced approach for exploration |
260
+ | `ml` | Duplicates, missing values, type conversions |
261
+ | `analytics` | Consistency, date formats, outliers |
262
+ | `export` | Format standardization, clean values |
263
+
264
+ ## Configuration
265
+
266
+ ```python
267
+ report = analyze(
268
+     "data.csv",
269
+     use_case="ml",
270
+     config={
271
+         "detectors": {
272
+             "outlier_method": "zscore", # or "iqr"
273
+             "outlier_threshold": 2.5,
274
+             "min_similarity": 0.8,
275
+         },
276
+         "suggestions": {
277
+             "max_suggestions": 20,
278
+         },
279
+     },
280
+ )
281
+ ```
282
+
283
+ ## Project Status
284
+
285
+ | Metric | Value |
286
+ |--------|-------|
287
+ | Source Code | ~2,900 lines |
288
+ | Test Code | ~1,270 lines |
289
+ | Tests | 114 passing |
290
+ | Coverage | ~92% |
291
+ | Python | 3.10, 3.11, 3.12 |
292
+ | Platforms | Linux, macOS, Windows |
293
+
294
+ ### What's Working
295
+
296
+ - ✅ Multi-format loading (CSV, JSON, Parquet, Excel)
297
+ - ✅ Comprehensive profiling and statistics
298
+ - ✅ 6 detectors for common data quality issues
299
+ - ✅ 6 transformers with multiple operations each
300
+ - ✅ Smart suggestion system with conflict resolution
301
+ - ✅ Reproducible Python code generation
302
+ - ✅ Rich CLI with colored output
303
+ - ✅ Jupyter notebook support
304
+
305
+ ### What's Next
306
+
307
+ - ML-powered semantic similarity detection
308
+ - Fuzzy duplicate detection for near-duplicate rows
309
+ - Advanced imputation (KNN, MICE)
310
+ - Cloud storage connectors (S3, BigQuery)
311
+ - PII detection for sensitive data
312
+ - Schema validation for expected column types and constraints
313
+
314
+ ## Requirements
315
+
316
+ - **Python** >= 3.10
317
+ - **Core**: pandas, numpy, pydantic, rich, typer
318
+ - **Optional**: pyarrow (Parquet), openpyxl (Excel); scikit-learn, sentence-transformers, datasketch, python-Levenshtein (`ml` extra)
319
+
320
+ ## Development
321
+
322
+ ```bash
323
+ # Clone and install
324
+ git clone https://github.com/Pranav1011/DataWash.git
325
+ cd DataWash
326
+ pip install -e ".[dev,all]"
327
+
328
+ # Run tests
329
+ pytest
330
+
331
+ # Format code
332
+ black src tests
333
+ ruff check src tests
334
+ ```
335
+
336
+ ## Contributing
337
+
338
+ Contributions welcome! See [CONTRIBUTING.md](docs/contributing.md) for guidelines.
339
+
340
+ **Areas where help is needed:**
341
+ - ML module implementation (sentence-transformers)
342
+ - Additional detectors (PII, schema validation)
343
+ - Performance optimization
344
+ - Documentation and examples
345
+ - Cloud connectors
346
+
347
+ ## License
348
+
349
+ MIT License - see [LICENSE](LICENSE) for details.
350
+
351
+ ## Acknowledgments
352
+
353
+ Built with [pandas](https://pandas.pydata.org/), [pydantic](https://docs.pydantic.dev/), [rich](https://rich.readthedocs.io/), [typer](https://typer.tiangolo.com/), and [scikit-learn](https://scikit-learn.org/).
@@ -0,0 +1,53 @@
1
+ datawash/__init__.py,sha256=1Zj7T1Xs-t4uTI8c-i4xe9rF0WGwKxepuQ_9wdVbCpw,204
2
+ datawash/adapters/__init__.py,sha256=LJnEVVlyq_LoHpW1hdxFIaE96oBwc8zme4YCqGusR0A,343
3
+ datawash/adapters/base.py,sha256=9c0_qP7zHCf8t7J2cN2Evz9vPa4BbMIKrUQdxZAOGS8,1782
4
+ datawash/adapters/csv_adapter.py,sha256=719WINqhKyHJyMHy9CBQdYNHPcoi7LuAFIeL8NTXYoc,564
5
+ datawash/adapters/excel_adapter.py,sha256=bXAL9FLwo0twdCoLZZbRYc9MIH3yWyXv_qkWStX0g0k,1016
6
+ datawash/adapters/json_adapter.py,sha256=93O3kmxlFCGzEyLkvkIEW1ozhd03Q-CFYESP3Ef98ps,492
7
+ datawash/adapters/parquet_adapter.py,sha256=htK02H7lvim9REa4QDbr8H51DezCSreMggD8qnTRcmI,976
8
+ datawash/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ datawash/cli/formatters.py,sha256=vCRtBWYK9m3o7zR5z4K1KzuXYxSdHRn2sWTP7OAJfq0,3300
10
+ datawash/cli/main.py,sha256=9e-SdbgZT17FUUljzwH60f_sVoiqxEkvXvl8E-Pxw64,5259
11
+ datawash/codegen/__init__.py,sha256=_k0rnjiZqOkt825K3vJnjrNJuNwpXyNDpCSsvAe4McI,54
12
+ datawash/codegen/generator.py,sha256=uU84nrwwyQoAn8UrG9p8c2q8AJlqGssBm6y30sDQOT8,2482
13
+ datawash/core/__init__.py,sha256=TeULOhD1EXvygmAbuCIlRpyxYYMBM_-Z-RP0VMjBtqw,39
14
+ datawash/core/cache.py,sha256=kFRl_giVvlyuHEbx9xfohyoO6ZFjQUUonyc3zClsJt8,2537
15
+ datawash/core/config.py,sha256=2_KmLrZ97424VjBH4c8yRN1P5kf8JWG-iz9_ysXbGKk,1570
16
+ datawash/core/dtypes.py,sha256=02vXzmYInDX7dS5AOEKoDBGt5FpUBRwPKH_ioA0dhp0,684
17
+ datawash/core/exceptions.py,sha256=6L6G2S50D1xZwZuWTocEJcgH5Cukh8Oqb_I1HNI8GsY,424
18
+ datawash/core/models.py,sha256=ZZCATjfphVnoW3a6GruOif3dfNAgKeva1_Q3W6tMCX4,1865
19
+ datawash/core/report.py,sha256=IH4zfZ5zUlcRnkK5hgaxTZpp2F0sZWMeIExqo5yIjyY,15933
20
+ datawash/core/sampling.py,sha256=8Bxlr9qhVlM17VOSUSsb9i4nWMKb72V7oOG8JxUtQe8,3063
21
+ datawash/detectors/__init__.py,sha256=6c_MLlA0i-MK_8lhxMDpnH9CAMBh4K0DumcbbHHvVm8,372
22
+ datawash/detectors/base.py,sha256=8Oii7RSCRb77de4CNi_XUH0NtA0fH3Hd-N8n7dhs4oA,620
23
+ datawash/detectors/duplicate_detector.py,sha256=CxrGFmGXcx5_JICo2JRpTRI7IY1kdUd5IU0G7A8gMQY,1667
24
+ datawash/detectors/format_detector.py,sha256=hUj8LGrSGDvII9BAZC3kRBYNGoFS6bYaRcO4thg3BiM,4710
25
+ datawash/detectors/missing_detector.py,sha256=1ze-rzx07-G7mUGWPehc37qmfpv18ImRpd8ZJLeBryY,2818
26
+ datawash/detectors/outlier_detector.py,sha256=eOqcHfY93BqNyQjOj1gotZ_z5p69t0e1YKUGiqcUD4I,3056
27
+ datawash/detectors/registry.py,sha256=sbda5YRcu_a2Hoe3gtr104AVv5-6kRTVvPQnq3P9T0g,2050
28
+ datawash/detectors/similarity_detector.py,sha256=ScgOfQyjGJcUyaNi__iHBQN0UEmLyaq93-4CZGGRsL0,10588
29
+ datawash/detectors/type_detector.py,sha256=E_FOer8qgfLSaTKr8XsE7P5IEhmle_osH0bIJfYuDR8,3695
30
+ datawash/profiler/__init__.py,sha256=NbAMeUn2EqIujVAgMg4Wwp5bnwU8iWCUR6ClJ2wzwQE,55
31
+ datawash/profiler/engine.py,sha256=i6Sq5okeEiNmlKwf9SZSrpt5bg3zmOfhIsZVp7ZSd2s,2737
32
+ datawash/profiler/parallel.py,sha256=TATKeCqBavfn4y5Uo8T-NeucqI1cOIhLEon5akzahTo,3772
33
+ datawash/profiler/patterns.py,sha256=BSbLz4JMWxyS_h057s1eWQ9AglYB45HZy2M0X82n1Ec,2236
34
+ datawash/profiler/statistics.py,sha256=MPGgl2MbcaVDwUIgdFzticiwQ_7TINYkVlNjRhayuuE,1399
35
+ datawash/suggestors/__init__.py,sha256=47VPzsdGbzE6M3QQSjO9GmmAA0NgfMR5WeWpN3AhV40,65
36
+ datawash/suggestors/base.py,sha256=VklzlzQCWSt-gDTZyuivyxAB7fk8nZIxkPdKnh218NQ,399
37
+ datawash/suggestors/engine.py,sha256=iKvutrA4OcIDwauksqGyU8495NZP-PxQdk9kLVB134c,12121
38
+ datawash/suggestors/prioritizer.py,sha256=jiOanLpieGLaPZHzkAJKnnpP6iLteDqLP22A2IIktkA,797
39
+ datawash/transformers/__init__.py,sha256=6sn_xdbQqk5zFn1PpcUKndWq__eW7yW7VKYu7JrF0h0,287
40
+ datawash/transformers/base.py,sha256=U2lLvrgaWu5DU5n1PkQjFIO_z_XqPff0ve0OoTmngQU,683
41
+ datawash/transformers/categories.py,sha256=VjyXK1cKeiYfQfVb5NWC_L6uP0pwi4OeK2nO5waq24w,2035
42
+ datawash/transformers/columns.py,sha256=qGB-yH9foVChZHSTQvbSgOATaDJ1MhJWelW2pV8nAls,2575
43
+ datawash/transformers/duplicates.py,sha256=1tL51l4jZsEhvuLDPuu5-wLtQBCvUrdcULmCcS08NdE,1322
44
+ datawash/transformers/formats.py,sha256=hJM2_8f9d_YGcreb_3lOdj9m29k8Wg5wGggKjcJJls8,3741
45
+ datawash/transformers/missing.py,sha256=G4a1eZN8QrD-vMGD8_K-MIQ76Okw0znfrbmUXEvOLyI,8572
46
+ datawash/transformers/registry.py,sha256=nKq4XU_TqWJg53wFuCVUc07XBVAQ_ehgrAb-K1BFjW4,768
47
+ datawash/transformers/types.py,sha256=x4Im1b6_XbEXg685T4usZ_KQyDlM3YSAprs0VKEtM0A,3413
48
+ datawash-0.2.0.dist-info/licenses/LICENSE,sha256=kNSkDIpMFky2dMMDOGB62O7DoC-o8Xno_7Nwp8ylcK4,1078
49
+ datawash-0.2.0.dist-info/METADATA,sha256=U22gYry3P1D5X5ENAWatKzfVCBWYFfEJsQPB9cYxLn8,10866
50
+ datawash-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
51
+ datawash-0.2.0.dist-info/entry_points.txt,sha256=zOhpea6Ad1tn5ofpYb6BEROG5bj6-LQh2eO6aVxP744,51
52
+ datawash-0.2.0.dist-info/top_level.txt,sha256=xDL_KT2BiuAlMCI5iuXC6j-llUrx3XLYaiVgovjS-M4,9
53
+ datawash-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ datawash = datawash.cli.main:app
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 DataWash Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ datawash