datacompose 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacompose might be problematic. Click here for more details.
- datacompose/__init__.py +1 -0
- datacompose/cli/__init__.py +5 -0
- datacompose/cli/colors.py +80 -0
- datacompose/cli/commands/__init__.py +3 -0
- datacompose/cli/commands/add.py +215 -0
- datacompose/cli/commands/init.py +451 -0
- datacompose/cli/commands/list.py +118 -0
- datacompose/cli/commands/upgrade.py +7 -0
- datacompose/cli/main.py +59 -0
- datacompose/cli/validation.py +72 -0
- datacompose/generators/__init__.py +3 -0
- datacompose/generators/base.py +193 -0
- datacompose/generators/pyspark/__init__.py +1 -0
- datacompose/generators/pyspark/generator.py +51 -0
- datacompose/operators/__init__.py +21 -0
- datacompose/operators/primitives.py +595 -0
- datacompose/transformers/__init__.py +0 -0
- datacompose/transformers/discovery.py +186 -0
- datacompose/transformers/text/__init__.py +1 -0
- datacompose/transformers/text/clean_addresses/__init__.py +1 -0
- datacompose/transformers/text/clean_addresses/pyspark/pyspark_primitives.py +1967 -0
- datacompose/transformers/text/clean_emails/__init__.py +1 -0
- datacompose/transformers/text/clean_emails/pyspark/pyspark_primitives.py +781 -0
- datacompose/transformers/text/clean_phone_numbers/__init__.py +0 -0
- datacompose/transformers/text/clean_phone_numbers/pyspark/pyspark_primitives.py +941 -0
- datacompose-0.2.4.dist-info/METADATA +431 -0
- datacompose-0.2.4.dist-info/RECORD +31 -0
- datacompose-0.2.4.dist-info/WHEEL +5 -0
- datacompose-0.2.4.dist-info/entry_points.txt +2 -0
- datacompose-0.2.4.dist-info/licenses/LICENSE +21 -0
- datacompose-0.2.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datacompose
|
|
3
|
+
Version: 0.2.4
|
|
4
|
+
Summary: Copy-pasteable data transformation primitives for PySpark. Inspired by shadcn-svelte.
|
|
5
|
+
Author: Datacompose Contributors
|
|
6
|
+
Maintainer: Datacompose Contributors
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/datacompose/datacompose
|
|
9
|
+
Project-URL: Documentation, https://github.com/datacompose/datacompose/tree/main/docs
|
|
10
|
+
Project-URL: Repository, https://github.com/datacompose/datacompose.git
|
|
11
|
+
Project-URL: Issues, https://github.com/datacompose/datacompose/issues
|
|
12
|
+
Project-URL: Changelog, https://github.com/datacompose/datacompose/blob/main/CHANGELOG.md
|
|
13
|
+
Keywords: data-cleaning,data-quality,udf,spark,postgres,code-generation,data-pipeline,etl
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
17
|
+
Classifier: Topic :: Database
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
20
|
+
Classifier: Programming Language :: Python :: 3
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
26
|
+
Classifier: Operating System :: OS Independent
|
|
27
|
+
Requires-Python: >=3.8
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Requires-Dist: jinja2>=3.0.0
|
|
31
|
+
Requires-Dist: pyyaml>=6.0
|
|
32
|
+
Requires-Dist: click>=8.0.0
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
35
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
36
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
37
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
38
|
+
Provides-Extra: docs
|
|
39
|
+
Requires-Dist: sphinx>=7.2.0; extra == "docs"
|
|
40
|
+
Requires-Dist: furo>=2024.1.0; extra == "docs"
|
|
41
|
+
Requires-Dist: myst-parser>=2.0.0; extra == "docs"
|
|
42
|
+
Requires-Dist: sphinx-autodoc-typehints>=1.25.0; extra == "docs"
|
|
43
|
+
Requires-Dist: sphinx-copybutton>=0.5.2; extra == "docs"
|
|
44
|
+
Requires-Dist: sphinx-tabs>=3.4.0; extra == "docs"
|
|
45
|
+
Requires-Dist: sphinx-click>=5.1.0; extra == "docs"
|
|
46
|
+
Dynamic: license-file
|
|
47
|
+
|
|
48
|
+
# Datacompose
|
|
49
|
+
|
|
50
|
+
A powerful data transformation framework for building reusable, composable data cleaning pipelines in PySpark.
|
|
51
|
+
|
|
52
|
+
## Overview
|
|
53
|
+
|
|
54
|
+
Datacompose provides a declarative way to build data transformation pipelines using composable primitives. It generates optimized, standalone PySpark code that can be deployed without runtime dependencies.
|
|
55
|
+
|
|
56
|
+
## Key Features
|
|
57
|
+
|
|
58
|
+
- **Composable Primitives**: Build complex transformations from simple, reusable functions
|
|
59
|
+
- **Smart Partial Application**: Configure transformations with parameters for reuse
|
|
60
|
+
- **Pipeline Compilation**: Convert declarative pipeline definitions into optimized Spark operations
|
|
61
|
+
- **Code Generation**: Generate standalone PySpark code with embedded dependencies
|
|
62
|
+
- **Comprehensive Libraries**: Pre-built primitives for emails, addresses, and phone numbers
|
|
63
|
+
- **Conditional Logic**: Support for if/else branching in pipelines
|
|
64
|
+
- **Type-Safe Operations**: All transformations maintain Spark column type safety
|
|
65
|
+
|
|
66
|
+
## Installation
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pip install datacompose
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Quick Start
|
|
73
|
+
|
|
74
|
+
### 1. Initialize a Project
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
datacompose init
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
This creates a `datacompose.json` configuration file with default settings.
|
|
81
|
+
|
|
82
|
+
### 2. Generate Transformation Code
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# Generate email cleaning primitives
|
|
86
|
+
datacompose add clean_emails --target pyspark
|
|
87
|
+
|
|
88
|
+
# Generate address standardization primitives
|
|
89
|
+
datacompose add clean_addresses --target pyspark
|
|
90
|
+
|
|
91
|
+
# Generate phone number validation primitives
|
|
92
|
+
datacompose add clean_phone_numbers --target pyspark
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### 3. Use the Generated Code
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from pyspark.sql import SparkSession
|
|
99
|
+
from pyspark.sql import functions as F
|
|
100
|
+
|
|
101
|
+
# Import the generated primitives
|
|
102
|
+
from build.pyspark.clean_emails.email_primitives import emails
|
|
103
|
+
|
|
104
|
+
# Create Spark session
|
|
105
|
+
spark = SparkSession.builder.appName("DataCleaning").getOrCreate()
|
|
106
|
+
|
|
107
|
+
# Load your data
|
|
108
|
+
df = spark.read.csv("data.csv", header=True)
|
|
109
|
+
|
|
110
|
+
# Apply email transformations
|
|
111
|
+
cleaned_df = df.withColumn(
|
|
112
|
+
"email_clean",
|
|
113
|
+
emails.standardize_email(F.col("email"))
|
|
114
|
+
).withColumn(
|
|
115
|
+
"email_domain",
|
|
116
|
+
emails.extract_domain(F.col("email_clean"))
|
|
117
|
+
).withColumn(
|
|
118
|
+
"is_valid",
|
|
119
|
+
emails.is_valid_email(F.col("email_clean"))
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
# Filter to valid emails only
|
|
123
|
+
valid_emails = cleaned_df.filter(F.col("is_valid"))
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Core Concepts
|
|
127
|
+
|
|
128
|
+
### PrimitiveRegistry
|
|
129
|
+
|
|
130
|
+
A container for organizing related transformation functions:
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
from datacompose.operators.primitives import PrimitiveRegistry
|
|
134
|
+
|
|
135
|
+
# Create a registry for text operations
|
|
136
|
+
text = PrimitiveRegistry("text")
|
|
137
|
+
|
|
138
|
+
# Register transformation functions
|
|
139
|
+
@text.register()
|
|
140
|
+
def lowercase(col):
|
|
141
|
+
return F.lower(col)
|
|
142
|
+
|
|
143
|
+
@text.register()
|
|
144
|
+
def remove_spaces(col):
|
|
145
|
+
return F.regexp_replace(col, r'\s+', '')
|
|
146
|
+
|
|
147
|
+
# Use the transformations
|
|
148
|
+
df = df.withColumn("clean_text", text.lowercase(F.col("input")))
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### SmartPrimitive
|
|
152
|
+
|
|
153
|
+
Enables partial application of transformations:
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
@text.register()
|
|
157
|
+
def trim(col, chars=' '):
|
|
158
|
+
return F.trim(col, chars)
|
|
159
|
+
|
|
160
|
+
# Direct usage
|
|
161
|
+
df = df.withColumn("trimmed", text.trim(F.col("input")))
|
|
162
|
+
|
|
163
|
+
# Pre-configured usage
|
|
164
|
+
trim_tabs = text.trim(chars='\t')
|
|
165
|
+
df = df.withColumn("no_tabs", trim_tabs(F.col("input")))
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Pipeline Composition
|
|
169
|
+
|
|
170
|
+
Build complex pipelines from simple primitives:
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
@text.compose(text=text)
|
|
174
|
+
def clean_pipeline():
|
|
175
|
+
text.trim()
|
|
176
|
+
text.lowercase()
|
|
177
|
+
text.remove_spaces()
|
|
178
|
+
|
|
179
|
+
# Apply the entire pipeline
|
|
180
|
+
df = df.withColumn("cleaned", clean_pipeline(F.col("input")))
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Conditional Pipelines
|
|
184
|
+
|
|
185
|
+
Add conditional logic to your transformations:
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
@text.register(is_conditional=True)
|
|
189
|
+
def is_valid_length(col):
|
|
190
|
+
return F.length(col) > 5
|
|
191
|
+
|
|
192
|
+
@text.register()
|
|
193
|
+
def truncate(col):
|
|
194
|
+
return F.substring(col, 1, 5)
|
|
195
|
+
|
|
196
|
+
@text.compose(text=text)
|
|
197
|
+
def smart_truncate():
|
|
198
|
+
if text.is_valid_length():
|
|
199
|
+
text.truncate()
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## Available Primitives
|
|
203
|
+
|
|
204
|
+
### Email Primitives
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
from build.pyspark.clean_emails.email_primitives import emails
|
|
208
|
+
|
|
209
|
+
# Validation
|
|
210
|
+
emails.is_valid_email(col)
|
|
211
|
+
emails.is_business_email(col)
|
|
212
|
+
emails.is_disposable_email(col)
|
|
213
|
+
|
|
214
|
+
# Extraction
|
|
215
|
+
emails.extract_domain(col)
|
|
216
|
+
emails.extract_username(col)
|
|
217
|
+
emails.extract_tld(col)
|
|
218
|
+
|
|
219
|
+
# Standardization
|
|
220
|
+
emails.standardize_email(col)
|
|
221
|
+
emails.normalize_gmail(col)
|
|
222
|
+
emails.fix_common_typos(col)
|
|
223
|
+
|
|
224
|
+
# Filtering
|
|
225
|
+
emails.filter_valid_emails(col)
|
|
226
|
+
emails.filter_business_emails(col)
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### Address Primitives
|
|
230
|
+
|
|
231
|
+
```python
|
|
232
|
+
from build.pyspark.clean_addresses.address_primitives import addresses
|
|
233
|
+
|
|
234
|
+
# Extraction
|
|
235
|
+
addresses.extract_street_number(col)
|
|
236
|
+
addresses.extract_street_name(col)
|
|
237
|
+
addresses.extract_city(col)
|
|
238
|
+
addresses.extract_state(col)
|
|
239
|
+
addresses.extract_zip_code(col)
|
|
240
|
+
|
|
241
|
+
# Standardization
|
|
242
|
+
addresses.standardize_state(col)
|
|
243
|
+
addresses.standardize_street_suffix(col)
|
|
244
|
+
addresses.standardize_direction(col)
|
|
245
|
+
|
|
246
|
+
# Validation
|
|
247
|
+
addresses.is_valid_zip_code(col)
|
|
248
|
+
addresses.is_valid_state(col)
|
|
249
|
+
addresses.is_po_box(col)
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
### Phone Number Primitives
|
|
253
|
+
|
|
254
|
+
```python
|
|
255
|
+
from build.pyspark.clean_phone_numbers.phone_primitives import phones
|
|
256
|
+
|
|
257
|
+
# Validation
|
|
258
|
+
phones.is_valid_nanp(col)
|
|
259
|
+
phones.is_valid_international(col)
|
|
260
|
+
phones.is_toll_free(col)
|
|
261
|
+
|
|
262
|
+
# Extraction
|
|
263
|
+
phones.extract_country_code(col)
|
|
264
|
+
phones.extract_area_code(col)
|
|
265
|
+
phones.extract_exchange(col)
|
|
266
|
+
phones.extract_subscriber(col)
|
|
267
|
+
|
|
268
|
+
# Formatting
|
|
269
|
+
phones.format_nanp(col)
|
|
270
|
+
phones.format_e164(col)
|
|
271
|
+
phones.format_international(col)
|
|
272
|
+
|
|
273
|
+
# Standardization
|
|
274
|
+
phones.standardize_phone(col)
|
|
275
|
+
phones.clean_phone(col)
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## Advanced Usage
|
|
279
|
+
|
|
280
|
+
### Creating Custom Primitives
|
|
281
|
+
|
|
282
|
+
```python
|
|
283
|
+
from datacompose.operators.primitives import PrimitiveRegistry
|
|
284
|
+
|
|
285
|
+
# Create your own registry
|
|
286
|
+
custom = PrimitiveRegistry("custom")
|
|
287
|
+
|
|
288
|
+
@custom.register()
|
|
289
|
+
def remove_special_chars(col):
|
|
290
|
+
return F.regexp_replace(col, r'[^a-zA-Z0-9\s]', '')
|
|
291
|
+
|
|
292
|
+
@custom.register()
|
|
293
|
+
def capitalize_words(col):
|
|
294
|
+
return F.initcap(col)
|
|
295
|
+
|
|
296
|
+
@custom.register(is_conditional=True)
|
|
297
|
+
def contains_numbers(col):
|
|
298
|
+
return col.rlike(r'\d+')
|
|
299
|
+
|
|
300
|
+
# Create a pipeline with your custom primitives
|
|
301
|
+
@custom.compose(custom=custom)
|
|
302
|
+
def clean_text():
|
|
303
|
+
custom.remove_special_chars()
|
|
304
|
+
if custom.contains_numbers():
|
|
305
|
+
custom.capitalize_words()
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
### Working with Parameters
|
|
309
|
+
|
|
310
|
+
```python
|
|
311
|
+
@custom.register()
|
|
312
|
+
def pad_string(col, length=10, fill_char='0'):
|
|
313
|
+
return F.lpad(col, length, fill_char)
|
|
314
|
+
|
|
315
|
+
# Use with different parameters
|
|
316
|
+
df = df.withColumn("padded_10", custom.pad_string(F.col("id")))
|
|
317
|
+
df = df.withColumn("padded_5", custom.pad_string(length=5)(F.col("id")))
|
|
318
|
+
df = df.withColumn("padded_x", custom.pad_string(length=8, fill_char='X')(F.col("id")))
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
### Combining Multiple Registries
|
|
322
|
+
|
|
323
|
+
```python
|
|
324
|
+
from build.pyspark.clean_emails.email_primitives import emails
|
|
325
|
+
from build.pyspark.clean_phone_numbers.phone_primitives import phones
|
|
326
|
+
|
|
327
|
+
# Create a combined validation pipeline
|
|
328
|
+
validation = PrimitiveRegistry("validation")
|
|
329
|
+
|
|
330
|
+
@validation.compose(emails=emails, phones=phones)
|
|
331
|
+
def validate_contact_info():
|
|
332
|
+
# Check email
|
|
333
|
+
if emails.is_valid_email():
|
|
334
|
+
emails.standardize_email()
|
|
335
|
+
|
|
336
|
+
# Check phone
|
|
337
|
+
if phones.is_valid_phone():
|
|
338
|
+
phones.standardize_phone()
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
## CLI Commands
|
|
342
|
+
|
|
343
|
+
### Initialize a Project
|
|
344
|
+
```bash
|
|
345
|
+
datacompose init [--yes]
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
### Add Transformers
|
|
349
|
+
```bash
|
|
350
|
+
datacompose add <transformer> [--target TARGET] [--output OUTPUT] [--verbose]
|
|
351
|
+
|
|
352
|
+
# Examples
|
|
353
|
+
datacompose add clean_emails --target pyspark
|
|
354
|
+
datacompose add clean_addresses --target pyspark --output ./custom/path
|
|
355
|
+
datacompose add clean_phone_numbers --target pyspark --verbose
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
### List Available Transformers
|
|
359
|
+
```bash
|
|
360
|
+
datacompose list transformers
|
|
361
|
+
datacompose list generators
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
## Project Structure
|
|
365
|
+
|
|
366
|
+
After running `datacompose add`, your project will have the following structure:
|
|
367
|
+
|
|
368
|
+
```
|
|
369
|
+
project/
|
|
370
|
+
├── datacompose.json # Configuration file
|
|
371
|
+
├── build/
|
|
372
|
+
│ └── pyspark/
|
|
373
|
+
│ ├── clean_emails/
|
|
374
|
+
│ │ ├── email_primitives.py # Generated email primitives
|
|
375
|
+
│ │ └── utils/
|
|
376
|
+
│ │ └── primitives.py # Core framework (embedded)
|
|
377
|
+
│ ├── clean_addresses/
|
|
378
|
+
│ │ ├── address_primitives.py
|
|
379
|
+
│ │ └── utils/
|
|
380
|
+
│ │ └── primitives.py
|
|
381
|
+
│ └── clean_phone_numbers/
|
|
382
|
+
│ ├── phone_primitives.py
|
|
383
|
+
│ └── utils/
|
|
384
|
+
│ └── primitives.py
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
## Configuration
|
|
388
|
+
|
|
389
|
+
The `datacompose.json` file configures default settings:
|
|
390
|
+
|
|
391
|
+
```json
|
|
392
|
+
{
|
|
393
|
+
"version": "1.0.0",
|
|
394
|
+
"targets": {
|
|
395
|
+
"pyspark": {
|
|
396
|
+
"output": "./build/pyspark",
|
|
397
|
+
"generator": "SparkPandasUDFGenerator"
|
|
398
|
+
}
|
|
399
|
+
},
|
|
400
|
+
"templates": {
|
|
401
|
+
"directory": "src/transformers/templates"
|
|
402
|
+
}
|
|
403
|
+
}
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
## Performance Considerations
|
|
409
|
+
|
|
410
|
+
- Primitives are designed to be efficient Spark operations
|
|
411
|
+
- Pipelines are compiled to minimize intermediate columns
|
|
412
|
+
- Conditional logic uses Spark's `when/otherwise` for vectorized operations
|
|
413
|
+
- Generated code has no runtime dependencies beyond PySpark
|
|
414
|
+
|
|
415
|
+
## Philosophy & Inspiration
|
|
416
|
+
|
|
417
|
+
Datacompose is inspired by [shadcn-svelte](https://www.shadcn-svelte.com/) and [huntabyte](https://github.com/huntabyte)'s approach to component libraries. Just as shadcn-svelte provides "copy and paste" components rather than npm packages, Datacompose generates data transformation code that becomes part of YOUR codebase.
|
|
418
|
+
|
|
419
|
+
**Why we believe in this approach:**
|
|
420
|
+
|
|
421
|
+
- **You Own Your Code**: No external dependencies to manage or worry about breaking changes
|
|
422
|
+
- **Full Transparency**: Every transformation is readable, debuggable PySpark code you can understand
|
|
423
|
+
- **Customization First**: Need to adjust a transformation? Just edit the code
|
|
424
|
+
- **Learn by Reading**: The generated code serves as documentation and learning material
|
|
425
|
+
|
|
426
|
+
This is NOT a traditional library - it's a code generator that gives you production-ready data transformation primitives that you can modify to fit your exact needs.
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
## License
|
|
430
|
+
|
|
431
|
+
MIT License - see LICENSE file for details
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
datacompose/__init__.py,sha256=kbQEzEvheAMsm3-MT4nWSuX42fjrfgDWoR8WZmC_WX4,34
|
|
2
|
+
datacompose/cli/__init__.py,sha256=a5UAbPnAm87gUq4YaoTPN5umknN5s2xgnztatI03So0,107
|
|
3
|
+
datacompose/cli/colors.py,sha256=Ax7jHhdAIuq5x3663gJ7_MzFCBOJv38DqNXts5t4SLs,1756
|
|
4
|
+
datacompose/cli/main.py,sha256=NjA6Uy1_A-xGaAEKKdXOrtMbAxOZ9Cn1aNDNYgHW9rg,1273
|
|
5
|
+
datacompose/cli/validation.py,sha256=8WMZ9wtPgFk9eBgMS_wtkncFz_-BmH4E8V57tjp3YoI,2526
|
|
6
|
+
datacompose/cli/commands/__init__.py,sha256=Bu58UsnkGRbVFS92U2Px_KxlUPrdlbSY6wlvP6tet2o,38
|
|
7
|
+
datacompose/cli/commands/add.py,sha256=dWussDjA_7hlkM6J0mv4GGXg5iKwIqjScbstPDZcPYo,7023
|
|
8
|
+
datacompose/cli/commands/init.py,sha256=MTO48fM18s4qnUCPpEC5RdmdKW6OsJc_nyN_wrHbi3g,16740
|
|
9
|
+
datacompose/cli/commands/list.py,sha256=MmRxMnghBLagg6IEh4lqCK0WR-0Ku-jxH8AT6WlajuU,3867
|
|
10
|
+
datacompose/cli/commands/upgrade.py,sha256=F0ra-HLVCP5MEdYOkKbvZ_cnhXFmKKw6IRBhmuWBGVI,163
|
|
11
|
+
datacompose/generators/__init__.py,sha256=dFJWJScu8mkP0ZKIQtVlJ36PQW-LwCYBijuNwLSevZw,48
|
|
12
|
+
datacompose/generators/base.py,sha256=YodC9Ai58eWCPNFx1IIOAcQvd5CGu3ySf6Z6m_-UVTI,7002
|
|
13
|
+
datacompose/generators/pyspark/__init__.py,sha256=ayoKDGtbt2KwFcNt2QxHKt8z83Kzy4ySw9Gg7j9ZMTY,33
|
|
14
|
+
datacompose/generators/pyspark/generator.py,sha256=U0OCJVRI9Jc9aWsubRNBQ6dEq8IFNf_j5IVGDZuglvs,1987
|
|
15
|
+
datacompose/operators/__init__.py,sha256=6g7Hp5261TkPghRgTfxKrizx0OH3Zga3OKHZ37I9_4E,586
|
|
16
|
+
datacompose/operators/primitives.py,sha256=rIERyKfPIULngHs9fRewXo6VjmbjyiOXvTCqiHGIur8,22022
|
|
17
|
+
datacompose/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
+
datacompose/transformers/discovery.py,sha256=JgMtmd8PkhmwLqS17NeKSv9MRneY9tdOsOK32A6luOQ,7048
|
|
19
|
+
datacompose/transformers/text/__init__.py,sha256=Mq0UmgYlBV8T18IkvHAS1TImEzWyGciCqxaCv324hFQ,36
|
|
20
|
+
datacompose/transformers/text/clean_addresses/__init__.py,sha256=l5TItGrGBn69Mlq0CaRGJa-SwpyuUEYWvG5N26s3Pco,39
|
|
21
|
+
datacompose/transformers/text/clean_addresses/pyspark/pyspark_primitives.py,sha256=wiENqpdcVAlfNhsyIM-JjX7lye4xcW0h1INHYMRrYlE,60249
|
|
22
|
+
datacompose/transformers/text/clean_emails/__init__.py,sha256=snZLOJsxrPDOi8gIISlRxc6YlskKxUyu0NnOZCE5cIU,34
|
|
23
|
+
datacompose/transformers/text/clean_emails/pyspark/pyspark_primitives.py,sha256=vIgPAcc6t8UCYSzvi79UplkTFqY9jIR9brZyhAhtLwY,21802
|
|
24
|
+
datacompose/transformers/text/clean_phone_numbers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
+
datacompose/transformers/text/clean_phone_numbers/pyspark/pyspark_primitives.py,sha256=BsCfgncFxM77M3k0hEZyARPJk4kq1PQZB40YRc9RR8M,26279
|
|
26
|
+
datacompose-0.2.4.dist-info/licenses/LICENSE,sha256=SCPOqmPhMikiyYDlKZ877fGHaE2O45cDBoJIomrlpDU,1067
|
|
27
|
+
datacompose-0.2.4.dist-info/METADATA,sha256=eEXoMoQtlhoJXojWoX8weZ3uH5dGyBT7MJG8f76B_To,11876
|
|
28
|
+
datacompose-0.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
29
|
+
datacompose-0.2.4.dist-info/entry_points.txt,sha256=oeG9oGgDwajk4v0C1awdUTBx2GmhLpuNHCTAV-jurUc,58
|
|
30
|
+
datacompose-0.2.4.dist-info/top_level.txt,sha256=AX1qGkuJMD2YJLZKo40h-w4MeFxDZL6W1vbKKuTpW8I,12
|
|
31
|
+
datacompose-0.2.4.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 datacompose
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
datacompose
|