@hazeljs/data 0.2.0-beta.37 → 0.2.0-beta.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +193 -0
- package/package.json +2 -2
package/README.md
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# @hazeljs/data
|
|
2
|
+
|
|
3
|
+
Data Processing & ETL for HazelJS - pipelines, schema validation, streaming, and data quality.
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/@hazeljs/data)
|
|
6
|
+
[License: Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
|
|
7
|
+
|
|
8
|
+
## Features
|
|
9
|
+
|
|
10
|
+
- **Pipelines** – Declarative ETL with `@Pipeline`, `@Transform`, `@Validate` decorators
|
|
11
|
+
- **Schema validation** – Fluent Schema API (string, number, object, array, email, oneOf)
|
|
12
|
+
- **ETL service** – Execute multi-step pipelines with ordering and error handling
|
|
13
|
+
- **Stream processing** – StreamBuilder, StreamProcessor for batch and streaming workloads
|
|
14
|
+
- **Built-in transformers** – trimString, toLowerCase, parseJson, pick, omit, renameKeys
|
|
15
|
+
- **Data quality** – QualityService for completeness, notNull, and custom checks
|
|
16
|
+
- **Flink integration** – Optional Apache Flink deployment for distributed stream processing
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
npm install @hazeljs/data @hazeljs/core
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
### 1. Import DataModule
|
|
27
|
+
|
|
28
|
+
```typescript
|
|
29
|
+
import { HazelApp } from '@hazeljs/core';
|
|
30
|
+
import { DataModule } from '@hazeljs/data';
|
|
31
|
+
|
|
32
|
+
const app = new HazelApp({
|
|
33
|
+
imports: [DataModule.forRoot()],
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
app.listen(3000);
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### 2. Define a pipeline with decorators
|
|
40
|
+
|
|
41
|
+
```typescript
|
|
42
|
+
import { Injectable } from '@hazeljs/core';
|
|
43
|
+
import {
|
|
44
|
+
Pipeline,
|
|
45
|
+
PipelineBase,
|
|
46
|
+
Transform,
|
|
47
|
+
Validate,
|
|
48
|
+
ETLService,
|
|
49
|
+
Schema,
|
|
50
|
+
} from '@hazeljs/data';
|
|
51
|
+
|
|
52
|
+
const OrderSchema = Schema.object()
|
|
53
|
+
.prop('id', Schema.string().required())
|
|
54
|
+
.prop('customerId', Schema.string().required())
|
|
55
|
+
.prop('status', Schema.string().oneOf(['pending', 'paid', 'shipped', 'delivered', 'cancelled']))
|
|
56
|
+
.prop('items', Schema.array().items(Schema.object()
|
|
57
|
+
.prop('sku', Schema.string().minLength(1))
|
|
58
|
+
.prop('qty', Schema.number().min(1))
|
|
59
|
+
.prop('price', Schema.number().min(0))
|
|
60
|
+
))
|
|
61
|
+
.required();
|
|
62
|
+
|
|
63
|
+
@Pipeline('order-processing')
|
|
64
|
+
@Injectable()
|
|
65
|
+
export class OrderProcessingPipeline extends PipelineBase {
|
|
66
|
+
constructor(etlService: ETLService) {
|
|
67
|
+
super(etlService);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
@Transform({ step: 1, name: 'normalize' })
|
|
71
|
+
async normalize(data: Record<string, unknown>): Promise<Record<string, unknown>> {
|
|
72
|
+
return {
|
|
73
|
+
...data,
|
|
74
|
+
status: String(data.status).toLowerCase(),
|
|
75
|
+
};
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
@Validate({ step: 2, schema: OrderSchema })
|
|
79
|
+
async validate(data: Record<string, unknown>): Promise<Record<string, unknown>> {
|
|
80
|
+
return data;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
@Transform({ step: 3, name: 'enrich' })
|
|
84
|
+
async enrich(data: Record<string, unknown> & { items?: { qty: number; price: number }[] }): Promise<Record<string, unknown>> {
|
|
85
|
+
const items = data.items ?? [];
|
|
86
|
+
const subtotal = items.reduce((sum, i) => sum + i.qty * i.price, 0);
|
|
87
|
+
const tax = subtotal * 0.1;
|
|
88
|
+
return {
|
|
89
|
+
...data,
|
|
90
|
+
subtotal,
|
|
91
|
+
tax,
|
|
92
|
+
total: subtotal + tax,
|
|
93
|
+
processedAt: new Date().toISOString(),
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### 3. Execute from a controller or service
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
import { Controller, Post, Body, Inject } from '@hazeljs/core';
|
|
103
|
+
import { OrderProcessingPipeline } from './pipelines/order-processing.pipeline';
|
|
104
|
+
|
|
105
|
+
@Controller('data')
|
|
106
|
+
export class DataController {
|
|
107
|
+
constructor(
|
|
108
|
+
@Inject(OrderProcessingPipeline) private pipeline: OrderProcessingPipeline
|
|
109
|
+
) {}
|
|
110
|
+
|
|
111
|
+
@Post('pipeline/orders')
|
|
112
|
+
async processOrder(@Body() body: unknown) {
|
|
113
|
+
const result = await this.pipeline.execute(body);
|
|
114
|
+
return { ok: true, data: result };
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Batch processing with StreamService
|
|
120
|
+
|
|
121
|
+
Process arrays through pipelines in batches:
|
|
122
|
+
|
|
123
|
+
```typescript
|
|
124
|
+
import { StreamService } from '@hazeljs/data';
|
|
125
|
+
|
|
126
|
+
const streamService = new StreamService(etlService);
|
|
127
|
+
const results = await streamService.processBatch(OrderProcessingPipeline, orders);
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Schema validation
|
|
131
|
+
|
|
132
|
+
Build schemas with the fluent API:
|
|
133
|
+
|
|
134
|
+
```typescript
|
|
135
|
+
import { Schema, SchemaValidator } from '@hazeljs/data';
|
|
136
|
+
|
|
137
|
+
const UserSchema = Schema.object()
|
|
138
|
+
.prop('email', Schema.string().format('email').required())
|
|
139
|
+
.prop('name', Schema.string().minLength(1).maxLength(200))
|
|
140
|
+
.prop('age', Schema.number().min(0).max(150))
|
|
141
|
+
.prop('role', Schema.string().oneOf(['user', 'admin', 'moderator', 'guest']))
|
|
142
|
+
.required();
|
|
143
|
+
|
|
144
|
+
const validator = new SchemaValidator();
|
|
145
|
+
const { value, error } = validator.validate(UserSchema, rawData);
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Data quality checks
|
|
149
|
+
|
|
150
|
+
```typescript
|
|
151
|
+
import { QualityService } from '@hazeljs/data';
|
|
152
|
+
|
|
153
|
+
const qualityService = new QualityService();
|
|
154
|
+
const report = await qualityService.check(records, {
|
|
155
|
+
completeness: ['id', 'email', 'createdAt'],
|
|
156
|
+
notNull: ['id', 'status'],
|
|
157
|
+
});
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## Flink configuration (optional)
|
|
161
|
+
|
|
162
|
+
For distributed stream processing with Apache Flink:
|
|
163
|
+
|
|
164
|
+
```typescript
|
|
165
|
+
DataModule.forRoot({
|
|
166
|
+
flink: {
|
|
167
|
+
url: process.env.FLINK_REST_URL ?? 'http://localhost:8081',
|
|
168
|
+
timeout: 30000,
|
|
169
|
+
},
|
|
170
|
+
});
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## Built-in transformers
|
|
174
|
+
|
|
175
|
+
| Transformer | Description |
|
|
176
|
+
|-------------|-------------|
|
|
177
|
+
| `trimString` | Trim whitespace from strings |
|
|
178
|
+
| `toLowerCase` / `toUpperCase` | Case conversion |
|
|
179
|
+
| `parseJson` / `stringifyJson` | JSON parsing and serialization |
|
|
180
|
+
| `pick` | Select specific keys from objects |
|
|
181
|
+
| `omit` | Remove specific keys from objects |
|
|
182
|
+
| `renameKeys` | Rename object keys |
|
|
183
|
+
|
|
184
|
+
## Example
|
|
185
|
+
|
|
186
|
+
See [hazeljs-data-starter](../../../hazeljs-data-starter) for a full example with order and user pipelines, REST API, and quality reports.
|
|
187
|
+
|
|
188
|
+
## Links
|
|
189
|
+
|
|
190
|
+
- [Documentation](https://hazeljs.com/docs/packages/data)
|
|
191
|
+
- [GitHub](https://github.com/hazel-js/hazeljs)
|
|
192
|
+
- [Issues](https://github.com/hazel-js/hazeljs/issues)
|
|
193
|
+
- [Homepage](https://hazeljs.com)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hazeljs/data",
|
|
3
|
-
"version": "0.2.0-beta.37",
|
|
3
|
+
"version": "0.2.0-beta.39",
|
|
4
4
|
"description": "Data Processing & ETL for HazelJS framework",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -49,5 +49,5 @@
|
|
|
49
49
|
"url": "https://github.com/hazeljs/hazel-js/issues"
|
|
50
50
|
},
|
|
51
51
|
"homepage": "https://hazeljs.com",
|
|
52
|
-
"gitHead": "
|
|
52
|
+
"gitHead": "1ef15653fe25b11327020085fdfdd6d426b45763"
|
|
53
53
|
}
|