@hazeljs/data 0.2.0-beta.37 → 0.2.0-beta.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/README.md +193 -0
  2. package/package.json +2 -2
package/README.md ADDED
@@ -0,0 +1,193 @@
1
+ # @hazeljs/data
2
+
3
+ Data Processing & ETL for HazelJS - pipelines, schema validation, streaming, and data quality.
4
+
5
+ [![npm version](https://img.shields.io/npm/v/@hazeljs/data.svg)](https://www.npmjs.com/package/@hazeljs/data)
6
+ [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0)
7
+
8
+ ## Features
9
+
10
+ - **Pipelines** – Declarative ETL with `@Pipeline`, `@Transform`, `@Validate` decorators
11
+ - **Schema validation** – Fluent Schema API (string, number, object, array, email, oneOf)
12
+ - **ETL service** – Execute multi-step pipelines with ordering and error handling
13
+ - **Stream processing** – StreamBuilder, StreamProcessor for batch and streaming workloads
14
+ - **Built-in transformers** – trimString, toLowerCase, parseJson, pick, omit, renameKeys
15
+ - **Data quality** – QualityService for completeness, notNull, and custom checks
16
+ - **Flink integration** – Optional Apache Flink deployment for distributed stream processing
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ npm install @hazeljs/data @hazeljs/core
22
+ ```
23
+
24
+ ## Quick Start
25
+
26
+ ### 1. Import DataModule
27
+
28
+ ```typescript
29
+ import { HazelApp } from '@hazeljs/core';
30
+ import { DataModule } from '@hazeljs/data';
31
+
32
+ const app = new HazelApp({
33
+ imports: [DataModule.forRoot()],
34
+ });
35
+
36
+ app.listen(3000);
37
+ ```
38
+
39
+ ### 2. Define a pipeline with decorators
40
+
41
+ ```typescript
42
+ import { Injectable } from '@hazeljs/core';
43
+ import {
44
+ Pipeline,
45
+ PipelineBase,
46
+ Transform,
47
+ Validate,
48
+ ETLService,
49
+ Schema,
50
+ } from '@hazeljs/data';
51
+
52
+ const OrderSchema = Schema.object()
53
+ .prop('id', Schema.string().required())
54
+ .prop('customerId', Schema.string().required())
55
+ .prop('status', Schema.string().oneOf(['pending', 'paid', 'shipped', 'delivered', 'cancelled']))
56
+ .prop('items', Schema.array().items(Schema.object()
57
+ .prop('sku', Schema.string().minLength(1))
58
+ .prop('qty', Schema.number().min(1))
59
+ .prop('price', Schema.number().min(0))
60
+ ))
61
+ .required();
62
+
63
+ @Pipeline('order-processing')
64
+ @Injectable()
65
+ export class OrderProcessingPipeline extends PipelineBase {
66
+ constructor(etlService: ETLService) {
67
+ super(etlService);
68
+ }
69
+
70
+ @Transform({ step: 1, name: 'normalize' })
71
+ async normalize(data: Record<string, unknown>): Promise<Record<string, unknown>> {
72
+ return {
73
+ ...data,
74
+ status: String(data.status).toLowerCase(),
75
+ };
76
+ }
77
+
78
+ @Validate({ step: 2, schema: OrderSchema })
79
+ async validate(data: Record<string, unknown>): Promise<Record<string, unknown>> {
80
+ return data;
81
+ }
82
+
83
+ @Transform({ step: 3, name: 'enrich' })
84
+ async enrich(data: Record<string, unknown> & { items?: { qty: number; price: number }[] }): Promise<Record<string, unknown>> {
85
+ const items = data.items ?? [];
86
+ const subtotal = items.reduce((sum, i) => sum + i.qty * i.price, 0);
87
+ const tax = subtotal * 0.1;
88
+ return {
89
+ ...data,
90
+ subtotal,
91
+ tax,
92
+ total: subtotal + tax,
93
+ processedAt: new Date().toISOString(),
94
+ };
95
+ }
96
+ }
97
+ ```
98
+
99
+ ### 3. Execute from a controller or service
100
+
101
+ ```typescript
102
+ import { Controller, Post, Body, Inject } from '@hazeljs/core';
103
+ import { OrderProcessingPipeline } from './pipelines/order-processing.pipeline';
104
+
105
+ @Controller('data')
106
+ export class DataController {
107
+ constructor(
108
+ @Inject(OrderProcessingPipeline) private pipeline: OrderProcessingPipeline
109
+ ) {}
110
+
111
+ @Post('pipeline/orders')
112
+ async processOrder(@Body() body: unknown) {
113
+ const result = await this.pipeline.execute(body);
114
+ return { ok: true, data: result };
115
+ }
116
+ }
117
+ ```
118
+
119
+ ## Batch processing with StreamService
120
+
121
+ Process arrays through pipelines in batches:
122
+
123
+ ```typescript
124
+ import { StreamService } from '@hazeljs/data';
125
+
126
+ const streamService = new StreamService(etlService);
127
+ const results = await streamService.processBatch(OrderProcessingPipeline, orders);
128
+ ```
129
+
130
+ ## Schema validation
131
+
132
+ Build schemas with the fluent API:
133
+
134
+ ```typescript
135
+ import { Schema, SchemaValidator } from '@hazeljs/data';
136
+
137
+ const UserSchema = Schema.object()
138
+ .prop('email', Schema.string().format('email').required())
139
+ .prop('name', Schema.string().minLength(1).maxLength(200))
140
+ .prop('age', Schema.number().min(0).max(150))
141
+ .prop('role', Schema.string().oneOf(['user', 'admin', 'moderator', 'guest']))
142
+ .required();
143
+
144
+ const validator = new SchemaValidator();
145
+ const { value, error } = validator.validate(UserSchema, rawData);
146
+ ```
147
+
148
+ ## Data quality checks
149
+
150
+ ```typescript
151
+ import { QualityService } from '@hazeljs/data';
152
+
153
+ const qualityService = new QualityService();
154
+ const report = await qualityService.check(records, {
155
+ completeness: ['id', 'email', 'createdAt'],
156
+ notNull: ['id', 'status'],
157
+ });
158
+ ```
159
+
160
+ ## Flink configuration (optional)
161
+
162
+ For distributed stream processing with Apache Flink:
163
+
164
+ ```typescript
165
+ DataModule.forRoot({
166
+ flink: {
167
+ url: process.env.FLINK_REST_URL ?? 'http://localhost:8081',
168
+ timeout: 30000,
169
+ },
170
+ });
171
+ ```
172
+
173
+ ## Built-in transformers
174
+
175
+ | Transformer | Description |
176
+ |-------------|-------------|
177
+ | `trimString` | Trim whitespace from strings |
178
+ | `toLowerCase` / `toUpperCase` | Case conversion |
179
+ | `parseJson` / `stringifyJson` | JSON parsing and serialization |
180
+ | `pick` | Select specific keys from objects |
181
+ | `omit` | Remove specific keys from objects |
182
+ | `renameKeys` | Rename object keys |
183
+
184
+ ## Example
185
+
186
+ See [hazeljs-data-starter](../../../hazeljs-data-starter) for a full example with order and user pipelines, REST API, and quality reports.
187
+
188
+ ## Links
189
+
190
+ - [Documentation](https://hazeljs.com/docs/packages/data)
191
+ - [GitHub](https://github.com/hazeljs/hazel-js)
192
+ - [Issues](https://github.com/hazeljs/hazel-js/issues)
193
+ - [Homepage](https://hazeljs.com)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hazeljs/data",
3
- "version": "0.2.0-beta.37",
3
+ "version": "0.2.0-beta.38",
4
4
  "description": "Data Processing & ETL for HazelJS framework",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -49,5 +49,5 @@
49
49
  "url": "https://github.com/hazeljs/hazel-js/issues"
50
50
  },
51
51
  "homepage": "https://hazeljs.com",
52
- "gitHead": "d39af9d45d13caed182bc8ecc5d3c517b2ffe8d5"
52
+ "gitHead": "ba1d66ecacc4943a08f85e3f9c077e53224838ff"
53
53
  }