@yarkivaev/source-to-sink 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(npm test)",
5
+ "Bash(npm run coverage:*)",
6
+ "Bash(npx c8 npm test:*)",
7
+ "Bash(npm link)",
8
+ "Bash(npm link:*)",
9
+ "Bash(npm run test:integration:*)",
10
+ "Bash(npm run test:all:*)",
11
+ "Bash(npm test:*)",
12
+ "Bash(find:*)",
13
+ "Bash(poetry run pytest:*)",
14
+ "Bash(grep:*)"
15
+ ]
16
+ }
17
+ }
@@ -0,0 +1,12 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request:
5
+ branches: [main]
6
+
7
+ jobs:
8
+ ci:
9
+ uses: yarkivaev/npm-workflows/.github/workflows/ci.yml@main
10
+ with:
11
+ node-version: '22'
12
+ lint: false
@@ -0,0 +1,15 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ jobs:
8
+ release:
9
+ uses: yarkivaev/npm-workflows/.github/workflows/release.yml@main
10
+ permissions:
11
+ contents: write
12
+ id-token: write
13
+ with:
14
+ node-version: '22'
15
+ lint: false
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,40 @@
1
+ # source-to-sink
2
+
3
+ A library for building data streaming pipelines with batching and circuit breaker support.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @yarkivaev/source-to-sink
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```javascript
14
+ import { batch, circuit, timedBatch, clock, clickhouseSink, mqttSource } from '@yarkivaev/source-to-sink';
15
+
16
+ const clk = clock();
17
+ const breaker = circuit(5, 60, clk);
18
+ const sink = clickhouseSink('http://localhost:8123', 'metrics');
19
+ const collector = timedBatch(batch(sink, 1000, breaker), 5.0);
20
+ const source = mqttSource('mqtt://localhost:1883', 'sensors/#', collector);
21
+
22
+ source.start();
23
+ ```
24
+
25
+ ## Components
26
+
27
+ | Component | Description |
28
+ |-----------|-------------|
29
+ | `batch(sink, size, circuit)` | Collects records and flushes to sink when size is reached |
30
+ | `circuit(threshold, timeout, clock)` | Circuit breaker for failure isolation |
31
+ | `timedBatch(collector, interval)` | Adds time-based auto-flush to a collector |
32
+ | `clock()` | System time provider |
33
+ | `pollingSource(fetch, interval, collector, clock)` | Generic polling source with time window |
34
+ | `clickhouseSink(url, table)` | ClickHouse sink adapter |
35
+ | `mqttSource(url, topics, collector, options)` | MQTT subscription source; `topics` is a comma-separated list of topic patterns |
36
+ | `lokiSource(url, query, interval, collector, clock)` | Loki polling source |
37
+
38
+ ## License
39
+
40
+ MIT
package/index.js ADDED
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Source-to-sink streaming library.
3
+ *
4
+ * Provides components for building data streaming pipelines:
5
+ * - batch: Collects records and flushes to sink
6
+ * - circuit: Circuit breaker for failure isolation
7
+ * - timedBatch: Decorator for time-based flushing
8
+ * - clock: Time provider for circuit breaker
9
+ * - pollingSource: Generic polling source with time window
10
+ * - clickhouseSink: ClickHouse sink (accepts URL)
11
+ * - mqttSource: MQTT subscription source (accepts URL)
12
+ * - lokiSource: Loki polling source (accepts URL)
13
+ *
14
+ * @example
15
+ * import { batch, circuit, timedBatch, clock, clickhouseSink, mqttSource } from '@yarkivaev/source-to-sink';
16
+ *
17
+ * const clk = clock();
18
+ * const c = circuit(5, 60, clk);
19
+ * const sink = clickhouseSink('http://localhost:8123', 'metrics');
20
+ * const collector = timedBatch(batch(sink, 1000, c), 5.0);
21
+ * const source = mqttSource('mqtt://localhost:1883', 'sensors/#', collector);
22
+ * source.start();
23
+ */
24
+ export { default as batch } from './src/batch.js';
25
+ export { default as circuit } from './src/circuit.js';
26
+ export { default as clock } from './src/clock.js';
27
+ export { default as timedBatch } from './src/timedBatch.js';
28
+ export { default as pollingSource } from './src/pollingSource.js';
29
+ export { default as clickhouseSink } from './src/clickhouseSink.js';
30
+ export { default as mqttSource } from './src/mqttSource.js';
31
+ export { default as lokiSource } from './src/lokiSource.js';
package/package.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "name": "@yarkivaev/source-to-sink",
3
+ "version": "1.0.0",
4
+ "description": "Generic library for building data streaming pipelines",
5
+ "repository": {
6
+ "type": "git",
7
+ "url": "https://github.com/yarkivaev/source-to-sink"
8
+ },
9
+ "type": "module",
10
+ "main": "index.js",
11
+ "dependencies": {
12
+ "mqtt": "^5.0.0",
13
+ "@clickhouse/client": "^1.0.0"
14
+ },
15
+ "devDependencies": {
16
+ "mocha": "^10.0.0",
17
+ "c8": "^8.0.0",
18
+ "testcontainers": "^10.13.0"
19
+ },
20
+ "scripts": {
21
+ "test": "mocha test/*.js",
22
+ "coverage": "c8 mocha test/*.js"
23
+ }
24
+ }
package/src/batch.js ADDED
@@ -0,0 +1,75 @@
1
/**
 * Batch collector that accumulates records and flushes them to a sink.
 *
 * Records are buffered until `size` records are pending, then handed to
 * `sink.write()`. The circuit breaker is consulted before every write and is
 * told about the outcome afterwards. Both synchronous sinks and sinks whose
 * `write()` returns a promise are supported: for the latter the breaker is
 * only updated once the promise settles, and a failed batch is put back in
 * front of the buffer so it is retried on the next flush.
 * For time-based flushing, wrap with timedBatch.
 *
 * @example
 * const sink = { write: (records) => console.log(records) };
 * const clk = clock();
 * const c = circuit(5, 60, clk);
 * const b = batch(sink, 100, c);
 * b.accept({ value: 42 });
 * b.flush();
 * b.stop();
 *
 * @param {object} sink - Object with write(records) method (sync or promise-returning)
 * @param {number} size - Maximum batch size before automatic flush
 * @param {object} circuit - Circuit breaker with allowing(), succeed(), fail()
 * @returns {object} Batch collector with accept(), flush(), and stop() methods
 * @throws {Error} If sink, size, or circuit is invalid
 */
export default function batch(sink, size, circuit) {
  if (!sink || typeof sink.write !== 'function') {
    throw new Error('Sink must have a write(records) method');
  }
  // NaN is typeof 'number' and is not < 1, so it must be rejected explicitly;
  // otherwise `records.length >= size` would never be true.
  if (typeof size !== 'number' || Number.isNaN(size) || size < 1) {
    throw new Error(`Size must be a positive number, got: ${size}`);
  }
  if (!circuit || typeof circuit.allowing !== 'function') {
    throw new Error('Circuit must have an allowing() method');
  }
  let records = [];
  // Bumped by stop() so that a write failing after stop() does not bring back
  // records the caller asked to discard.
  let generation = 0;
  const requeue = (pending, issued) => {
    if (issued === generation) {
      // Failed records go first to keep the original ordering.
      records = pending.concat(records);
    }
  };
  const perform = () => {
    if (records.length === 0) {
      return undefined;
    }
    if (!circuit.allowing()) {
      return undefined;
    }
    // Detach the batch before writing so records accepted while an
    // asynchronous write is in flight are neither lost nor written twice.
    const pending = records;
    const issued = generation;
    records = [];
    let outcome;
    try {
      outcome = sink.write(pending);
    } catch (err) {
      requeue(pending, issued);
      circuit.fail();
      throw err;
    }
    if (outcome && typeof outcome.then === 'function') {
      return outcome.then(
        () => {
          circuit.succeed();
        },
        (err) => {
          requeue(pending, issued);
          circuit.fail();
          throw err;
        }
      );
    }
    circuit.succeed();
    return undefined;
  };
  return {
    /**
     * Accepts a record into the batch, flushing when the batch is full.
     *
     * @param {*} record - Record to accept
     * @returns {Promise|undefined} Promise of the triggered write when the
     *   sink is asynchronous, otherwise undefined
     */
    accept(record) {
      records.push(record);
      if (records.length >= size) {
        return perform();
      }
      return undefined;
    },
    /**
     * Forces an immediate flush of all pending records.
     *
     * @returns {Promise|undefined} Promise of the write when the sink is
     *   asynchronous, otherwise undefined
     */
    flush() {
      return perform();
    },
    /**
     * Stops the batch collector and clears pending records.
     */
    stop() {
      records = [];
      generation += 1;
    }
  };
}
package/src/circuit.js ADDED
@@ -0,0 +1,102 @@
1
/**
 * Builds the "closed" circuit state, in which operations are permitted.
 *
 * @returns {object} State whose allowing() always answers true
 */
function closed() {
  return { allowing: () => true };
}
13
+
14
/**
 * Builds the "open" circuit state, in which operations are blocked.
 *
 * @param {number} timestamp - Clock reading (milliseconds) taken when the circuit opened
 * @param {object} clk - Clock exposing millis()
 * @param {number} timeout - Seconds that must elapse before the state counts as expired
 * @returns {object} State with allowing() (always false) and expired()
 */
function open(timestamp, clk, timeout) {
  const expired = () => {
    const elapsedSeconds = (clk.millis() - timestamp) / 1000;
    return elapsedSeconds >= timeout;
  };
  return {
    allowing: () => false,
    expired
  };
}
32
+
33
/**
 * Circuit breaker for failure isolation in data pipelines.
 *
 * Implements the circuit breaker pattern to prevent cascading failures.
 * The circuit opens once `threshold` failures have been recorded without an
 * intervening success, and closes again (with the failure count reset) the
 * first time allowing() is called after `timeout` seconds have elapsed.
 *
 * @example
 * const clk = clock();
 * const c = circuit(5, 60, clk);
 * if (c.allowing()) {
 *   try {
 *     await riskyOperation();
 *     c.succeed();
 *   } catch (err) {
 *     c.fail();
 *   }
 * }
 *
 * @param {number} threshold - Number of failures before opening the circuit
 * @param {number} timeout - Seconds to wait before attempting recovery
 * @param {object} clk - Clock with millis() method for time tracking
 * @returns {object} Circuit breaker with allowing(), succeed(), and fail() methods
 * @throws {Error} If threshold, timeout, or clk is invalid
 */
export default function circuit(threshold, timeout, clk) {
  // NaN is typeof 'number' and fails every comparison, so without the explicit
  // check a NaN threshold would yield a breaker that never opens.
  if (typeof threshold !== 'number' || Number.isNaN(threshold) || threshold < 1) {
    throw new Error(`Threshold must be a positive number, got: ${threshold}`);
  }
  // Likewise a NaN timeout would make an opened breaker never expire.
  if (typeof timeout !== 'number' || Number.isNaN(timeout) || timeout < 0) {
    throw new Error(`Timeout must be a non-negative number, got: ${timeout}`);
  }
  if (!clk || typeof clk.millis !== 'function') {
    throw new Error('Clock must have a millis() method');
  }
  let failures = 0;
  let state = closed();
  return {
    /**
     * Checks if the circuit allows operations.
     *
     * An open circuit whose timeout has elapsed is closed as a side effect.
     *
     * @returns {boolean} True if circuit allows operations, false if open
     */
    allowing() {
      if (state.allowing()) {
        return true;
      }
      if (state.expired()) {
        state = closed();
        failures = 0;
        return true;
      }
      return false;
    },
    /**
     * Records a successful operation, resetting the failure count.
     */
    succeed() {
      failures = 0;
      state = closed();
    },
    /**
     * Records a failed operation, opening the circuit once the threshold
     * is reached.
     */
    fail() {
      failures += 1;
      if (failures >= threshold) {
        state = open(clk.millis(), clk, timeout);
      }
    }
  };
}
@@ -0,0 +1,36 @@
1
+ import { createClient } from '@clickhouse/client';
2
+
3
/**
 * ClickHouse sink for batch record insertion.
 *
 * Creates a ClickHouse client internally and implements the Sink
 * interface (write(records)) for use with batch collectors. The client
 * is owned by the sink, so the sink also exposes close() to release it
 * on shutdown.
 *
 * @example
 * const sink = clickhouseSink('http://localhost:8123', 'metrics');
 * await sink.write([{ ts: Date.now(), value: 42 }]);
 * await sink.close();
 *
 * @param {string} url - ClickHouse URL (e.g., 'http://localhost:8123')
 * @param {string} table - Target table name
 * @returns {object} Sink with write(records) and close() methods
 * @throws {Error} If url or table is not a non-empty string
 */
export default function clickhouseSink(url, table) {
  if (typeof url !== 'string' || url.length === 0) {
    throw new Error('URL must be a non-empty string');
  }
  if (typeof table !== 'string' || table.length === 0) {
    throw new Error('Table must be a non-empty string');
  }
  const client = createClient({ url });
  return {
    /**
     * Writes records to the ClickHouse table in JSONEachRow format.
     *
     * @param {Array} records - Array of records to insert
     * @returns {Promise} Promise resolving when insert completes
     */
    write(records) {
      return client.insert({ table, values: records, format: 'JSONEachRow' });
    },
    /**
     * Closes the underlying ClickHouse client.
     *
     * @returns {Promise} Result of the client's close() call
     */
    close() {
      return client.close();
    }
  };
}
package/src/clock.js ADDED
@@ -0,0 +1,24 @@
1
/**
 * System clock backed by Date.now().
 *
 * Supplies the current time in milliseconds to time-dependent components.
 * Tests can substitute any object exposing the same millis() method to
 * control time deterministically.
 *
 * @example
 * const clk = clock();
 * const now = clk.millis();
 *
 * @returns {object} Clock with millis() method
 */
export default function clock() {
  /**
   * Reads the current time.
   *
   * @returns {number} Milliseconds elapsed since the epoch
   */
  const millis = () => Date.now();
  return { millis };
}
@@ -0,0 +1,49 @@
1
+ import pollingSource from './pollingSource.js';
2
+
3
/**
 * Loki polling source for streaming log entries to a collector.
 *
 * Polls Loki's query_range endpoint at a specified interval (via
 * pollingSource) and forwards every returned log line to the collector as
 * a {ts, line} object. Requests are made with the global fetch().
 *
 * @example
 * const source = lokiSource('http://localhost:3100', '{app="traefik"}', 10, collector, clock);
 * source.start();
 * // ... later
 * source.stop();
 *
 * @param {string} url - Loki base URL (e.g., 'http://localhost:3100')
 * @param {string} query - LogQL query string
 * @param {number} interval - Polling interval in seconds
 * @param {object} collector - Collector with accept() method
 * @param {object} clk - Clock with millis() method
 * @returns {object} Source with start() and stop() methods
 * @throws {Error} If url or query is not a non-empty string
 */
export default function lokiSource(url, query, interval, collector, clk) {
  if (typeof url !== 'string' || url.length === 0) {
    throw new Error('URL must be a non-empty string');
  }
  if (typeof query !== 'string' || query.length === 0) {
    throw new Error('Query must be a non-empty string');
  }
  // Drop trailing slashes so 'http://host/' does not yield 'http://host//loki/...'.
  const endpoint = `${url.replace(/\/+$/, '')}/loki/api/v1/query_range`;
  const fetchEntries = async (since, until) => {
    // The window bounds are scaled by 1,000,000 before being sent
    // (milliseconds to nanoseconds, assuming clk.millis() yields epoch milliseconds).
    const params = new URLSearchParams({
      query,
      start: (since * 1000000).toString(),
      end: (until * 1000000).toString(),
      limit: '1000'
    });
    const response = await globalThis.fetch(`${endpoint}?${params}`);
    // Fail with the HTTP status instead of an opaque JSON parse error
    // (or a silently empty result) when Loki rejects the request.
    if (!response.ok) {
      throw new Error(`Loki query failed: ${response.status} ${response.statusText}`);
    }
    const data = await response.json();
    const entries = [];
    if (data.data && data.data.result) {
      for (const stream of data.data.result) {
        for (const [ts, line] of stream.values) {
          // Scale the returned timestamp back down by the same factor.
          entries.push({ ts: Number.parseInt(ts, 10) / 1000000, line });
        }
      }
    }
    return entries;
  };
  return pollingSource(fetchEntries, interval, collector, clk);
}
@@ -0,0 +1,109 @@
1
+ import mqtt from 'mqtt';
2
+
3
/**
 * Builds the "idle" state of the MQTT source (no active subscription).
 *
 * @returns {object} State whose subscribed() always answers false
 */
function idle() {
  return { subscribed: () => false };
}
15
+
16
/**
 * Builds the "subscribed" state of the MQTT source, remembering the client
 * and message handler so they can be retrieved later.
 *
 * @param {object} client - MQTT client
 * @param {function} handler - Message handler function
 * @returns {object} State with subscribed() (always true), client() and handler()
 */
function subscribed(client, handler) {
  return {
    subscribed: () => true,
    client: () => client,
    handler: () => handler
  };
}
36
+
37
/**
 * MQTT subscription source for streaming messages to a collector.
 *
 * Subscribes to MQTT topics and forwards raw messages to the collector.
 * Messages are passed as {topic, payload} objects, with the payload
 * converted to a string but otherwise unparsed.
 * Creates MQTT client internally. Supports comma-separated topic patterns.
 *
 * When options.clientId is given the connection uses a non-clean session
 * with a session expiry interval and subscribes at QoS 1; otherwise it uses
 * a clean session and QoS 0.
 *
 * @example
 * const source = mqttSource('mqtt://localhost:1883', 'sensors/#,devices/#', collector);
 * source.start();
 * // ... later
 * source.stop();
 *
 * @param {string} url - MQTT broker URL (e.g., 'mqtt://localhost:1883')
 * @param {string} topics - Comma-separated MQTT topic patterns to subscribe
 * @param {object} collector - Collector with accept() method receiving {topic, payload}
 * @param {object} [options] - Optional MQTT connection options
 * @param {string} [options.clientId] - Client ID for persistent sessions
 * @param {number} [options.sessionExpiryInterval] - Session expiry in seconds (default 3600)
 * @returns {object} Source with start() and stop() methods
 * @throws {Error} If url, topics, or collector is invalid
 */
export default function mqttSource(url, topics, collector, options = {}) {
  if (typeof url !== 'string' || url.length === 0) {
    throw new Error('URL must be a non-empty string');
  }
  if (typeof topics !== 'string' || topics.length === 0) {
    throw new Error('Topic must be a non-empty string');
  }
  if (!collector || typeof collector.accept !== 'function') {
    throw new Error('Collector must have an accept() method');
  }
  const list = topics.split(',').map((t) => t.trim()).filter((t) => t.length > 0);
  // A string such as ' , ' is non-empty but contains no topic at all;
  // reject it here rather than subscribing to an empty list later.
  if (list.length === 0) {
    throw new Error('Topic must be a non-empty string');
  }
  let state = idle();
  return {
    /**
     * Starts subscribing to the MQTT topics. Does nothing if already started.
     */
    start() {
      if (state.subscribed()) {
        return;
      }
      const persistent = Boolean(options.clientId);
      const client = mqtt.connect(url, {
        clientId: options.clientId,
        clean: !persistent,
        protocolVersion: 5,
        properties: persistent ? {
          // `??` rather than `||` so an explicit 0 is passed through
          // instead of being replaced by the default.
          sessionExpiryInterval: options.sessionExpiryInterval ?? 3600
        } : undefined
      });
      const handler = (t, message) => {
        collector.accept({ topic: t, payload: message.toString() });
      };
      client.on('message', handler);
      // Subscribing inside the 'connect' listener means it runs on every
      // 'connect' event the client emits.
      client.on('connect', () => {
        client.subscribe(list, { qos: persistent ? 1 : 0 });
      });
      state = subscribed(client, handler);
    },
    /**
     * Stops subscribing to the MQTT topics and ends the client connection.
     * Does nothing if not started.
     */
    stop() {
      if (!state.subscribed()) {
        return;
      }
      const client = state.client();
      client.unsubscribe(list);
      client.off('message', state.handler());
      client.end();
      state = idle();
    }
  };
}
@@ -0,0 +1,98 @@
1
/**
 * Builds the "idle" state of the polling source (no timer running).
 *
 * @returns {object} State whose polling() always answers false
 */
function idle() {
  return { polling: () => false };
}
13
+
14
/**
 * Builds the "polling" state of the polling source, holding the active
 * timer so it can be cancelled later.
 *
 * @param {object} handle - Timer handle from setInterval
 * @returns {object} State with polling() (always true) and cancel()
 */
function polling(handle) {
  const cancel = () => {
    clearInterval(handle);
  };
  return {
    polling: () => true,
    cancel
  };
}
30
+
31
/**
 * Generic polling source with time window tracking.
 *
 * Polls at a specified interval and forwards records to
 * the collector. Each poll asks `fetch` for the window between the end of
 * the last successful poll and the current clock reading. A tick that fires
 * while the previous poll is still running is skipped, so two polls never
 * request the same window; the skipped time is covered by the next poll.
 * If `fetch` fails, the window start is left unchanged so the next poll
 * retries it.
 *
 * @example
 * const fetch = async (since, until) => {
 *   return await api.query(since, until);
 * };
 * const source = pollingSource(fetch, 10, collector, clock);
 * source.start();
 * // ... later
 * source.stop();
 *
 * @param {function} fetch - Async function(since, until) returning array
 * @param {number} interval - Polling interval in seconds
 * @param {object} collector - Collector with accept() method
 * @param {object} clk - Clock with millis() method
 * @returns {object} Source with start() and stop() methods
 * @throws {Error} If fetch, interval, collector, or clk is invalid
 */
export default function pollingSource(fetch, interval, collector, clk) {
  if (typeof fetch !== 'function') {
    throw new Error('Fetch must be a function');
  }
  // NaN and Infinity are typeof 'number' and pass `interval <= 0`, but are
  // not usable timer delays, so reject them up front.
  if (typeof interval !== 'number' || !Number.isFinite(interval) || interval <= 0) {
    throw new Error(`Interval must be a positive number, got: ${interval}`);
  }
  if (!collector || typeof collector.accept !== 'function') {
    throw new Error('Collector must have an accept() method');
  }
  if (!clk || typeof clk.millis !== 'function') {
    throw new Error('Clock must have a millis() method');
  }
  let state = idle();
  let since = clk.millis();
  // True while a poll is awaiting fetch(); guards against overlapping polls
  // that would otherwise read the same `since` and fetch duplicate windows.
  let inFlight = false;
  const poll = async () => {
    if (inFlight) {
      return;
    }
    inFlight = true;
    try {
      const until = clk.millis();
      const result = await fetch(since, until);
      // Only advance the window once the fetch has succeeded.
      since = until;
      for (const entry of result) {
        collector.accept(entry);
      }
    } finally {
      inFlight = false;
    }
  };
  return {
    /**
     * Starts polling. The first window begins at the moment of this call.
     * Does nothing if already started.
     */
    start() {
      if (state.polling()) {
        return;
      }
      since = clk.millis();
      const handle = setInterval(poll, interval * 1000);
      state = polling(handle);
    },
    /**
     * Stops polling by cancelling the timer. Does nothing if not started.
     */
    stop() {
      if (!state.polling()) {
        return;
      }
      state.cancel();
      state = idle();
    }
  };
}