@rdfc/js-runner 2.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/reader.ts CHANGED
@@ -1,6 +1,11 @@
1
1
  import { ClientReadableStream } from '@grpc/grpc-js'
2
- import { DataChunk, Message, RunnerClient, StreamMessage } from '@rdfc/proto'
3
- import winston from 'winston'
2
+ import {
3
+ DataChunk,
4
+ ReceivingMessage,
5
+ ReceivingStreamMessage,
6
+ RunnerClient,
7
+ } from '@rdfc/proto'
8
+ import { Logger } from 'winston'
4
9
  import {
5
10
  AnyConvertor,
6
11
  Convertor,
@@ -8,17 +13,19 @@ import {
8
13
  StreamConvertor,
9
14
  StringConvertor,
10
15
  } from './convertor'
16
+ import { Writable } from './runner'
17
+ import { promisify } from 'util'
11
18
 
12
19
  export type Any =
13
20
  | {
14
- string: string
15
- }
21
+ string: string
22
+ }
16
23
  | {
17
- stream: AsyncGenerator<Uint8Array>
18
- }
24
+ stream: AsyncGenerator<Uint8Array>
25
+ }
19
26
  | {
20
- buffer: Uint8Array
21
- }
27
+ buffer: Uint8Array
28
+ }
22
29
 
23
30
  export interface Reader {
24
31
  readonly uri: string
@@ -28,41 +35,47 @@ export interface Reader {
28
35
  anys(): AsyncIterable<Any>
29
36
  }
30
37
 
38
+ type Todo<T> = {
39
+ item: T
40
+ onComplete: () => void
41
+ }
42
+
31
43
  class MyIter<T> implements AsyncIterable<T> {
32
44
  private convertor: Convertor<T>
33
- private queue: (T | undefined)[] = []
45
+ private queue: Todo<T | undefined>[] = []
34
46
  private resolveNext: ((value: undefined) => void) | null = null
35
47
 
36
48
  constructor(convertor: Convertor<T>) {
37
49
  this.convertor = convertor
38
50
  }
39
51
 
40
- push(buffer: Uint8Array) {
52
+ push(buffer: Uint8Array, onComplete: () => void) {
41
53
  const item = this.convertor.from(buffer)
42
- this.queue.push(item)
54
+ this.queue.push({ item, onComplete })
43
55
  if (this.resolveNext) {
44
56
  this.resolveNext(undefined)
45
57
  this.resolveNext = null
46
58
  }
47
59
  }
48
60
 
49
- close() {
50
- this.queue.push(undefined)
61
+ close(onComplete: () => void) {
62
+ this.queue.push({ item: undefined, onComplete })
51
63
  if (this.resolveNext) {
52
64
  this.resolveNext(undefined)
53
65
  this.resolveNext = null
54
66
  }
55
67
  }
56
68
 
57
- async pushStream(chunks: ClientReadableStream<DataChunk>) {
58
- const stream = (async function* (stream) {
59
- for await (const c of stream) {
60
- const chunk: DataChunk = c
69
+ async pushStream(chunks: AsyncIterable<DataChunk>, onComplete: () => void) {
70
+ // This is an async generator that transforms DataChunks to Buffers
71
+ const stream = (async function*(stream) {
72
+ for await (const chunk of stream) {
61
73
  yield chunk.data
62
74
  }
63
75
  })(chunks)
76
+
64
77
  const item = await this.convertor.fromStream(stream)
65
- this.queue.push(item)
78
+ this.queue.push({ item, onComplete })
66
79
  if (this.resolveNext) {
67
80
  this.resolveNext(undefined)
68
81
  this.resolveNext = null
@@ -72,9 +85,15 @@ class MyIter<T> implements AsyncIterable<T> {
72
85
  async *[Symbol.asyncIterator]() {
73
86
  while (true) {
74
87
  if (this.queue.length > 0) {
75
- const item = this.queue.shift()!
76
- if (item === undefined) break
88
+ const { item, onComplete } = this.queue.shift()!
89
+ if (item === undefined) {
90
+ onComplete()
91
+ break
92
+ }
77
93
  yield item
94
+ // Note: execution pauses at `yield` until the consumer calls `.next()` again.
95
+ // We call onComplete *after* resuming, so the producer knows the item was actually consumed.
96
+ onComplete()
78
97
  } else {
79
98
  await new Promise<undefined>((resolve) => (this.resolveNext = resolve))
80
99
  }
@@ -85,58 +104,217 @@ class MyIter<T> implements AsyncIterable<T> {
85
104
  export class ReaderInstance implements Reader {
86
105
  private client: RunnerClient
87
106
  readonly uri: string
88
- private logger: winston.Logger
107
+ private logger: Logger
108
+ private readonly notifyOrchestrator: Writable
89
109
 
90
- private iterators: MyIter<unknown>[] = []
110
+ private consumers: MyIter<unknown>[] = []
91
111
 
92
- constructor(uri: string, client: RunnerClient, logger: winston.Logger) {
112
+ constructor(
113
+ uri: string,
114
+ client: RunnerClient,
115
+ notifyOrchestrator: Writable,
116
+ logger: Logger,
117
+ ) {
93
118
  this.uri = uri
94
119
  this.client = client
95
120
  this.logger = logger
121
+ this.notifyOrchestrator = notifyOrchestrator
96
122
  }
97
123
 
98
124
  anys(): AsyncIterable<Any> {
99
125
  const iter = new MyIter(AnyConvertor)
100
- this.iterators.push(iter)
126
+ this.consumers.push(iter)
101
127
  return iter
102
128
  }
103
129
 
104
130
  strings(): AsyncIterable<string> {
105
131
  const iter = new MyIter(StringConvertor)
106
- this.iterators.push(iter)
132
+ this.consumers.push(iter)
107
133
  return iter
108
134
  }
109
135
 
110
136
  buffers(): AsyncIterable<Uint8Array> {
111
137
  const iter = new MyIter(NoConvertor)
112
- this.iterators.push(iter)
138
+ this.consumers.push(iter)
113
139
  return iter
114
140
  }
115
141
 
116
142
  streams(): AsyncIterable<AsyncGenerator<Uint8Array>> {
117
143
  const iter = new MyIter(StreamConvertor)
118
- this.iterators.push(iter)
144
+ this.consumers.push(iter)
119
145
  return iter
120
146
  }
121
147
 
122
- handleMsg(msg: Message) {
148
+ handleMsg(msg: ReceivingMessage) {
123
149
  this.logger.debug(`${this.uri} handling message`)
124
- for (const iter of this.iterators) {
125
- iter.push(msg.data)
150
+
151
+ const promises = []
152
+ for (const iter of this.consumers) {
153
+ promises.push(new Promise((res) => iter.push(msg.data, () => res(null))))
126
154
  }
155
+
156
+ Promise.all(promises).then(() =>
157
+ this.notifyOrchestrator({
158
+ processed: {
159
+ globalSequenceNumber: msg.globalSequenceNumber,
160
+ channel: this.uri,
161
+ },
162
+ }),
163
+ )
127
164
  }
128
165
 
129
166
  close() {
130
- for (const iter of this.iterators) {
131
- iter.close()
167
+ for (const iter of this.consumers) {
168
+ iter.close(() => { })
132
169
  }
133
170
  }
134
171
 
135
- handleStreamingMessage(msg: StreamMessage) {
172
+ // There is a stream message available for this reader
173
+ async handleStreamingMessage({
174
+ channel,
175
+ globalSequenceNumber,
176
+ }: ReceivingStreamMessage) {
136
177
  this.logger.debug(`${this.uri} handling streaming message`)
137
- const chunks = this.client.receiveStreamMessage(msg.id!)
138
- for (const iter of this.iterators) {
139
- iter.pushStream(chunks)
178
+
179
+ const chunks = this.client.receiveStreamMessage()
180
+ const writeControlMessage = promisify(chunks.write.bind(chunks))
181
+ const consumersConsumed = []
182
+
183
+ // After each chunk is handled by all consumer, emit a processed message
184
+ let idx = 0
185
+ const messageIterators = fanoutStream(
186
+ chunks,
187
+ this.consumers.length,
188
+ async () => {
189
+ await writeControlMessage({ streamSequenceNumber: idx++ })
190
+ },
191
+ )
192
+
193
+ for (const consumer of this.consumers) {
194
+ consumersConsumed.push(
195
+ new Promise((res) =>
196
+ consumer.pushStream(messageIterators.pop()!, () => res(null)),
197
+ ),
198
+ )
199
+ }
200
+
201
+ await writeControlMessage({ globalSequenceNumber })
202
+
203
+ Promise.all(consumersConsumed).then(() => {
204
+ console.log('Writing processed for streaming message')
205
+ this.notifyOrchestrator({ processed: { globalSequenceNumber, channel } })
206
+ })
207
+ }
208
+ }
209
+
210
+ /**
211
+ * Helper function to tee a stream `numConsumers` times
212
+ * When each tee'd stream has handled a chunk, call {@link onAllHandled}
213
+ */
214
+ function fanoutStream<T>(
215
+ stream: ClientReadableStream<T>,
216
+ numConsumers: number,
217
+ onAllHandled: () => void | Promise<void>,
218
+ ): AsyncIterable<T>[] {
219
+ type Waiter = (value: IteratorResult<T>) => void
220
+
221
+ let ended = false
222
+ const buffer: T[] = []
223
+ const pending: Waiter[] = []
224
+ let activeConsumers = numConsumers
225
+
226
+ // consumer bookkeeping
227
+ let awaitingAck = 0
228
+
229
+ function pushChunk(chunk: T) {
230
+ buffer.push(chunk)
231
+ flush()
232
+ }
233
+
234
+ function flush() {
235
+ while (buffer.length > 0 && pending.length > 0) {
236
+ const chunk = buffer[0] // keep until all consumers ack
237
+ const waiter = pending.shift()!
238
+ waiter({ value: chunk, done: false })
239
+ awaitingAck++
240
+ }
241
+ }
242
+
243
+ function end() {
244
+ ended = true
245
+ while (pending.length > 0) {
246
+ const waiter = pending.shift()!
247
+ waiter({ value: undefined, done: true })
248
+ }
249
+ }
250
+
251
+ stream.on('data', (chunk: T) => {
252
+ pushChunk(chunk)
253
+ })
254
+
255
+ stream.on('end', () => {
256
+ end()
257
+ })
258
+
259
+ stream.on('error', (err) => {
260
+ while (pending.length > 0) {
261
+ const waiter = pending.shift()!
262
+ waiter({ value: undefined, done: true })
263
+ }
264
+ throw err
265
+ })
266
+
267
+ function makeIterable(): AsyncIterable<T> {
268
+ return {
269
+ [Symbol.asyncIterator]() {
270
+ return {
271
+ next(): Promise<IteratorResult<T>> {
272
+ if (buffer.length > 0) {
273
+ const chunk = buffer[0]
274
+ awaitingAck++
275
+ return Promise.resolve({ value: chunk, done: false })
276
+ }
277
+ if (ended) {
278
+ return Promise.resolve({ value: undefined, done: true })
279
+ }
280
+ return new Promise((resolve) => {
281
+ pending.push(resolve)
282
+ })
283
+ },
284
+ async return() {
285
+ activeConsumers--
286
+ if (activeConsumers === 0) {
287
+ end()
288
+ }
289
+ return { value: undefined, done: true }
290
+ },
291
+ }
292
+ },
140
293
  }
141
294
  }
295
+
296
+ async function ack() {
297
+ awaitingAck--
298
+ if (awaitingAck === 0) {
299
+ // all consumers done with the current chunk
300
+ buffer.shift() // drop it
301
+ await onAllHandled()
302
+ flush() // continue with next chunk
303
+ }
304
+ }
305
+
306
+ // wrap consumer so they *must* call ack() after processing
307
+ function wrap(iterable: AsyncIterable<T>): AsyncIterable<T> {
308
+ return {
309
+ async *[Symbol.asyncIterator]() {
310
+ for await (const item of iterable) {
311
+ yield item
312
+ await ack()
313
+ }
314
+ },
315
+ }
316
+ }
317
+
318
+ const rawIterables = Array.from({ length: numConsumers }, makeIterable)
319
+ return rawIterables.map(wrap)
142
320
  }
package/src/runner.ts CHANGED
@@ -1,15 +1,18 @@
1
1
  import {
2
- OrchestratorMessage,
2
+ Close,
3
+ FromRunner,
3
4
  Processor,
4
5
  RunnerClient,
5
- RunnerMessage,
6
+ ToRunner,
7
+ LocalAck,
8
+ ReceivingMessage,
9
+ ReceivingStreamMessage,
6
10
  } from '@rdfc/proto'
7
11
  import { Reader, ReaderInstance } from './reader'
8
12
  import { Writer, WriterInstance } from './writer'
9
13
  import { Processor as Proc } from './processor'
10
- import { Logger } from 'winston'
14
+ import { createLogger, Logger } from 'winston'
11
15
 
12
- import winston from 'winston'
13
16
  import { RpcTransport } from './logger'
14
17
  import { Cont, empty, extractShapes, Shapes } from 'rdf-lens'
15
18
  import { NamedNode, Parser } from 'n3'
@@ -36,7 +39,7 @@ const RDFC = createNamespace(
36
39
  'Writer',
37
40
  )
38
41
 
39
- export type Writable = (msg: OrchestratorMessage) => Promise<unknown>
42
+ export type Writable = (msg: FromRunner) => Promise<unknown>
40
43
 
41
44
  type ProcessorConfig = {
42
45
  location: string
@@ -44,13 +47,12 @@ type ProcessorConfig = {
44
47
  clazz?: string
45
48
  }
46
49
 
47
- export type FullProc<C extends Proc<unknown>> =
48
- C extends Proc<infer T> ? T & C : unknown
50
+ export type FullProc<C> = C extends Proc<infer T> ? T & C : never
49
51
  export class Runner {
50
- private readonly readers: { [uri: string]: ReaderInstance[] } = {}
51
- private readonly writers: { [uri: string]: WriterInstance[] } = {}
52
+ private readonly readers: { [uri: string]: ReaderInstance } = {}
53
+ private readonly writers: { [uri: string]: WriterInstance } = {}
52
54
  private readonly client: RunnerClient
53
- private readonly write: Writable
55
+ private readonly notifyOrchestrator: Writable
54
56
  private readonly logger: Logger
55
57
  private shapes: Shapes
56
58
  private quads: Quad[] = []
@@ -62,12 +64,12 @@ export class Runner {
62
64
 
63
65
  constructor(
64
66
  client: RunnerClient,
65
- write: Writable,
67
+ notifyOrchestrator: Writable,
66
68
  uri: string,
67
69
  logger: Logger,
68
70
  ) {
69
71
  this.client = client
70
- this.write = write
72
+ this.notifyOrchestrator = notifyOrchestrator
71
73
  this.uri = uri
72
74
  this.logger = logger
73
75
  }
@@ -75,7 +77,7 @@ export class Runner {
75
77
  async addProcessor<P extends Proc<unknown>>(
76
78
  proc: Processor,
77
79
  ): Promise<FullProc<P>> {
78
- const procLogger = winston.createLogger({
80
+ const procLogger = createLogger({
79
81
  transports: [
80
82
  new RpcTransport({
81
83
  entities: [proc.uri, this.uri],
@@ -84,14 +86,13 @@ export class Runner {
84
86
  ],
85
87
  })
86
88
 
87
- const ty = JSON.stringify(
88
- this.quads
89
- .filter(
90
- (x) =>
91
- x.subject.value === proc.uri && x.predicate.equals(RDF.terms.type),
92
- )
93
- .map((x) => x.object.value),
94
- )
89
+ const ty = this.quads
90
+ .filter(
91
+ (x) =>
92
+ x.subject.value === proc.uri && x.predicate.equals(RDF.terms.type),
93
+ )
94
+ .map((x) => x.object.value)
95
+
95
96
  this.logger.info('parsing ' + proc.uri + ' type ' + ty)
96
97
  const args = this.shapes.lenses[RDFL.TypedExtract].execute({
97
98
  id: new NamedNode(proc.uri),
@@ -99,8 +100,6 @@ export class Runner {
99
100
  })
100
101
 
101
102
  const config: ProcessorConfig = JSON.parse(proc.config)
102
- // const url = new URL(config.location)
103
- // process.chdir(url.pathname)
104
103
  const jsProgram = await import(config.file)
105
104
  const clazz = jsProgram[config.clazz || 'default']
106
105
  const instance: Proc<unknown> = new clazz(args, procLogger)
@@ -111,7 +110,7 @@ export class Runner {
111
110
  this.processors.push(instance)
112
111
  this.processorTransforms.push(instance.transform())
113
112
 
114
- await this.write({ init: { uri: proc.uri } })
113
+ await this.notifyOrchestrator({ initialized: { uri: proc.uri } })
115
114
 
116
115
  return <FullProc<P>>instance
117
116
  }
@@ -126,72 +125,135 @@ export class Runner {
126
125
  }
127
126
 
128
127
  createWriter(uri: Term): Writer {
129
- const ids = uri.value
128
+ const id = uri.value
130
129
 
131
- if (this.writers[ids] === undefined) {
132
- this.writers[ids] = []
130
+ if (this.writers[id] !== undefined) {
131
+ return this.writers[id]
133
132
  }
134
- const writer = new WriterInstance(ids, this.client, this.write, this.logger)
135
- this.writers[ids].push(writer)
133
+ const writer = new WriterInstance(
134
+ id,
135
+ this.client,
136
+ this.notifyOrchestrator,
137
+ this.uri,
138
+ this.logger,
139
+ )
140
+ this.writers[id] = writer
136
141
  return writer
137
142
  }
138
143
 
139
144
  createReader(uri: Term): Reader {
140
145
  const ids = uri.value
141
146
 
142
- if (this.readers[ids] === undefined) {
143
- this.readers[ids] = []
147
+ if (this.readers[ids] !== undefined) {
148
+ return this.readers[ids]
144
149
  }
145
- const reader = new ReaderInstance(ids, this.client, this.logger)
146
- this.readers[ids].push(reader)
150
+ const reader = new ReaderInstance(
151
+ ids,
152
+ this.client,
153
+ this.notifyOrchestrator,
154
+ this.logger,
155
+ )
156
+ this.readers[ids] = reader
147
157
  return reader
148
158
  }
149
159
 
150
- async handleOrchMessage(msg: RunnerMessage) {
160
+ async handleOrchMessage(msg: ToRunner) {
151
161
  if (msg.msg) {
152
- this.logger.debug('Handling data msg for ' + msg.msg.channel)
153
- for (const reader of this.readers[msg.msg.channel] || []) {
154
- reader.handleMsg(msg.msg)
155
- }
162
+ this.handleMsg(msg.msg)
156
163
  }
157
164
 
158
165
  if (msg.streamMsg) {
159
- for (const reader of this.readers[msg.streamMsg.channel] || []) {
160
- reader.handleStreamingMessage(msg.streamMsg)
161
- }
166
+ await this.handleStreamMsg(msg.streamMsg)
162
167
  }
163
168
 
164
169
  if (msg.close) {
165
- const uri = msg.close.channel
170
+ await this.handleClose(msg.close)
171
+ }
166
172
 
167
- for (const reader of this.readers[uri] || []) {
168
- reader.close()
169
- }
173
+ if (msg.pipeline) {
174
+ this.handlePipeline(msg.pipeline)
175
+ }
170
176
 
171
- for (const writer of this.writers[uri] || []) {
172
- await writer.close(true)
173
- }
177
+ if (msg.processed) {
178
+ this.handleProcessed(msg.processed)
174
179
  }
180
+ }
175
181
 
176
- if (msg.pipeline) {
177
- try {
178
- // here
179
- const quads = new Parser().parse(msg.pipeline)
180
- this.shapes = extractShapes(
181
- quads,
182
- {
183
- [RDFC.Reader]: (x: Cont) => this.createReader(x.id),
184
- [RDFC.Writer]: (x: Cont) => this.createWriter(x.id),
185
- },
186
- {
187
- [RDFC.Reader]: empty<Cont>(),
188
- [RDFC.Writer]: empty<Cont>(),
189
- },
190
- )
191
- this.quads = quads
192
- } catch (ex: unknown) {
193
- this.logger.error('Pipeline failed: ' + JSON.stringify(ex))
194
- }
182
+ private async handleClose(close: Close) {
183
+ const uri = close.channel
184
+ const r = this.readers[uri]
185
+
186
+ let closed = false
187
+ if (r) {
188
+ r.close()
189
+ closed = true
190
+ }
191
+ const w = this.writers[uri]
192
+ if (w) {
193
+ closed = true
194
+ await w.close(true)
195
+ }
196
+
197
+ if (!closed) {
198
+ this.logger.error(
199
+ `Received a close event for channel ${uri}, but neither reader nor writer is present.`,
200
+ )
201
+ }
202
+ }
203
+
204
+ private handlePipeline(pipeline: string) {
205
+ try {
206
+ const quads = new Parser().parse(pipeline)
207
+ this.shapes = extractShapes(
208
+ quads,
209
+ {
210
+ [RDFC.Reader]: (x: Cont) => this.createReader(x.id),
211
+ [RDFC.Writer]: (x: Cont) => this.createWriter(x.id),
212
+ },
213
+ {
214
+ [RDFC.Reader]: empty<Cont>(),
215
+ [RDFC.Writer]: empty<Cont>(),
216
+ },
217
+ )
218
+ this.quads = quads
219
+ } catch (ex: unknown) {
220
+ this.logger.error('Pipeline failed: ' + JSON.stringify(ex))
221
+ }
222
+ }
223
+
224
+ private handleMsg(msg: ReceivingMessage) {
225
+ this.logger.debug('Handling data msg for ' + msg.channel)
226
+ const r = this.readers[msg.channel]
227
+
228
+ if (r) {
229
+ r.handleMsg(msg)
230
+ } else {
231
+ this.logger.error(
232
+ `Received message for channel ${msg.channel}, but no reader was present.`,
233
+ )
234
+ }
235
+ }
236
+
237
+ private async handleStreamMsg(streamMsg: ReceivingStreamMessage) {
238
+ const r = this.readers[streamMsg.channel]
239
+
240
+ if (r) {
241
+ await r.handleStreamingMessage(streamMsg)
242
+ } else {
243
+ this.logger.error(
244
+ `Received stream message for channel ${streamMsg.channel}, but no reader was present.`,
245
+ )
246
+ }
247
+ }
248
+
249
+ private handleProcessed(processed: LocalAck) {
250
+ const writer = this.writers[processed.channel]
251
+ if (writer) {
252
+ writer.handled()
253
+ } else {
254
+ this.logger.error(
255
+ `Received processed message for channel ${processed.channel}, but no writer was present.`,
256
+ )
195
257
  }
196
258
  }
197
259
  }