fall 0.0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/draft.rb +377 -0
- data/lib/fall/version.rb +5 -0
- data/lib/fall.rb +7 -0
- metadata +103 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f15339eae553d84b9a1535bccf55158dcf4226166351045e8caae18cbdca8eea
|
4
|
+
data.tar.gz: c393bd00a8f670b28976f7a23d3cea46c5984fcb85efd89895e2b7a10997dea5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 66dffebddc587b01066dfca71b93c596bef8a89e39d37a86f94fb3da2454497c15b64112ebc5350a0244a8f14feb2841b25c0b31a947ec76c78320ba871db1db
|
7
|
+
data.tar.gz: 565d2fa8e2d68f7e402f027064377bb86fdc26bf1464a69b412dd2f1baaae442d90ffb1fce4f36983490a200fb034cf7da7ddb95f9f9cbe751317b1445b873be
|
data/lib/draft.rb
ADDED
@@ -0,0 +1,377 @@
|
|
1
|
+
require 'active_support/core_ext/hash'
|
2
|
+
require 'base64'
|
3
|
+
require 'zlib'
|
4
|
+
require 'date'
|
5
|
+
require '~/repos/bed/lib/bed.rb'
|
6
|
+
|
7
|
+
# Default settings; Processor merges per-call overrides on top of these.
options = { delimiter: ' ', code: nil }

# Best-effort YJIT activation: any StandardError raised while enabling it
# (e.g. a NameError when RubyVM::YJIT is not defined) is ignored.
begin
  RubyVM::YJIT.enable
rescue StandardError
  nil
end
|
10
|
+
|
11
|
+
# OptionParser.new do |o|
|
12
|
+
# o.on('-d DELIMITER', '--delimiter DELIMITER') do |d|
|
13
|
+
# options[:delimiter] = d
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# o.on('-e CODE', '--eval CODE') do |e|
|
17
|
+
# options[:code] = e
|
18
|
+
# end
|
19
|
+
#
|
20
|
+
# end.parse!
|
21
|
+
|
22
|
+
# Compresses data with zlib; every argument is forwarded to
# Zlib::Deflate.deflate.
def deflate(...) = Zlib::Deflate.deflate(...)

# Decompresses zlib data; every argument is forwarded to
# Zlib::Inflate.inflate.
def inflate(...) = Zlib::Inflate.inflate(...)

# Like #deflate, with the result's encoding forced to ASCII-8BIT.
def deflate!(...) = Zlib::Deflate.deflate(...).force_encoding('ASCII-8BIT')

# Like #inflate, with the result's encoding forced to ASCII-8BIT.
def inflate!(...) = Zlib::Inflate.inflate(...).force_encoding('ASCII-8BIT')

# Compresses via #deflate and returns the strict (newline-free) Base64 text.
def deflate64(...) = Base64.strict_encode64(deflate(...))

# Base64-decodes the argument (lenient decoder) and decompresses it via #inflate.
def inflate64(...) = inflate(Base64.decode64(...))

# Runs #inflate64 and hands the decompressed text to Bed.infer.
def inflate64bed(...) = Bed.infer(inflate64(...))
|
49
|
+
|
50
|
+
module Enumerable
  # Buckets the elements into fixed-size time windows.
  #
  # Each element is converted with #to_time and assigned to the window whose
  # start is its epoch second rounded down to a multiple of
  # window_size_seconds.
  #
  # @param window_size_seconds [Integer] window length in seconds (default one hour)
  # @return [Array<Hash>] one single-entry hash per window, mapping the
  #   window start (a Time) to the elements that fall inside it
  def window_timestamps(window_size_seconds = 3600)
    buckets = group_by do |stamp|
      epoch_seconds = stamp.to_time.to_i
      (epoch_seconds / window_size_seconds) * window_size_seconds
    end

    buckets.map { |window_start, members| { Time.at(window_start) => members } }
  end
end
|
57
|
+
|
58
|
+
# Shorthand for DateTime.parse; all arguments are forwarded unchanged.
def parsedate(...) = DateTime.parse(...)
|
61
|
+
|
62
|
+
# Column letters: the n-th letter names the n-th field of a record.
alphabet = 'abcdefghijklmnopqrstuvwxyz'

# Give Array one reader per letter (Array#a .. Array#z), each returning the
# element at that letter's position: a => self[0], b => self[1], ...
alphabet.each_char.with_index do |letter, position|
  Array.define_method(letter) { self[position] }
end
|
70
|
+
|
71
|
+
# Adds Enumerator::Lazy#stream, which simply returns the receiver.
class Enumerator::Lazy
  def stream = self
end
|
76
|
+
|
77
|
+
# Returns the given enumerable untouched; when called without an argument it
# reads every line from ARGF and returns them as a lazy enumerator.
def lines(enum = ARGF.readlines.lazy) = enum
|
80
|
+
|
81
|
+
# Splits a flat list of operation snippets into typed stages.
#
# Every snippet matching /^stream/ becomes its own [:stream, [snippet]] pair;
# each run of other snippets between stream snippets becomes a single
# [:record, [snippets...]] pair. An input without any stream snippet
# (including an empty one) yields one :record pair holding all of it.
#
# @param operations [Array<String>] operation snippets in pipeline order
# @return [Array<Array>] list of [type, snippets] pairs (type is :stream or :record)
def identify_streams(operations)
  case operations
  in [/^stream/ => stream]
    # The binding must sit on the element (not on the whole array) and the
    # pair must be wrapped in a list, like every other branch; callers splat
    # and iterate the result as a list of pairs.
    [[:stream, [stream]]]
  in [/^stream/ => stream, *rest]
    [[:stream, [stream]], *identify_streams(rest)]
  in [*pre, /^stream/ => stream]
    [*identify_streams(pre), [:stream, [stream]]]
  in [*pre, /^stream/ => stream, *rest]
    [*identify_streams(pre), [:stream, [stream]], *identify_streams(rest)]
  else
    [[:record, [*operations]]]
  end
end
|
95
|
+
|
96
|
+
# Merges every run of adjacent [:stream, ops] pairs into a single pair whose
# ops are the concatenation of the run, leaving all other entries in place.
#
# @param operations [Array] list of [type, ops] pairs
# @return [Array] the list with neighbouring :stream stages fused
def consolidate_streams(operations)
  return operations unless operations.is_a?(Array)

  operations.each_with_object([]) do |entry, merged|
    case [merged.last, entry]
    in [[:stream, earlier_ops], [:stream, later_ops]]
      # Fold this stream stage into the one directly before it.
      merged[-1] = [:stream, [*earlier_ops, *later_ops]]
    else
      merged << entry
    end
  end
end
|
104
|
+
|
105
|
+
# Turns a pipeline string into typed stages.
#
# The string is cut at every '|' character, each piece is stripped of
# surrounding whitespace, and the pieces are classified by identify_streams
# and then fused by consolidate_streams.
#
# NOTE(review): the split is purely textual, so a '|' inside a snippet
# (block parameters, `||`) also ends a stage.
def groupup(code)
  stages = code.split('|').map(&:strip)
  consolidate_streams(identify_streams(stages))
end
|
111
|
+
|
112
|
+
# Pairs a source collection with a callable and exposes the transformed,
# filtered records as an enumerator.
class Transformer
  attr_accessor :source, :func

  def initialize(source:, func:)
    @source = source
    @func = func
  end

  # Calls func on each record of source, drops every result marked with
  # :__discard, and forwards all arguments to #to_enum on what is left.
  def to_enum(...)
    transformed = source.map { |*record| func.call(*record) }
    kept = transformed.reject { |result| discarded?(result) }
    kept.to_enum(...)
  end

  private

  # True when the result is the :__discard marker itself or an array that
  # contains it.
  def discarded?(result)
    case result
    in [*, :__discard, *] | :__discard then true
    else false
    end
  end
end
|
132
|
+
|
133
|
+
# An operation applied to a whole stream (the complete enumerable) at once.
class StreamOperation
  attr_reader :func

  # Builds an operation from a Ruby snippet; the snippet becomes the body of
  # a proc receiving the stream as the local variable `stream`.
  #
  # NOTE(review): the snippet is evaluated as arbitrary Ruby code, so it must
  # come from a trusted source.
  #
  # @param code [String] Ruby source for the proc body
  # @return [StreamOperation]
  def self.from_code(code)
    func = instance_eval(<<~RUBY)
      proc do |stream|
        #{code}
      end
    RUBY
    new(func)
  end

  # @param func [#call] callable taking the stream and returning the new stream
  def initialize(func)
    @func = func
  end

  # Double-dispatch hook: registers this operation with the pipeline as a
  # stream operation. The operation itself (not the pipeline) is passed on.
  def add_to_pipeline(pipeline)
    pipeline.add_stream_operation(self)
  end

  def to_proc
    @func
  end

  def call(*args)
    @func.call(*args)
  end

  # Returns a new operation that runs this one and feeds its result into
  # `operation`.
  #
  # @param operation [#func] the operation to run afterwards
  # @return [StreamOperation]
  def chain(operation)
    self.class.new(func >> operation.func)
  end

  # Same composition operator RecordOperation offers.
  alias >> chain
end
|
168
|
+
|
169
|
+
# An operation applied to one record at a time.
class RecordOperation
  attr_reader :func

  # Builds an operation from a Ruby snippet.
  #
  # The snippet is evaluated with `self` set to the argument list flattened
  # one level, and its value(s) are collected into an array. Each value is
  # then paired positionally with the original arguments: `true` keeps the
  # argument, `false` becomes :__discard, and anything else replaces it.
  #
  # NOTE(review): the snippet is evaluated as arbitrary Ruby code, so it must
  # come from a trusted source.
  #
  # @param code [String] Ruby expression(s), comma separated
  # @return [RecordOperation]
  def self.from_code(code)
    func = instance_eval(<<~RUBY)
      proc do |*args|
        results = args.flatten(1).instance_eval { [#{code}] }

        results.zip(args).map do |result, arg|
          case result
          in true then arg
          in false then :__discard
          else result
          end
        end
      end
    RUBY
    new(func)
  end

  # @param func [#call] callable taking a record's fields
  def initialize(func)
    @func = func
  end

  # Double-dispatch hook: registers this operation with the pipeline as a
  # record operation. The operation itself (not the pipeline) is passed on.
  def add_to_pipeline(pipeline)
    pipeline.add_record_operation(self)
  end

  def call(*args)
    @func.call(*args)
  end

  # Returns a new operation that runs this one and feeds its result into
  # `operation`.
  #
  # @param operation [#func] the operation to run afterwards
  # @return [RecordOperation]
  def chain(operation)
    self.class.new(func >> operation.func)
  end

  alias >> chain

  def to_proc
    @func
  end
end
|
215
|
+
|
216
|
+
# A pipeline stage that applies one composed record operation to every
# record of its source.
class RecordOperationPipeline
  def initialize(source)
    @source = source
    # Start from a pass-through operation that returns its arguments as an array.
    @operation = RecordOperation.new(->(*args) { args })
  end

  # Appends the operation to the end of the composed chain.
  def add_operation(operation)
    @operation = @operation.chain(operation)
  end

  # Runs the composed operation over each record, removes results that are
  # :__discard or an array containing :__discard, and forwards all arguments
  # to #to_enum on the remainder.
  def to_enum(...)
    processed = @source.map { |*record| @operation.call(*record) }

    survivors = processed.reject do |outcome|
      case outcome
      in [*, :__discard, *] | :__discard then true
      else false
      end
    end

    survivors.to_enum(...)
  end
end
|
238
|
+
|
239
|
+
# A pipeline stage that applies one composed stream operation to its whole
# source at once.
class StreamOperationPipeline
  def initialize(source)
    @source = source
    # Start from a pass-through operation that returns the stream untouched.
    @operation = StreamOperation.new(->(stream) { stream })
  end

  # Appends the operation to the end of the composed chain.
  def add_operation(operation)
    @operation = @operation.chain(operation)
  end

  # Returns whatever the composed operation produces for the source.
  def to_enum = @operation.call(@source)
end
|
253
|
+
|
254
|
+
# Builds a processing pipeline out of record and stream operations.
#
# Consecutive operations of the same kind are collected into one stage.
# When the kind changes, the current stage is sealed: its output becomes the
# source of the next stage.
class Pipeline
  # @param source [Enumerable] the input records (defaults to an empty lazy enumerator)
  def initialize(source = [].lazy)
    @source = source
    @currently_building_pipeline = nil
  end

  # Adds an operation; the operation decides (via #add_to_pipeline) whether
  # it is registered as a record or a stream operation.
  def add_operation(operation)
    operation.add_to_pipeline(self)
  end

  def add_record_operation(operation)
    ensure_current_stage_type(RecordOperationPipeline)
    @currently_building_pipeline.add_operation(operation)
  end

  def add_stream_operation(operation)
    ensure_current_stage_type(StreamOperationPipeline)
    @currently_building_pipeline.add_operation(operation)
  end

  # Returns the output of the current stage. With no operations added there
  # is no stage yet, so the source itself is enumerated; calling #to_enum on
  # nil would return an enumerator that raises as soon as it is iterated.
  def to_enum
    return @source.to_enum if @currently_building_pipeline.nil?

    @currently_building_pipeline.to_enum
  end

  private

  # Makes sure the stage under construction is of the expected class,
  # sealing a stage of a different class first and creating a new stage on
  # top of the current source when none is open.
  def ensure_current_stage_type(expected_pipeline_klass)
    current = @currently_building_pipeline
    seal_current_stage if current && !current.is_a?(expected_pipeline_klass)
    @currently_building_pipeline ||= expected_pipeline_klass.new(@source.to_enum)
  end

  # Closes the current stage; its output is the input of whatever follows.
  def seal_current_stage
    @source = @currently_building_pipeline.to_enum
    @currently_building_pipeline = nil
  end
end
|
290
|
+
|
291
|
+
# Runs a pipeline string over an enumerable of text lines.
#
# Options given per call are merged over the file-level `options` defaults;
# `code` is the pipeline string (stages separated by '|', see groupup) and
# `delimiter` is the field separator used to split each line.
#
# Each line is chomped, split on the delimiter, turned into a hash keyed by
# column letter ('a' for the first field, 'b' for the second, ...) and
# passed through Bed.infer before the stages are applied in order.
#
# NOTE(review): stage snippets are evaluated with instance_eval, i.e. as
# arbitrary Ruby code; `code` must come from a trusted source.
# NOTE(review): only 26 column letters exist, so fields beyond the 26th all
# land under a nil key.
Processor = lambda do |enum, **input_options|

  options.merge(input_options) => { code:, delimiter: }
  operations = groupup code

  lams = operations.map do |type, pipeline|
    composed = (
      case type
      in :record
        # Each record op receives (errors, record) and returns
        # [errors, new_record]; a failing snippet is reported with warn, its
        # message is appended to errors, and the record passes through as is.
        ops = pipeline.map do |op|
          proc do |errors, record|
            snippet = "[#{op}].flatten(1)"
            begin
              [errors, record.instance_eval(snippet)]
            rescue => e
              warn e
              errors << e.full_message
              [errors, record]
            end
          end
        end
        identity = ->(i) { i }
        chained = ops.reverse.reduce(identity) { |accum, fn| fn >> accum }
        # Pre-apply the errors list so the result takes just the record.
        chained.curry(2)[[]]
      in :stream
        # Each stream op is evaluated against the whole stream; a value that
        # is not an Enumerator is wrapped in an array first. A failing
        # snippet is reported with warn and the op evaluates to nil.
        ops = pipeline.map do |op|
          proc do |stream|
            begin
              stream = [stream] unless stream.is_a?(Enumerator)
              stream.instance_eval(op)
            rescue => e
              warn e
            end
          end
        end
        identity = ->(i) { i }
        ops.reverse.reduce(identity) { |accum, fn| fn >> accum }
      else
        raise "Unknown type: #{type}"
      end
    )
    [type, composed]
  end

  enum
    .map(&:chomp)
    # Use the merged per-call delimiter, not only the file-level default.
    .map { |line| line.split(delimiter) }
    .map { |fields| fields.each_with_index.to_h { |field, index| [alphabet[index], field] } }
    .map do |i|
      Bed.infer(i)
    end
    .then { |records|
      lams.reduce(records) { |accum, (type, lam)|
        case type
        in :record
          accum.map(&lam).map(&:last)
        in :stream
          [[lam.call(accum.flat_map(&:last))].flatten(1)]
        else
          raise "Unknown type: #{type}"
        end
      }
    }
    .then { |obj|
      # Unwrap the single-element wrapper produced by a final stream stage.
      # NOTE(review): [Array => arr] also matches [[lazy]], so the second
      # branch below looks unreachable — confirm the intended order.
      case obj
      in [Array => arr]
        arr
      in [[Enumerator::Lazy => lazy]]
        lazy
      in [Enumerator::Lazy => lazy]
        lazy
      else
        obj
      end
    }
    # .map do |record|
    #   record.join(delimiter) if record.respond_to?(:join)
    #   puts record
    # end
end
|
376
|
+
|
377
|
+
# process.call(options, ARGF.each_line.lazy)
|
data/lib/fall/version.rb
ADDED
data/lib/fall.rb
ADDED
metadata
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fall
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- " David Gillis"
|
8
|
+
bindir: bin
|
9
|
+
cert_chain: []
|
10
|
+
date: 2025-04-06 00:00:00.000000000 Z
|
11
|
+
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: zeitwerk
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ">="
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '0'
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - ">="
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: '0'
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: minitest
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - "~>"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '5.25'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '5.25'
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: rake
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '13.0'
|
47
|
+
type: :development
|
48
|
+
prerelease: false
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '13.0'
|
54
|
+
- !ruby/object:Gem::Dependency
|
55
|
+
name: irb
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
type: :development
|
62
|
+
prerelease: false
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
description: An attempt at making CLI-based data processing simpler.
|
69
|
+
email:
|
70
|
+
- david@flipmine.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files: []
|
74
|
+
files:
|
75
|
+
- lib/draft.rb
|
76
|
+
- lib/fall.rb
|
77
|
+
- lib/fall/version.rb
|
78
|
+
homepage: https://github.com/gillisd/fall
|
79
|
+
licenses:
|
80
|
+
- MIT
|
81
|
+
metadata:
|
82
|
+
allowed_push_host: https://rubygems.org
|
83
|
+
homepage_uri: https://github.com/gillisd/fall
|
84
|
+
source_code_uri: https://github.com/gillisd/fall
|
85
|
+
rubygems_mfa_required: 'true'
|
86
|
+
rdoc_options: []
|
87
|
+
require_paths:
|
88
|
+
- lib
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 3.3.0
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubygems_version: 3.6.6
|
101
|
+
specification_version: 4
|
102
|
+
summary: A CLI-based stream processing utility
|
103
|
+
test_files: []
|