tiny_pipe 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/tiny_pipe.rb +91 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 32e78e9a4423c88ebe178b9013eb14a6a33a867c5eba7af4a12f8f19bf4553db
|
4
|
+
data.tar.gz: ca11e754a546566b2caa18840eb419e08649dcd4286b20db00bf1eedfbe2eaaa
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d1e7758f6ce08561adfa405389f13a0695a46e08b12baf44fce035374878f4c59c326bc6d64fa9956d6cc2bebd5f14759f4846e672a67224417e02a27f6aba13
|
7
|
+
data.tar.gz: 000d3635802eb9b81ff006b1bee53838355f0d7934dec4f4c981953c69ef80d405769b9aaf10b5207f597441ef0811fb0327b4ae6790b584aa533e814e77b2b5
|
data/lib/tiny_pipe.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
# this class wraps a reusable pipeline of Procs, similar to chaining method
|
2
|
+
# calls from the Enumerable module, except you can chain long and complicated
|
3
|
+
# things all in one object and then reuse that pipeline anywhere. sure you could
|
4
|
+
# do basically the same thing by defining a method - not disagreeing there.
|
5
|
+
#
|
6
|
+
# this was originally built to be the basis of a pipeline for processing complex
|
7
|
+
# and inconsistently formatted lines of text from log files.
|
8
|
+
#
|
9
|
+
# Usage:
|
10
|
+
# let's look at a text processing example where you want to process lines from
|
11
|
+
# a log file and convert it into a more helpful data structure. let's start
|
12
|
+
# with this line from a log file:
|
13
|
+
# 2022-11-19T17:34:05.299295Z 25888 INFO loading configuration from ./config.yml\n
|
14
|
+
#
|
15
|
+
# ... and let's say that we want to extract the parts of this line into a
|
16
|
+
# Hash, then we might build a pipeline like this:
|
17
|
+
#
|
18
|
+
# line = "2022-11-19T17:34:05.299295Z 25888 INFO loading configuration from ./config.yml\n"
|
19
|
+
# p = TinyPipe.new(
|
20
|
+
# # NOTE: the constructor takes only the list of procs; the input goes to #pipe
|
21
|
+
# [
|
22
|
+
# TinyPipe::MAP_STRIP, # the list of procs
|
23
|
+
# ->(line){ line.split(' ', 4) },
|
24
|
+
# ->(list){
|
25
|
+
# {
|
26
|
+
# timestamp: DateTime.parse(list[0]),
|
27
|
+
# process_id: list[1].to_i,
|
28
|
+
# log_level: list[2].downcase.to_sym,
|
29
|
+
# log_line: list[3]
|
30
|
+
# }
|
31
|
+
# },
|
32
|
+
# ]
|
33
|
+
# )
|
34
|
+
#
|
35
|
+
# parts = p.pipe(line)
|
36
|
+
#
|
37
|
+
# If you had a whole log file of lines then you just wrap the above with
|
38
|
+
# something like this:
|
39
|
+
#
|
40
|
+
# results = log_file.each_line.map{|l| p.pipe(l) }
|
41
|
+
# [ ... an array of Hashes ... ]
|
42
|
+
#
|
43
|
+
# TinyPipe comes with a few common pipeline steps for common text processing
|
44
|
+
# cases that you can pass in as elements of the procs parameter to the
|
45
|
+
# initialize method.
|
46
|
+
class TinyPipe
  # NOTE(review): this list is not referenced anywhere else in this class;
  # presumably it documents the intended input types -- confirm before relying
  # on it for validation.
  SUPPORTED_INPUT_CLASSES = [String, Array, IO].freeze

  # some pre-built procs for common text processing steps

  # String -> String transformations
  MAP_STRIP    = ->(l){ l.strip }
  MAP_UPCASE   = ->(l){ l.upcase }
  MAP_DOWNCASE = ->(l){ l.downcase }

  # list -> String: join the elements with the named separator
  JOIN        = ->(l){ l.join }
  JOIN_SPACE  = ->(l){ l.join(' ') }
  JOIN_COMMA  = ->(l){ l.join(',') }
  JOIN_TAB    = ->(l){ l.join("\t") }
  JOIN_PIPE   = ->(l){ l.join('|') }

  # String -> list: split on the named separator (SPLIT_SPACE uses the default
  # behavior of String#split with no arguments)
  SPLIT_SPACE = ->(l){ l.split }
  SPLIT_COMMA = ->(l){ l.split(',') }
  SPLIT_TAB   = ->(l){ l.split("\t") }
  SPLIT_PIPE  = ->(l){ l.split('|') }

  # list -> element: pick the first/last element
  FIELD_FIRST = ->(l){ l.first }
  FIELD_LAST  = ->(l){ l.last }

  # filters: returning nil makes #pipe stop early and return nil for the item
  SELECT_EMPTY = ->(l){ l.empty? ? l : nil }
  REJECT_EMPTY = ->(l){ l.empty? ? nil : l }

  # procs: the list of procs to be run over the input lines, in the order in
  #   which they should be run. A single callable is treated as a one-step
  #   pipeline, and nil as an empty pipeline.
  #
  # The list is copied and frozen so that later changes to the caller's array
  # cannot silently alter an already-built (and possibly shared) pipeline.
  def initialize(procs)
    @procs = Array(procs).dup.freeze
  end

  # process a single input item, returning the processed item
  #
  # item: the value handed to the first step; it is dup'ed first, so steps
  #   receive a copy rather than the caller's object
  #
  # returns the value produced by the last step, the dup'ed item when the
  # pipeline has no steps, or nil as described below
  #
  # NOTE: the pipeline will exit early if any step within the pipeline returns
  # nil (the remaining steps are not called, and nil is returned)
  def pipe(item)
    @procs.reduce(item.dup) do |value, step|
      # a nil value (from the input or from a previous step) ends the pipeline
      break value if value.nil?

      step.call(value)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tiny_pipe
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jeff Lunt
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-11-21 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: reusable pipelines of Procs, like one tiny level above Enumerable
|
14
|
+
email: jefflunt@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/tiny_pipe.rb
|
20
|
+
homepage: https://github.com/jefflunt/tiny_pipe
|
21
|
+
licenses:
|
22
|
+
- MIT
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubygems_version: 3.3.7
|
40
|
+
signing_key:
|
41
|
+
specification_version: 4
|
42
|
+
summary: want reusable Proc-based pipelines, and absolutely nothing else? then this
|
43
|
+
library is for you.
|
44
|
+
test_files: []
|