tiny_pipe 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/tiny_pipe.rb +91 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 32e78e9a4423c88ebe178b9013eb14a6a33a867c5eba7af4a12f8f19bf4553db
|
4
|
+
data.tar.gz: ca11e754a546566b2caa18840eb419e08649dcd4286b20db00bf1eedfbe2eaaa
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d1e7758f6ce08561adfa405389f13a0695a46e08b12baf44fce035374878f4c59c326bc6d64fa9956d6cc2bebd5f14759f4846e672a67224417e02a27f6aba13
|
7
|
+
data.tar.gz: 000d3635802eb9b81ff006b1bee53838355f0d7934dec4f4c981953c69ef80d405769b9aaf10b5207f597441ef0811fb0327b4ae6790b584aa533e814e77b2b5
|
data/lib/tiny_pipe.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
# this class wraps a reusable pipeline of Procs, similar to chaining method
|
2
|
+
# calls from the Enumerable module, except you can chain long and complicated
|
3
|
+
# things all in one object and then reuse that pipeline anywhere. sure you could
|
4
|
+
# do basically the same thing by defining a method - not disagreeing there.
|
5
|
+
#
|
6
|
+
# this was originally built to be the basis of a pipeline for processing complex
|
7
|
+
# and inconsistently formatted lines of text from log files.
|
8
|
+
#
|
9
|
+
# Usage:
|
10
|
+
# let's look at a text processing example where you want to process lines from
|
11
|
+
# a log file and convert it into a more helpful data structure. let's start
|
12
|
+
# with this line from a log file:
|
13
|
+
# 2022-11-19T17:34:05.299295Z 25888 INFO loading configuration from ./config.yml\n
|
14
|
+
#
|
15
|
+
# ... and let's say that we want to extract the parts of this line into a
|
16
|
+
# Hash, then we might build a pipeline like this:
|
17
|
+
#
|
18
|
+
# line = "2022-11-19T17:34:05.299295Z 25888 INFO loading configuration from ./config.yml\n"
|
19
|
+
# p = TinyPipe.new(
|
20
|
+
# # note: only the list of procs is passed here; the input goes to #pipe
|
21
|
+
# [
|
22
|
+
# TinyPipe::MAP_STRIP, # the list of procs
|
23
|
+
# ->(line){ line.split(' ', 4) },
|
24
|
+
# ->(list){
|
25
|
+
# {
|
26
|
+
# timestamp: DateTime.parse(list[0]),
|
27
|
+
# process_id: list[1].to_i,
|
28
|
+
# log_level: list[2].downcase.to_sym,
|
29
|
+
# log_line: list[3]
|
30
|
+
# }
|
31
|
+
# },
|
32
|
+
# ]
|
33
|
+
# )
|
34
|
+
#
|
35
|
+
# parts = p.pipe(line)
|
36
|
+
#
|
37
|
+
# If you had a whole log file of lines then you just wrap the above with
|
38
|
+
# something like this:
|
39
|
+
#
|
40
|
+
# results = log_file.each_line.map{|l| p.pipe(l) }
|
41
|
+
# [ ... an array of Hashes ... ]
|
42
|
+
#
|
43
|
+
# TinyPipe comes with a few common pipeline steps for common text processing
|
44
|
+
# cases that you can pass in as elements of the procs parameter to the
|
45
|
+
# initialize method.
|
46
|
+
class TinyPipe
  SUPPORTED_INPUT_CLASSES = [String, Array, IO].freeze

  # some pre-built procs for common text processing steps. each one takes a
  # single argument and returns the transformed value.
  MAP_STRIP    = ->(l) { l.strip }
  MAP_UPCASE   = ->(l) { l.upcase }
  MAP_DOWNCASE = ->(l) { l.downcase }

  JOIN       = ->(l) { l.join }
  JOIN_SPACE = ->(l) { l.join(' ') }
  JOIN_COMMA = ->(l) { l.join(',') }
  JOIN_TAB   = ->(l) { l.join("\t") }
  JOIN_PIPE  = ->(l) { l.join('|') }

  SPLIT_SPACE = ->(l) { l.split }
  SPLIT_COMMA = ->(l) { l.split(',') }
  SPLIT_TAB   = ->(l) { l.split("\t") }
  SPLIT_PIPE  = ->(l) { l.split('|') }

  FIELD_FIRST = ->(l) { l.first }
  FIELD_LAST  = ->(l) { l.last }

  # these two return nil for the items they filter out, which makes #pipe
  # stop early for those items
  SELECT_EMPTY = ->(l) { l.empty? ? l : nil }
  REJECT_EMPTY = ->(l) { l.empty? ? nil : l }

  # procs: the list of procs to be run over the input lines, in the order in
  #        which they should be run. a single callable (or nil, meaning "no
  #        steps") is also accepted and is wrapped into a list.
  #
  # the list is copied and frozen so that later changes to the caller's array
  # cannot alter an already-built pipeline.
  def initialize(procs)
    @procs = Array(procs).dup.freeze
  end

  # process a single input item, returning the processed item
  #
  # the item is dup'ed before the first step, so steps that mutate their
  # argument in place do not modify the caller's object.
  #
  # NOTE: the pipeline will exit early if any step within the pipeline returns
  #       nil, in which case nil is returned
  def pipe(item)
    result = item.dup

    @procs.each do |step|
      break if result.nil?

      result = step.call(result)
    end

    result
  end
end
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tiny_pipe
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jeff Lunt
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-11-21 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: reusable pipelines of Procs, like one tiny level above Enumerable
|
14
|
+
email: jefflunt@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/tiny_pipe.rb
|
20
|
+
homepage: https://github.com/jefflunt/tiny_pipe
|
21
|
+
licenses:
|
22
|
+
- MIT
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubygems_version: 3.3.7
|
40
|
+
signing_key:
|
41
|
+
specification_version: 4
|
42
|
+
summary: want reusable Proc-based pipelines, and absolutely nothing else? then this
|
43
|
+
library is for you.
|
44
|
+
test_files: []
|