tiny_pipe 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/tiny_pipe.rb +91 -0
  3. metadata +44 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 32e78e9a4423c88ebe178b9013eb14a6a33a867c5eba7af4a12f8f19bf4553db
4
+ data.tar.gz: ca11e754a546566b2caa18840eb419e08649dcd4286b20db00bf1eedfbe2eaaa
5
+ SHA512:
6
+ metadata.gz: d1e7758f6ce08561adfa405389f13a0695a46e08b12baf44fce035374878f4c59c326bc6d64fa9956d6cc2bebd5f14759f4846e672a67224417e02a27f6aba13
7
+ data.tar.gz: 000d3635802eb9b81ff006b1bee53838355f0d7934dec4f4c981953c69ef80d405769b9aaf10b5207f597441ef0811fb0327b4ae6790b584aa533e814e77b2b5
data/lib/tiny_pipe.rb ADDED
@@ -0,0 +1,91 @@
1
+ # this class wraps a reusable pipeline of Procs, similar to chaining method
2
+ # calls from the Enumerable module, except you can chain long and complicated
3
+ # things all in one object and then reuse that pipeline anywhere. sure you could
4
+ # do basically the same thing by defining a method - not disagreeing there.
5
+ #
6
+ # this was originally built to be the basis of a pipeline for processing complex
7
+ # and inconsistently formatted lines of text from log files.
8
+ #
9
+ # Usage:
10
+ # let's look at a text processing example where you want to process lines from
11
+ # a log file and convert it into a more helpful data structure. let's start
12
+ # with this line from a log file:
13
+ # 2022-11-19T17:34:05.299295Z 25888 INFO loading configuration from ./config.yml\n
14
+ #
15
+ # ... and let's say that we want to extract the parts of this line into a
16
+ # Hash, then we might build a pipeline like this:
17
+ #
18
+ # line = "2022-11-19T17:34:05.299295Z 25888 INFO loading configuration from ./config.yml\n"
19
+ # p = TinyPipe.new(
20
+ # # only the procs go here; each input line is passed to #pipe below
21
+ # [
22
+ # TinyPipe::MAP_STRIP, # the list of procs
23
+ # ->(line){ line.split(' ', 4) },
24
+ # ->(list){
25
+ # {
26
+ # timestamp: DateTime.parse(list[0]),
27
+ # process_id: list[1].to_i,
28
+ # log_level: list[2].downcase.to_sym,
29
+ # log_line: list[3]
30
+ # }
31
+ # },
32
+ # ]
33
+ # )
34
+ #
35
+ # parts = p.pipe(line)
36
+ #
37
+ # If you had a whole log file of lines then you just wrap the above with
38
+ # something like this:
39
+ #
40
+ # results = log_file.each_line.map{|l| p.pipe(l) }
41
+ # [ ... an array of Hashes ... ]
42
+ #
43
+ # TinyPipe comes with a few common pipeline steps for common text processing
44
+ # cases that you can pass in as elements of the procs parameter to the
45
+ # initialize method.
46
+ class TinyPipe
47
+ SUPPORTED_INPUT_CLASSES = [String, Array, IO].freeze
48
+
49
+ # some pre-build procs for common text processing steps
50
+ MAP_STRIP = ->(l){ l.strip }
51
+ MAP_UPCASE = ->(l){ l.upcase }
52
+ MAP_DOWNCASE = ->(l){ l.downcase }
53
+
54
+ JOIN = ->(l){ l.join }
55
+ JOIN_SPACE = ->(l){ l.join(' ') }
56
+ JOIN_COMMA = ->(l){ l.join(',') }
57
+ JOIN_TAB = ->(l){ l.join("\t") }
58
+ JOIN_PIPE = ->(l){ l.join('|') }
59
+
60
+ SPLIT_SPACE = ->(l){ l.split }
61
+ SPLIT_COMMA = ->(l){ l.split(',') }
62
+ SPLIT_TAB = ->(l){ l.split("\t") }
63
+ SPLIT_PIPE = ->(l){ l.split('|') }
64
+
65
+ FIELD_FIRST = ->(l){ l.first }
66
+ FIELD_LAST = ->(l){ l.last }
67
+
68
+ SELECT_EMPTY = ->(l){ l.empty? ? l : nil }
69
+ REJECT_EMPTY = ->(l){ l.empty? ? nil : l }
70
+
71
+ # procs: the list of procs to be run over the input lines, in the order in
72
+ # which they should be run
73
+ def initialize(procs)
74
+ @procs = procs
75
+ end
76
+
77
+ # process a single input item, returning the processed item
78
+ #
79
+ # NOTE: the pipeline will exit early if any step within the pipeline returns
80
+ # nil
81
+ def pipe(item)
82
+ dup = item.dup
83
+
84
+ @procs.each do |p|
85
+ break if dup.nil?
86
+ dup = p.call(dup)
87
+ end
88
+
89
+ dup
90
+ end
91
+ end
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tiny_pipe
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Jeff Lunt
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-11-21 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: reusable pipelines of Procs, like one tiny level above Enumerable
14
+ email: jefflunt@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/tiny_pipe.rb
20
+ homepage: https://github.com/jefflunt/tiny_pipe
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubygems_version: 3.3.7
40
+ signing_key:
41
+ specification_version: 4
42
+ summary: want reusable Proc-based pipelines, and absolutely nothing else? then this
43
+ library is for you.
44
+ test_files: []