lazier_data 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 5c57f03c689415868d20b1c1be957564aa4e3b2517aae7dd9d8b51ca24f655f9
-  data.tar.gz: b620c052b871cf8d68d945baea968d834856791e564052f6b26276ab9fd867b7
+  metadata.gz: 72acf70d853c5d5e20ca99092cb4f032c5fcec0178846710d378a80f701cfef8
+  data.tar.gz: 3b23b30f7cd8ad4bc3dad4bbeaa5ac2b00f00ee5779fc7501fbaef477a0a3ff7
 SHA512:
-  metadata.gz: 030f3f706f1b6dacaa8778d7619a8f25b015138799cb90921d3ce5471ae2f2315b6e0382656c0f1198c4c304bd554495846d078323e445c458e60c424ee75fce
-  data.tar.gz: 91bdc0898cc4e93958f0fb40d9fb5b1700b12b8fc1a7eb06ec9a6d3d95990d84611124295bb11b660d4740ee2c9d826c1c7352a130681c423397a9a27062b1e0
+  metadata.gz: 147f8439216de43c030b5ff87cfa20f121d94e1ea04a8167ad97a551f0c9bd218ff18f98b3f50f335e8f1a37b68665fa4ac09c10ec0d705287d0039ae5222a62
+  data.tar.gz: 20abfe4f10cd0209c291e78fdadfc97377a6425f93848f7eb1668710fc1b48bf30015722f97c0b64d347691944bf7402c367c16787c6277cc5ea5a3e207e1ed8
@@ -16,6 +16,10 @@ class LazierData
       end
     end
 
+    def [](key)
+      @store[key][ITEMS_KEY]
+    end
+
     private
 
     def new_layer
data/lib/lazier_data/processor/child_each.rb ADDED
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+class LazierData
+  class Processor
+    class ChildEach
+      NOTHING = :_lazier_data_nothing
+
+      attr_reader :upstream, :downstream, :batch_size, :input_path, :block
+
+      def initialize(upstream, downstream, input_path, &block)
+        @upstream = upstream
+        @downstream = downstream
+        @batch_size = nil
+        @input_path = input_path
+        @block = block
+      end
+
+      def call
+        upstream.each do |root_item, item_store|
+          output_yielders = build_output_yielders(item_store)
+          item_store.dig(*input_path).each do |item|
+            block.call(item, *output_yielders)
+          end
+          downstream << [root_item, item_store]
+        end
+      end
+
+      private
+
+      def output_path_parts
+        @output_path_parts ||= block.parameters[1..].map(&:last)
+      end
+
+      def build_output_yielders(item_store)
+        output_path_parts.map do |output_path_part|
+          ::Enumerator::Yielder.new do |item|
+            storage_path = input_path + [output_path_part]
+            item_store.dig(*storage_path) << item
+          end
+        end
+      end
+    end
+  end
+end
data/lib/lazier_data/processor/child_each_slice.rb ADDED
@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+class LazierData
+  class Processor
+    class ChildEachSlice
+      NOTHING = :_lazier_data_nothing
+
+      attr_reader :upstream, :downstream, :batch_size, :input_path, :block
+
+      def initialize(upstream, downstream, batch_size, input_path, &block)
+        @upstream = upstream
+        @downstream = downstream
+        @batch_size = batch_size
+        @input_path = input_path
+        @block = block
+      end
+
+      def call
+        slicer.each_slice(batch_size) do |raw_yielded|
+          items = raw_yielded.map(&:first)
+          yielders = raw_yielded.last[3]
+          @block.call(items, *yielders)
+          raw_yielded.each do |_, root_item, item_store, _|
+            next if root_item == NOTHING
+
+            downstream << [root_item, item_store]
+          end
+        end
+      end
+
+      private
+
+      def slicer
+        Enumerator.new do |slicer|
+          upstream.each do |root_item, item_store|
+            yield_items(slicer, root_item, item_store)
+          end
+        end
+      end
+
+      def yield_items(slicer, root_item, item_store)
+        output_yielders = build_output_yielders(item_store)
+        items = item_store.dig(*input_path)
+        if items.count.zero?
+          downstream << [root_item, item_store]
+        elsif items.count == 1
+          slicer << [items.first, root_item, item_store, output_yielders]
+        elsif items.count > 1
+          yield_multiple(slicer, items, root_item, item_store, output_yielders)
+        end
+      end
+
+      def yield_multiple(slicer, items, root_item, item_store, output_yielders)
+        items[0..-2].each do |item|
+          slicer << [item, NOTHING, item_store, output_yielders]
+        end
+
+        slicer << [items.last, root_item, item_store, output_yielders]
+      end
+
+      def output_path_parts
+        @output_path_parts ||= block.parameters[1..].map(&:last)
+      end
+
+      def build_output_yielders(item_store)
+        output_path_parts.map do |output_path_part|
+          ::Enumerator::Yielder.new do |item|
+            storage_path = input_path + [output_path_part]
+            item_store.dig(*storage_path) << item
+          end
+        end
+      end
+    end
+  end
+end
data/lib/lazier_data/processor/root_each.rb ADDED
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
+class LazierData
+  class Processor
+    class RootEach
+      def initialize(upstream, downstream, &block)
+        @upstream = upstream
+        @downstream = downstream
+        @block = block
+      end
+
+      def call
+        upstream.each do |root_item, item_store|
+          output_yielders = build_output_yielders(item_store)
+          block.call(root_item, *output_yielders)
+          downstream << [root_item, item_store]
+        end
+      end
+
+      private
+
+      attr_reader :upstream, :downstream, :block
+
+      def output_path_parts
+        @output_path_parts ||= block.parameters[1..].map(&:last)
+      end
+
+      def build_output_yielders(item_store)
+        output_path_parts.map do |output_path_part|
+          ::Enumerator::Yielder.new do |item|
+            item_store[output_path_part] << item
+          end
+        end
+      end
+    end
+  end
+end
data/lib/lazier_data/processor/root_each_slice.rb ADDED
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+class LazierData
+  class Processor
+    class RootEachSlice
+      def initialize(upstream, downstream, batch_size, &block)
+        @upstream = upstream
+        @downstream = downstream
+        @batch_size = batch_size
+        @block = block
+      end
+
+      def call
+        slicer.each_slice(batch_size) do |raw_yielded|
+          root_items = raw_yielded.map(&:first)
+          yielders = raw_yielded.last[2]
+          @block.call(root_items, *yielders)
+          raw_yielded.each do |root_item, item_store, _|
+            downstream << [root_item, item_store]
+          end
+        end
+      end
+
+      private
+
+      attr_reader :upstream, :downstream, :batch_size, :block
+
+      def slicer
+        Enumerator.new do |slicer|
+          upstream.each do |root_item, item_store|
+            output_yielders = build_output_yielders(item_store)
+            slicer << [root_item, item_store, output_yielders]
+          end
+        end
+      end
+
+      # probably go in module
+      def output_path_parts
+        @output_path_parts ||= block.parameters[1..].map(&:last)
+      end
+
+      def build_output_yielders(item_store)
+        output_path_parts.map do |output_path_part|
+          ::Enumerator::Yielder.new do |item|
+            item_store[output_path_part] << item
+          end
+        end
+      end
+    end
+  end
+end
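Note: all four added classes share one pattern. They read [root_item, item_store] pairs from upstream, take the processing block's trailing parameter names as output names, and hand the block one Enumerator::Yielder per output that appends into the item store. The standalone sketch below is illustration only, not gem code (store, block, and output_names are made-up names); it demonstrates the two Ruby features the pattern relies on, Proc#parameters and Enumerator::Yielder.

# Illustration only -- not part of lazier_data. Shows how output names can be
# read off a block's parameter list and turned into Yielder-backed "sinks".
store = Hash.new { |hash, key| hash[key] = [] }

block = proc { |item, evens, odds| (item.even? ? evens : odds) << item }

# Skip the first parameter (the input item) and keep the rest as output names,
# mirroring block.parameters[1..].map(&:last) in the classes above.
output_names = block.parameters[1..].map(&:last) # => [:evens, :odds]

# One Yielder per output; yielder << item simply appends to the matching bucket.
yielders = output_names.map do |name|
  Enumerator::Yielder.new { |item| store[name] << item }
end

(1..6).each { |item| block.call(item, *yielders) }

p store # => {:odds=>[1, 3, 5], :evens=>[2, 4, 6]}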
data/lib/lazier_data/processor.rb CHANGED
@@ -1,120 +1,56 @@
 # frozen_string_literal: true
 
+require 'lazier_data/item_store'
+
 class LazierData
   class Processor
     NOTHING = :_lazier_data_nothing
+    class << self
+      def root(inputs)
+        Enumerator.new do |y|
+          inputs.each do |item|
+            y << [item, ItemStore.new]
+          end
+        end
+      end
+    end
 
-    attr_reader :upstream, :downstream, :batch_size, :input_path, :output_path_parts, :block
-
-    def initialize(upstream, downstream, batch_size, input_path, output_path_parts, &block)
+    def initialize(upstream, batch_size, path, &block)
       @upstream = upstream
       @downstream = downstream
       @batch_size = batch_size
-      @input_path = input_path
-      @output_path_parts = output_path_parts
+      @path = path
       @block = block
     end
 
     def call
-      if batch_size.nil?
-        process_each
-      else
-        process_each_slice
-      end
-    end
-
-    private
-
-    def dug # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
-      Enumerator.new do |dug|
-        upstream.each do |root_item, item_store|
-          output_yielders = build_output_yielders(item_store)
-
-          if input_path.empty?
-            log_and_yield(dug, [root_item, root_item, item_store, output_yielders], :root)
-          else
-            items = item_store.dig(*input_path)
-            if items.count.zero?
-              log_and_yield(downstream, [root_item, item_store], :no_dug)
-            elsif items.count == 1
-              log_and_yield(dug, [items.first, root_item, item_store, output_yielders], :only)
-            elsif items.count > 1
-              items[0..-2].each.with_index(1) do |item, item_number|
-                log_and_yield(dug, [item, NOTHING, item_store, output_yielders], :stored, item_number, items.count)
-              end
-
-              log_and_yield(dug, [items.last, root_item, item_store, output_yielders], :last, items.count, items.count)
-            else
-              raise 'wat'
-            end
-          end
+      Enumerator.new do |downstream|
+        if batch_size.nil?
+          build_each_processor(downstream)
+        else
+          build_each_slice_processor(downstream)
         end
       end
     end
 
-    def process_each
-      dug.each do |item, root_item, item_store, output_yielders|
-        log_and_call(block, [item, *output_yielders], :item)
-        log_and_yield(downstream, [root_item, item_store], :after_item)
-      end
-    end
-
-    def process_each_slice
-      dug.each_slice(batch_size) do |raw_yielded|
-        item_slice = raw_yielded.map(&:first)
-        log_and_call(block, [item_slice, *raw_yielded.last[3]], :slice)
-        raw_yielded.each do |_, root_item, item_store, _|
-          next if root_item == NOTHING
+    private
 
-          log_and_yield(downstream, [root_item, item_store], :after_slice)
-        end
-      end
-    end
+    attr_reader :upstream, :downstream, :batch_size, :path, :block
 
-    def build_output_yielders(item_store)
-      output_path_parts.map do |output_path_part|
-        ::Enumerator::Yielder.new do |item|
-          storage_path = input_path + [output_path_part]
-          logger.debug { "storing item at #{storage_path}: #{item.inspect}" }
-          item_store.dig(*storage_path) << item
-        end
+    def build_each_processor(downstream)
+      if path.empty?
+        Processor::RootEach.new(upstream, downstream, &block).call
+      else
+        Processor::ChildEach.new(upstream, downstream, path, &block).call
       end
     end
 
-    def log_and_yield(yielder, to_yield, msg_type, item_number = nil, found_count = nil)
-      logger.debug { build_log_message(msg_type, to_yield, item_number, found_count) }
-      yielder << to_yield
-    end
-
-    def log_and_call(callee, to_yield, msg_type)
-      logger.debug { build_log_message(msg_type, to_yield) }
-      callee.call(*to_yield)
-    end
-
-    def build_log_message(msg_type, to_yield, item_number = nil, found_count = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity
-      case msg_type
-      when :root
-        "yielding dug root item: #{to_yield[0..1].inspect}"
-      when :no_dug
-        "yielding downstream (no dug items from #{input_path.inspect}): #{to_yield[0..0].inspect}"
-      when :only
-        "yielding dug stored item (1 of 1) from #{input_path.inspect}: #{to_yield[0..1].inspect}"
-      when :stored, :last_stored
-        "yielding dug stored item (#{item_number} of #{found_count}) " \
-          "from #{input_path.inspect}: #{to_yield[0..1].inspect}"
-      when :item
-        "yielding item to #{block.source_location}: #{to_yield[0..0].inspect}"
-      when :after_item
-        "yielding downstream (after item): #{to_yield[0..0].inspect}"
-      when :slice
-        "yielding slice to #{block.source_location}: #{to_yield[0..0].inspect}"
-      when :after_slice
-        "yielding downstream (after slice): #{to_yield[0..0].inspect}"
+    def build_each_slice_processor(downstream)
+      if path.empty?
+        Processor::RootEachSlice.new(upstream, downstream, batch_size, &block).call
+      else
+        Processor::ChildEachSlice.new(upstream, downstream, batch_size, path, &block).call
       end
     end
-
-    def logger
-      LazierData.logger
-    end
   end
 end
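Note: the rewritten Processor is now a thin dispatcher. Its call method wraps the work in an Enumerator and hands off to one of the four strategy classes depending on whether batch_size is set and whether the path is empty. Below is a condensed, standalone restatement of that selection; strategy_class and the :users path are illustrative only, not gem API.

# Illustration only: the dispatch performed by the rewritten Processor#call,
# restated as a standalone method that returns the class name as a string.
def strategy_class(batch_size, path)
  if batch_size.nil?
    path.empty? ? 'Processor::RootEach' : 'Processor::ChildEach'
  else
    path.empty? ? 'Processor::RootEachSlice' : 'Processor::ChildEachSlice'
  end
end

strategy_class(nil, [])        # => "Processor::RootEach"
strategy_class(nil, [:users])  # => "Processor::ChildEach"
strategy_class(100, [])        # => "Processor::RootEachSlice"
strategy_class(100, [:users])  # => "Processor::ChildEachSlice"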
data/lib/lazier_data/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 class LazierData
-  VERSION = '0.1.1'
+  VERSION = '0.1.2'
 end
data/lib/lazier_data.rb CHANGED
@@ -3,32 +3,14 @@
 require 'lazier_data/child'
 require 'lazier_data/item_store'
 require 'lazier_data/processor'
+require 'lazier_data/processor/root_each'
+require 'lazier_data/processor/root_each_slice'
+require 'lazier_data/processor/child_each'
+require 'lazier_data/processor/child_each_slice'
 
 class LazierData
-  class << self
-    attr_writer :logger
-
-    def logger
-      @logger ||= null_logger
-    end
-
-    private
-
-    def null_logger
-      Class.new do
-        class << self
-          def method_missing(*, **, &); end
-
-          def respond_to_missing?
-            true
-          end
-        end
-      end
-    end
-  end
-
   def initialize(inputs)
-    @initial_processor_builder = proc { root_processor(inputs) }
+    @initial_processor_builder = proc { Processor.root(inputs) }
    @processor_builders = []
    @children = {}
  end
@@ -41,9 +23,7 @@ class LazierData
     end
 
     parent.add do |upstream|
-      Enumerator.new do |downstream|
-        Processor.new(upstream, downstream, batch_size, my_path, output_path_parts, &block).call
-      end
+      Processor.new(upstream, batch_size, my_path, &block).call
     end
   end
 
@@ -63,12 +43,7 @@ class LazierData
     @children.fetch(path_part)
   end
 
-  def logger
-    LazierData.logger
-  end
-
   def go
-    logger.info { 'initiating processing' }
     upstream = @initial_processor_builder.call
     processors = @processor_builders.map do |processor_builder|
       upstream = processor_builder.call(upstream)
@@ -77,7 +52,6 @@ class LazierData
   end
 
   def go_stepwise
-    logger.info { 'initiating stepwise processing' }
     stepwise_results = []
     results = @initial_processor_builder.call.to_a
     stepwise_results << results
@@ -100,14 +74,6 @@ class LazierData
 
   private
 
-  def root_processor(inputs)
-    Enumerator.new do |y|
-      inputs.each do |item|
-        y << [item, ItemStore.new]
-      end
-    end
-  end
-
   def parent
     self
   end
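Note: as the hunks above show, each processor builder now returns the Enumerator from Processor#call directly, and go still threads one builder's result into the next as upstream. The toy sketch below (illustration only, not gem API) shows why that composition stays lazy: an Enumerator.new block runs only when the outer enumerator is consumed.

# Illustration only: chained Enumerator.new blocks stay lazy until consumed,
# which is the property the processor chain in LazierData#go relies on.
upstream = Enumerator.new do |y|
  [1, 2, 3].each do |item|
    puts "producing #{item}"
    y << item
  end
end

doubled = Enumerator.new do |downstream|
  upstream.each { |item| downstream << item * 2 }
end

puts 'nothing produced yet'
p doubled.to_a # the "producing" lines print only here => [2, 4, 6]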
metadata CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: lazier_data
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Tyler Hartland
 bindir: exe
 cert_chain: []
-date: 2025-04-11 00:00:00.000000000 Z
+date: 2025-04-12 00:00:00.000000000 Z
 dependencies: []
 description: 'Allows setting up data processing that works intuitively, but behind
   the secenes processes lazily.
@@ -31,6 +31,10 @@ files:
 - lib/lazier_data/child.rb
 - lib/lazier_data/item_store.rb
 - lib/lazier_data/processor.rb
+- lib/lazier_data/processor/child_each.rb
+- lib/lazier_data/processor/child_each_slice.rb
+- lib/lazier_data/processor/root_each.rb
+- lib/lazier_data/processor/root_each_slice.rb
 - lib/lazier_data/version.rb
 homepage: https://github.com/th7/lazier_data
 licenses: