lazier_data 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/lazier_data/item_store.rb +4 -0
- data/lib/lazier_data/processor/child_each.rb +44 -0
- data/lib/lazier_data/processor/child_each_slice.rb +75 -0
- data/lib/lazier_data/processor/root_each.rb +37 -0
- data/lib/lazier_data/processor/root_each_slice.rb +51 -0
- data/lib/lazier_data/processor.rb +30 -94
- data/lib/lazier_data/version.rb +1 -1
- data/lib/lazier_data.rb +6 -40
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 72acf70d853c5d5e20ca99092cb4f032c5fcec0178846710d378a80f701cfef8
|
4
|
+
data.tar.gz: 3b23b30f7cd8ad4bc3dad4bbeaa5ac2b00f00ee5779fc7501fbaef477a0a3ff7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 147f8439216de43c030b5ff87cfa20f121d94e1ea04a8167ad97a551f0c9bd218ff18f98b3f50f335e8f1a37b68665fa4ac09c10ec0d705287d0039ae5222a62
|
7
|
+
data.tar.gz: 20abfe4f10cd0209c291e78fdadfc97377a6425f93848f7eb1668710fc1b48bf30015722f97c0b64d347691944bf7402c367c16787c6277cc5ea5a3e207e1ed8
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class LazierData
|
4
|
+
class Processor
|
5
|
+
class ChildEach
|
6
|
+
NOTHING = :_lazier_data_nothing
|
7
|
+
|
8
|
+
attr_reader :upstream, :downstream, :batch_size, :input_path, :block
|
9
|
+
|
10
|
+
def initialize(upstream, downstream, input_path, &block)
|
11
|
+
@upstream = upstream
|
12
|
+
@downstream = downstream
|
13
|
+
@batch_size = nil
|
14
|
+
@input_path = input_path
|
15
|
+
@block = block
|
16
|
+
end
|
17
|
+
|
18
|
+
def call
|
19
|
+
upstream.each do |root_item, item_store|
|
20
|
+
output_yielders = build_output_yielders(item_store)
|
21
|
+
item_store.dig(*input_path).each do |item|
|
22
|
+
block.call(item, *output_yielders)
|
23
|
+
end
|
24
|
+
downstream << [root_item, item_store]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def output_path_parts
|
31
|
+
@output_path_parts ||= block.parameters[1..].map(&:last)
|
32
|
+
end
|
33
|
+
|
34
|
+
def build_output_yielders(item_store)
|
35
|
+
output_path_parts.map do |output_path_part|
|
36
|
+
::Enumerator::Yielder.new do |item|
|
37
|
+
storage_path = input_path + [output_path_part]
|
38
|
+
item_store.dig(*storage_path) << item
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class LazierData
|
4
|
+
class Processor
|
5
|
+
class ChildEachSlice
|
6
|
+
NOTHING = :_lazier_data_nothing
|
7
|
+
|
8
|
+
attr_reader :upstream, :downstream, :batch_size, :input_path, :block
|
9
|
+
|
10
|
+
def initialize(upstream, downstream, batch_size, input_path, &block)
|
11
|
+
@upstream = upstream
|
12
|
+
@downstream = downstream
|
13
|
+
@batch_size = batch_size
|
14
|
+
@input_path = input_path
|
15
|
+
@block = block
|
16
|
+
end
|
17
|
+
|
18
|
+
def call
|
19
|
+
slicer.each_slice(batch_size) do |raw_yielded|
|
20
|
+
items = raw_yielded.map(&:first)
|
21
|
+
yielders = raw_yielded.last[3]
|
22
|
+
@block.call(items, *yielders)
|
23
|
+
raw_yielded.each do |_, root_item, item_store, _|
|
24
|
+
next if root_item == NOTHING
|
25
|
+
|
26
|
+
downstream << [root_item, item_store]
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def slicer
|
34
|
+
Enumerator.new do |slicer|
|
35
|
+
upstream.each do |root_item, item_store|
|
36
|
+
yield_items(slicer, root_item, item_store)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def yield_items(slicer, root_item, item_store)
|
42
|
+
output_yielders = build_output_yielders(item_store)
|
43
|
+
items = item_store.dig(*input_path)
|
44
|
+
if items.count.zero?
|
45
|
+
downstream << [root_item, item_store]
|
46
|
+
elsif items.count == 1
|
47
|
+
slicer << [items.first, root_item, item_store, output_yielders]
|
48
|
+
elsif items.count > 1
|
49
|
+
yield_multiple(slicer, items, root_item, item_store, output_yielders)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def yield_multiple(slicer, items, root_item, item_store, output_yielders)
|
54
|
+
items[0..-2].each do |item|
|
55
|
+
slicer << [item, NOTHING, item_store, output_yielders]
|
56
|
+
end
|
57
|
+
|
58
|
+
slicer << [items.last, root_item, item_store, output_yielders]
|
59
|
+
end
|
60
|
+
|
61
|
+
def output_path_parts
|
62
|
+
@output_path_parts ||= block.parameters[1..].map(&:last)
|
63
|
+
end
|
64
|
+
|
65
|
+
def build_output_yielders(item_store)
|
66
|
+
output_path_parts.map do |output_path_part|
|
67
|
+
::Enumerator::Yielder.new do |item|
|
68
|
+
storage_path = input_path + [output_path_part]
|
69
|
+
item_store.dig(*storage_path) << item
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class LazierData
|
4
|
+
class Processor
|
5
|
+
class RootEach
|
6
|
+
def initialize(upstream, downstream, &block)
|
7
|
+
@upstream = upstream
|
8
|
+
@downstream = downstream
|
9
|
+
@block = block
|
10
|
+
end
|
11
|
+
|
12
|
+
def call
|
13
|
+
upstream.each do |root_item, item_store|
|
14
|
+
output_yielders = build_output_yielders(item_store)
|
15
|
+
block.call(root_item, *output_yielders)
|
16
|
+
downstream << [root_item, item_store]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
attr_reader :upstream, :downstream, :block
|
23
|
+
|
24
|
+
def output_path_parts
|
25
|
+
@output_path_parts ||= block.parameters[1..].map(&:last)
|
26
|
+
end
|
27
|
+
|
28
|
+
def build_output_yielders(item_store)
|
29
|
+
output_path_parts.map do |output_path_part|
|
30
|
+
::Enumerator::Yielder.new do |item|
|
31
|
+
item_store[output_path_part] << item
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class LazierData
|
4
|
+
class Processor
|
5
|
+
class RootEachSlice
|
6
|
+
def initialize(upstream, downstream, batch_size, &block)
|
7
|
+
@upstream = upstream
|
8
|
+
@downstream = downstream
|
9
|
+
@batch_size = batch_size
|
10
|
+
@block = block
|
11
|
+
end
|
12
|
+
|
13
|
+
def call
|
14
|
+
slicer.each_slice(batch_size) do |raw_yielded|
|
15
|
+
root_items = raw_yielded.map(&:first)
|
16
|
+
yielders = raw_yielded.last[2]
|
17
|
+
@block.call(root_items, *yielders)
|
18
|
+
raw_yielded.each do |root_item, item_store, _|
|
19
|
+
downstream << [root_item, item_store]
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
attr_reader :upstream, :downstream, :batch_size, :block
|
27
|
+
|
28
|
+
def slicer
|
29
|
+
Enumerator.new do |slicer|
|
30
|
+
upstream.each do |root_item, item_store|
|
31
|
+
output_yielders = build_output_yielders(item_store)
|
32
|
+
slicer << [root_item, item_store, output_yielders]
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
# probably go in module
|
38
|
+
def output_path_parts
|
39
|
+
@output_path_parts ||= block.parameters[1..].map(&:last)
|
40
|
+
end
|
41
|
+
|
42
|
+
def build_output_yielders(item_store)
|
43
|
+
output_path_parts.map do |output_path_part|
|
44
|
+
::Enumerator::Yielder.new do |item|
|
45
|
+
item_store[output_path_part] << item
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -1,120 +1,56 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'lazier_data/item_store'
|
4
|
+
|
3
5
|
class LazierData
|
4
6
|
class Processor
|
5
7
|
NOTHING = :_lazier_data_nothing
|
8
|
+
class << self
|
9
|
+
def root(inputs)
|
10
|
+
Enumerator.new do |y|
|
11
|
+
inputs.each do |item|
|
12
|
+
y << [item, ItemStore.new]
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
6
17
|
|
7
|
-
|
8
|
-
|
9
|
-
def initialize(upstream, downstream, batch_size, input_path, output_path_parts, &block)
|
18
|
+
def initialize(upstream, batch_size, path, &block)
|
10
19
|
@upstream = upstream
|
11
20
|
@downstream = downstream
|
12
21
|
@batch_size = batch_size
|
13
|
-
@input_path = input_path
|
14
|
-
@output_path_parts = output_path_parts
|
22
|
+
@path = path
|
15
23
|
@block = block
|
16
24
|
end
|
17
25
|
|
18
26
|
def call
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
end
|
25
|
-
|
26
|
-
private
|
27
|
-
|
28
|
-
def dug # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
|
29
|
-
Enumerator.new do |dug|
|
30
|
-
upstream.each do |root_item, item_store|
|
31
|
-
output_yielders = build_output_yielders(item_store)
|
32
|
-
|
33
|
-
if input_path.empty?
|
34
|
-
log_and_yield(dug, [root_item, root_item, item_store, output_yielders], :root)
|
35
|
-
else
|
36
|
-
items = item_store.dig(*input_path)
|
37
|
-
if items.count.zero?
|
38
|
-
log_and_yield(downstream, [root_item, item_store], :no_dug)
|
39
|
-
elsif items.count == 1
|
40
|
-
log_and_yield(dug, [items.first, root_item, item_store, output_yielders], :only)
|
41
|
-
elsif items.count > 1
|
42
|
-
items[0..-2].each.with_index(1) do |item, item_number|
|
43
|
-
log_and_yield(dug, [item, NOTHING, item_store, output_yielders], :stored, item_number, items.count)
|
44
|
-
end
|
45
|
-
|
46
|
-
log_and_yield(dug, [items.last, root_item, item_store, output_yielders], :last, items.count, items.count)
|
47
|
-
else
|
48
|
-
raise 'wat'
|
49
|
-
end
|
50
|
-
end
|
27
|
+
Enumerator.new do |downstream|
|
28
|
+
if batch_size.nil?
|
29
|
+
build_each_processor(downstream)
|
30
|
+
else
|
31
|
+
build_each_slice_processor(downstream)
|
51
32
|
end
|
52
33
|
end
|
53
34
|
end
|
54
35
|
|
55
|
-
|
56
|
-
dug.each do |item, root_item, item_store, output_yielders|
|
57
|
-
log_and_call(block, [item, *output_yielders], :item)
|
58
|
-
log_and_yield(downstream, [root_item, item_store], :after_item)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
def process_each_slice
|
63
|
-
dug.each_slice(batch_size) do |raw_yielded|
|
64
|
-
item_slice = raw_yielded.map(&:first)
|
65
|
-
log_and_call(block, [item_slice, *raw_yielded.last[3]], :slice)
|
66
|
-
raw_yielded.each do |_, root_item, item_store, _|
|
67
|
-
next if root_item == NOTHING
|
36
|
+
private
|
68
37
|
|
69
|
-
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|
38
|
+
attr_reader :upstream, :downstream, :batch_size, :path, :block
|
73
39
|
|
74
|
-
def
|
75
|
-
|
76
|
-
::
|
77
|
-
|
78
|
-
|
79
|
-
item_store.dig(*storage_path) << item
|
80
|
-
end
|
40
|
+
def build_each_processor(downstream)
|
41
|
+
if path.empty?
|
42
|
+
Processor::RootEach.new(upstream, downstream, &block).call
|
43
|
+
else
|
44
|
+
Processor::ChildEach.new(upstream, downstream, path, &block).call
|
81
45
|
end
|
82
46
|
end
|
83
47
|
|
84
|
-
def
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
def log_and_call(callee, to_yield, msg_type)
|
90
|
-
logger.debug { build_log_message(msg_type, to_yield) }
|
91
|
-
callee.call(*to_yield)
|
92
|
-
end
|
93
|
-
|
94
|
-
def build_log_message(msg_type, to_yield, item_number = nil, found_count = nil) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity
|
95
|
-
case msg_type
|
96
|
-
when :root
|
97
|
-
"yielding dug root item: #{to_yield[0..1].inspect}"
|
98
|
-
when :no_dug
|
99
|
-
"yielding downstream (no dug items from #{input_path.inspect}): #{to_yield[0..0].inspect}"
|
100
|
-
when :only
|
101
|
-
"yielding dug stored item (1 of 1) from #{input_path.inspect}: #{to_yield[0..1].inspect}"
|
102
|
-
when :stored, :last_stored
|
103
|
-
"yielding dug stored item (#{item_number} of #{found_count}) " \
|
104
|
-
"from #{input_path.inspect}: #{to_yield[0..1].inspect}"
|
105
|
-
when :item
|
106
|
-
"yielding item to #{block.source_location}: #{to_yield[0..0].inspect}"
|
107
|
-
when :after_item
|
108
|
-
"yielding downstream (after item): #{to_yield[0..0].inspect}"
|
109
|
-
when :slice
|
110
|
-
"yielding slice to #{block.source_location}: #{to_yield[0..0].inspect}"
|
111
|
-
when :after_slice
|
112
|
-
"yielding downstream (after slice): #{to_yield[0..0].inspect}"
|
48
|
+
def build_each_slice_processor(downstream)
|
49
|
+
if path.empty?
|
50
|
+
Processor::RootEachSlice.new(upstream, downstream, batch_size, &block).call
|
51
|
+
else
|
52
|
+
Processor::ChildEachSlice.new(upstream, downstream, batch_size, path, &block).call
|
113
53
|
end
|
114
54
|
end
|
115
|
-
|
116
|
-
def logger
|
117
|
-
LazierData.logger
|
118
|
-
end
|
119
55
|
end
|
120
56
|
end
|
data/lib/lazier_data/version.rb
CHANGED
data/lib/lazier_data.rb
CHANGED
@@ -3,32 +3,14 @@
|
|
3
3
|
require 'lazier_data/child'
|
4
4
|
require 'lazier_data/item_store'
|
5
5
|
require 'lazier_data/processor'
|
6
|
+
require 'lazier_data/processor/root_each'
|
7
|
+
require 'lazier_data/processor/root_each_slice'
|
8
|
+
require 'lazier_data/processor/child_each'
|
9
|
+
require 'lazier_data/processor/child_each_slice'
|
6
10
|
|
7
11
|
class LazierData
|
8
|
-
class << self
|
9
|
-
attr_writer :logger
|
10
|
-
|
11
|
-
def logger
|
12
|
-
@logger ||= null_logger
|
13
|
-
end
|
14
|
-
|
15
|
-
private
|
16
|
-
|
17
|
-
def null_logger
|
18
|
-
Class.new do
|
19
|
-
class << self
|
20
|
-
def method_missing(*, **, &); end
|
21
|
-
|
22
|
-
def respond_to_missing?
|
23
|
-
true
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
12
|
def initialize(inputs)
|
31
|
-
@initial_processor_builder = proc { root_processor(inputs) }
|
13
|
+
@initial_processor_builder = proc { Processor.root(inputs) }
|
32
14
|
@processor_builders = []
|
33
15
|
@children = {}
|
34
16
|
end
|
@@ -41,9 +23,7 @@ class LazierData
|
|
41
23
|
end
|
42
24
|
|
43
25
|
parent.add do |upstream|
|
44
|
-
|
45
|
-
Processor.new(upstream, downstream, batch_size, my_path, output_path_parts, &block).call
|
46
|
-
end
|
26
|
+
Processor.new(upstream, batch_size, my_path, &block).call
|
47
27
|
end
|
48
28
|
end
|
49
29
|
|
@@ -63,12 +43,7 @@ class LazierData
|
|
63
43
|
@children.fetch(path_part)
|
64
44
|
end
|
65
45
|
|
66
|
-
def logger
|
67
|
-
LazierData.logger
|
68
|
-
end
|
69
|
-
|
70
46
|
def go
|
71
|
-
logger.info { 'initiating processing' }
|
72
47
|
upstream = @initial_processor_builder.call
|
73
48
|
processors = @processor_builders.map do |processor_builder|
|
74
49
|
upstream = processor_builder.call(upstream)
|
@@ -77,7 +52,6 @@ class LazierData
|
|
77
52
|
end
|
78
53
|
|
79
54
|
def go_stepwise
|
80
|
-
logger.info { 'initiating stepwise processing' }
|
81
55
|
stepwise_results = []
|
82
56
|
results = @initial_processor_builder.call.to_a
|
83
57
|
stepwise_results << results
|
@@ -100,14 +74,6 @@ class LazierData
|
|
100
74
|
|
101
75
|
private
|
102
76
|
|
103
|
-
def root_processor(inputs)
|
104
|
-
Enumerator.new do |y|
|
105
|
-
inputs.each do |item|
|
106
|
-
y << [item, ItemStore.new]
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
77
|
def parent
|
112
78
|
self
|
113
79
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lazier_data
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tyler Hartland
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-04-
|
10
|
+
date: 2025-04-12 00:00:00.000000000 Z
|
11
11
|
dependencies: []
|
12
12
|
description: 'Allows setting up data processing that works intuitively, but behind
|
13
13
|
the scenes processes lazily.
|
@@ -31,6 +31,10 @@ files:
|
|
31
31
|
- lib/lazier_data/child.rb
|
32
32
|
- lib/lazier_data/item_store.rb
|
33
33
|
- lib/lazier_data/processor.rb
|
34
|
+
- lib/lazier_data/processor/child_each.rb
|
35
|
+
- lib/lazier_data/processor/child_each_slice.rb
|
36
|
+
- lib/lazier_data/processor/root_each.rb
|
37
|
+
- lib/lazier_data/processor/root_each_slice.rb
|
34
38
|
- lib/lazier_data/version.rb
|
35
39
|
homepage: https://github.com/th7/lazier_data
|
36
40
|
licenses:
|