lazier_data 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +94 -15
- data/lib/lazier_data/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c57f03c689415868d20b1c1be957564aa4e3b2517aae7dd9d8b51ca24f655f9
|
4
|
+
data.tar.gz: b620c052b871cf8d68d945baea968d834856791e564052f6b26276ab9fd867b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 030f3f706f1b6dacaa8778d7619a8f25b015138799cb90921d3ce5471ae2f2315b6e0382656c0f1198c4c304bd554495846d078323e445c458e60c424ee75fce
|
7
|
+
data.tar.gz: 91bdc0898cc4e93958f0fb40d9fb5b1700b12b8fc1a7eb06ec9a6d3d95990d84611124295bb11b660d4740ee2c9d826c1c7352a130681c423397a9a27062b1e0
|
data/README.md
CHANGED
@@ -1,28 +1,107 @@
|
|
1
|
-
#
|
1
|
+
# LazierData
|
2
2
|
|
3
|
-
|
3
|
+
The incredible productivity of massive laziness.
|
4
4
|
|
5
|
-
|
5
|
+
LazierData allows splitting, filtering, transforming, batching, and basically everything else you might want to do with data. LazierData processing _looks like_ multiple full iterations of the input data, but under the hood it will process incrementally.
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_ORG` with your gem name right after releasing it to RubyGems.org. Please do not do it earlier due to security reasons. Alternatively, replace this section with instructions to install your gem from git if you don't plan to release to RubyGems.org.
|
7
|
+
LazierData guarantees each row of input data will be processed by steps in the order the steps are defined. This means not only does LazierData processing _look like_ each step is happening in order, but you can generally think about your data that way.
|
10
8
|
|
11
|
-
|
9
|
+
## Installation
|
12
10
|
|
13
|
-
|
14
|
-
|
11
|
+
In your Gemfile:
|
12
|
+
```ruby
|
13
|
+
source 'https://rubygems.org'
|
14
|
+
gem 'lazier_data'
|
15
15
|
```
|
16
|
+
Then:
|
17
|
+
`bundle`
|
16
18
|
|
17
|
-
|
19
|
+
## Usage
|
18
20
|
|
19
|
-
```
|
20
|
-
|
21
|
+
```ruby
|
22
|
+
# inputs are anything that can be iterated with .each
|
23
|
+
lazier = LazierData.new(inputs)
|
24
|
+
|
25
|
+
# transform
|
26
|
+
lazier.enum do |input, clean_inputs|
|
27
|
+
clean_inputs << input.except(:unwanted_field)
|
28
|
+
end
|
29
|
+
|
30
|
+
# LazierData uses the parameter name from your previous block
|
31
|
+
# to allow further processing
|
32
|
+
# let's filter our clean inputs
|
33
|
+
lazier[:clean_inputs].enum do |input, filtered_inputs|
|
34
|
+
filtered_inputs << input unless input[:skip_me]
|
35
|
+
end
|
36
|
+
|
37
|
+
# let's say our inputs have multiple types
|
38
|
+
# that need to be handled differently
|
39
|
+
lazier[:clean_inputs][:filtered_inputs].enum do |input, type_a, type_b|
|
40
|
+
case input[:type]
|
41
|
+
when :a
|
42
|
+
type_a << input
|
43
|
+
when :b
|
44
|
+
type_b << input
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
# LazierData puts the results of your previous steps
|
49
|
+
# in the corresponding places according to your block params
|
50
|
+
type_a = lazier[:clean_inputs][:filtered_inputs][:type_a]
|
51
|
+
type_b = lazier[:clean_inputs][:filtered_inputs][:type_b]
|
52
|
+
|
53
|
+
# we can now process each type separately
|
54
|
+
type_a.each_slice(1000) do |batch_a|
|
55
|
+
ModelA.upsert_all(
|
56
|
+
batch_a,
|
57
|
+
unique_by: %i[unique_field1 unique_field2]
|
58
|
+
)
|
59
|
+
end
|
60
|
+
type_b.each_slice(1000) do |batch_b|
|
61
|
+
ModelB.upsert_all(
|
62
|
+
batch_b,
|
63
|
+
unique_by: %i[unique_field3 unique_field4]
|
64
|
+
)
|
65
|
+
end
|
66
|
+
|
67
|
+
# we can also pull sub items off the inputs
|
68
|
+
# note that in this example we're taking sub_items from all inputs
|
69
|
+
# instead of lazier[:clean_inputs][:filtered_inputs].enum do ...
|
70
|
+
# while it may look like we're reprocessing the entire input list
|
71
|
+
# under the hood LazierData will pass each item through each step
|
72
|
+
lazier.enum do |input, sub_items|
|
73
|
+
# the below is shorthand for:
|
74
|
+
# input[:sub_items].each { |sub_item| sub_items << sub_item }
|
75
|
+
input[:sub_items].each(&sub_items)
|
76
|
+
end
|
77
|
+
|
78
|
+
# you can mutate items directly
|
79
|
+
lazier[:sub_items].each do |sub_item|
|
80
|
+
sub_item[:other_data] = fetch_other_data
|
81
|
+
sub_item[:mutated] = true
|
82
|
+
end
|
83
|
+
|
84
|
+
# items are guaranteed to have passed through previous steps
|
85
|
+
# before reaching later steps
|
86
|
+
lazier[:sub_items].each_slice(1000) do |sub_items|
|
87
|
+
if sub_items.any? { |sub_item| !sub_item[:mutated] }
|
88
|
+
# this will never happen :)
|
89
|
+
raise 'LazierData failed me!'
|
90
|
+
end
|
91
|
+
|
92
|
+
SubItem.upsert_all(
|
93
|
+
sub_items,
|
94
|
+
unique_by: %i[unique_sub_item_field]
|
95
|
+
)
|
96
|
+
end
|
97
|
+
|
98
|
+
# at this point in the code
|
99
|
+
# none of the above has actually happened
|
100
|
+
# so, finally, we actually go
|
101
|
+
lazier.go
|
21
102
|
```
|
22
103
|
|
23
|
-
|
24
|
-
|
25
|
-
TODO: Write usage instructions here
|
104
|
+
See [lazier_data_spec.rb](spec/lazier_data_spec.rb) for proven examples.
|
26
105
|
|
27
106
|
## Development
|
28
107
|
|
data/lib/lazier_data/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lazier_data
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tyler Hartland
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-04-
|
10
|
+
date: 2025-04-11 00:00:00.000000000 Z
|
11
11
|
dependencies: []
|
12
12
|
description: 'Allows setting up data processing that works intuitively, but behind
|
13
13
|
the scenes processes lazily.
|
@@ -32,13 +32,13 @@ files:
|
|
32
32
|
- lib/lazier_data/item_store.rb
|
33
33
|
- lib/lazier_data/processor.rb
|
34
34
|
- lib/lazier_data/version.rb
|
35
|
-
homepage: https://github.com/th7/
|
35
|
+
homepage: https://github.com/th7/lazier_data
|
36
36
|
licenses:
|
37
37
|
- MIT
|
38
38
|
metadata:
|
39
39
|
allowed_push_host: https://rubygems.org
|
40
|
-
homepage_uri: https://github.com/th7/
|
41
|
-
source_code_uri: https://github.com/th7/
|
40
|
+
homepage_uri: https://github.com/th7/lazier_data
|
41
|
+
source_code_uri: https://github.com/th7/lazier_data
|
42
42
|
rubygems_mfa_required: 'true'
|
43
43
|
rdoc_options: []
|
44
44
|
require_paths:
|