datamix 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/datamix/refinements/array.rb +48 -1
- data/lib/datamix/refinements/csv_table.rb +11 -0
- data/lib/datamix/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2cf4ea62b0a5c3151b16327c1bf13aaa05c9af55
|
4
|
+
data.tar.gz: 0a45edcde7a6c198ceb68b5e69654b1e4cdc77f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 947376b91089d8389c446101b06b0386e247aa4dd372eda9c88ebd399f15817c6326cc9b96a97311fd7b9635a4e9dc9dd0ed3216ae54f4b35a5617aa50868fb5
|
7
|
+
data.tar.gz: 9dfb88a50870a940da4454a1084bdc145af6ba30ff8e3c757e82e8382d22d6a0ec17ade3f356ed6d03c669ccdb27ecdb2c1b38498ccca6855fb23144cc6d4459
|
@@ -33,6 +33,53 @@ module DataMix
|
|
33
33
|
offset -rows
|
34
34
|
end
|
35
35
|
|
36
|
+
# Builds a new array that resembles the receiver: the values are cut into
# randomly sized chunks, the chunks are shuffled, and every chunk is shifted
# so that it continues from the end of the chunk placed before it. Finally
# the whole series is shifted so its minimum equals the receiver's minimum.
# The receiver itself is not modified.
#
# chunk_range - passed to Random#rand to draw each chunk size; a drawn
#               value k cuts off the next k elements.
# seed        - optional seed for the random generator; the same seed and
#               input give the same output.
#
# Returns a new Array of the same length as the receiver.
def resample(chunk_range, seed: nil)
  # Nothing to cut into chunks; without this guard `chunks.last` is nil.
  return [] if empty?

  generator = seed ? Random.new(seed) : Random.new

  # Remember the lowest value, the result is re-anchored to it at the end
  min_value = min

  # Cut a working copy into consecutive chunks of random size
  chunks = []
  remaining = dup
  until remaining.empty?
    seam = generator.rand(chunk_range) - 1
    chunks.push remaining.slice!(0..seam)
  end

  # A trailing one-element chunk has no internal change to sample from, so
  # fold it into the chunk before it. Only do so when such a chunk exists,
  # otherwise the lone chunk would be merged into itself and then dropped.
  if chunks.size > 1 && chunks.last.size == 1
    orphan = chunks.pop
    chunks.last.concat orphan
  end

  # Shuffle the chunks
  chunks = chunks.sample chunks.size, random: generator

  # Shift each chunk so that it starts one step after the end of the chunk
  # before it. The step is a randomly picked change between two neighbouring
  # values inside the chunk being shifted.
  chunks.each_cons(2) do |previous, chunk|
    connector = previous.last

    # A one-element chunk has no internal changes; continue flat from the
    # connector instead of failing on a nil sample.
    step = chunk.each_cons(2).map { |a, b| b - a }.sample(random: generator) || 0

    delta = step + connector - chunk.first
    chunk.map! { |val| val + delta }
  end

  # Merge chunks to a flat array
  result = chunks.flatten

  # Move the entire series up or down so that its minimum equals the
  # minimum recorded at the beginning.
  offset = result.min - min_value
  result.map! { |val| val - offset }
end
|
82
|
+
|
36
83
|
def round(decimals=0)
|
37
84
|
map { |val| val ? val.round(decimals) : nil }
|
38
85
|
end
|
@@ -62,4 +109,4 @@ module DataMix
|
|
62
109
|
end
|
63
110
|
|
64
111
|
end
|
65
|
-
end
|
112
|
+
end
|
@@ -76,6 +76,17 @@ module DataMix
|
|
76
76
|
delete from
|
77
77
|
end
|
78
78
|
|
79
|
+
# Create a similar data table with resampled data
|
80
|
+
# Replaces every column, apart from the excluded ones, with the result of
# calling +resample+ on that column's values.
#
# range  - forwarded unchanged to each column's +resample+.
# except - a header, or an Array of headers, to leave untouched.
# seed   - forwarded to each column's +resample+; when omitted, the current
#          time is used so that all columns receive one shared seed.
#
# Returns the list of headers that were processed.
def resample(range, except: [], seed: nil)
  skipped = except.is_a?(Array) ? except : [except]
  targets = headers.reject { |header| skipped.include?(header) }
  shared_seed = seed || Time.now.to_f

  targets.each { |name| by_col[name] = by_col[name].resample(range, seed: shared_seed) }
end
|
89
|
+
|
79
90
|
# Rounds all values in a column
|
80
91
|
def round(col, decimals: 0)
|
81
92
|
by_col[col] = by_col[col].map { |val| val ? val.round(decimals) : nil }
|
data/lib/datamix/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datamix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danny Ben Shitrit
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: terminal-table
|
@@ -184,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
184
184
|
version: '0'
|
185
185
|
requirements: []
|
186
186
|
rubyforge_project:
|
187
|
-
rubygems_version: 2.
|
187
|
+
rubygems_version: 2.5.1
|
188
188
|
signing_key:
|
189
189
|
specification_version: 4
|
190
190
|
summary: DSL for manipulating tabular data
|