datamix 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/datamix/refinements/array.rb +48 -1
- data/lib/datamix/refinements/csv_table.rb +11 -0
- data/lib/datamix/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2cf4ea62b0a5c3151b16327c1bf13aaa05c9af55
|
4
|
+
data.tar.gz: 0a45edcde7a6c198ceb68b5e69654b1e4cdc77f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 947376b91089d8389c446101b06b0386e247aa4dd372eda9c88ebd399f15817c6326cc9b96a97311fd7b9635a4e9dc9dd0ed3216ae54f4b35a5617aa50868fb5
|
7
|
+
data.tar.gz: 9dfb88a50870a940da4454a1084bdc145af6ba30ff8e3c757e82e8382d22d6a0ec17ade3f356ed6d03c669ccdb27ecdb2c1b38498ccca6855fb23144cc6d4459
|
@@ -33,6 +33,53 @@ module DataMix
|
|
33
33
|
offset -rows
|
34
34
|
end
|
35
35
|
|
36
|
+
# Builds a resampled copy of the receiver: the values are cut into
# consecutive chunks of random length, the chunks are reordered at random,
# and every chunk is shifted so that it continues from the end of the chunk
# placed before it. Finally the whole series is shifted so that its minimum
# equals the minimum of the original data.
#
# chunk_range - handed to Random#rand to draw the length of each chunk
# seed        - optional seed for the random generator; the same seed on the
#               same input produces the same output
#
# Returns a new array with as many elements as the receiver. The receiver is
# left untouched. An empty receiver yields an empty array.
def resample(chunk_range, seed: nil)
  # Nothing to split or shift; also avoids calling methods on a nil chunk.
  return [] if empty?

  generator = seed ? Random.new(seed) : Random.new

  # Remember the minimum, the result is aligned to it at the very end.
  min_value = min

  # Split a working copy into chunks of random length.
  chunks = []
  remaining = dup
  until remaining.empty?
    seam = generator.rand(chunk_range) - 1
    chunks.push remaining.slice!(0..seam)
  end

  # A trailing one-element chunk is folded into the chunk before it. This
  # needs at least two chunks: with a single chunk there is nothing to merge
  # into, and merging it into itself would lose the data.
  if chunks.size > 1 && chunks.last.size == 1
    tail = chunks.pop
    chunks.last.concat tail
  end

  # Shuffle the chunks. Array#sample is kept (rather than #shuffle) so that
  # seeded results stay the same as before.
  chunks = chunks.sample chunks.size, random: generator

  # Re-base every chunk after the first so that it starts one "step" away
  # from the last value of the (already re-based) chunk before it. The step
  # is one randomly picked difference between neighbouring values of the
  # chunk itself; a one-element chunk has no such difference, so it simply
  # continues at the connecting value (step 0).
  chunks.each_cons(2) do |previous, chunk|
    step = chunk.each_cons(2).map { |a, b| b - a }.sample(random: generator) || 0
    delta = step + previous.last - chunk.first
    chunk.map! { |val| val + delta }
  end

  # Merge the chunks into a flat array.
  result = chunks.flatten

  # Move the entire series up or down so that its minimum equals the
  # minimum recorded at the beginning.
  shift = result.min - min_value
  result.map { |val| val - shift }
end
|
82
|
+
|
36
83
|
def round(decimals=0)
|
37
84
|
map { |val| val ? val.round(decimals) : nil }
|
38
85
|
end
|
@@ -62,4 +109,4 @@ module DataMix
|
|
62
109
|
end
|
63
110
|
|
64
111
|
end
|
65
|
-
end
|
112
|
+
end
|
@@ -76,6 +76,17 @@ module DataMix
|
|
76
76
|
delete from
|
77
77
|
end
|
78
78
|
|
79
|
+
# Create a similar data table with resampled data
|
80
|
+
# Replaces the values of every column with a resampled version of that
# column, skipping the columns listed in +except+.
#
# range  - forwarded to the per-column resample call
# except - a header, or an array of headers, to leave as they are
# seed   - optional random seed; when omitted a fresh one is generated
#
# The same seed is handed to every column, so all columns are driven by an
# identically seeded generator.
# NOTE(review): the result is written back through by_col[col]=; confirm
# against the table class that this updates the receiver itself.
def resample(range, except: [], seed: nil)
  except = [except] unless except.is_a? Array
  cols = headers.reject { |h| except.include? h }

  # Random.new converts its seed to an Integer, so a Time.now.to_f seed
  # would be identical for every call made within the same second.
  # Random.new_seed provides a proper integer seed instead.
  seed ||= Random.new_seed

  cols.each do |col|
    by_col[col] = by_col[col].resample range, seed: seed
  end
end
|
89
|
+
|
79
90
|
# Rounds all values in a column
|
80
91
|
def round(col, decimals: 0)
|
81
92
|
by_col[col] = by_col[col].map { |val| val ? val.round(decimals) : nil }
|
data/lib/datamix/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datamix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danny Ben Shitrit
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: terminal-table
|
@@ -184,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
184
184
|
version: '0'
|
185
185
|
requirements: []
|
186
186
|
rubyforge_project:
|
187
|
-
rubygems_version: 2.
|
187
|
+
rubygems_version: 2.5.1
|
188
188
|
signing_key:
|
189
189
|
specification_version: 4
|
190
190
|
summary: DSL for manipulating tabular data
|