datamix 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2e25311621834c0792028b34973e716c1dc0bce2
4
- data.tar.gz: df08942707700a403c16e55bf0cd39659520ec7c
3
+ metadata.gz: 2cf4ea62b0a5c3151b16327c1bf13aaa05c9af55
4
+ data.tar.gz: 0a45edcde7a6c198ceb68b5e69654b1e4cdc77f3
5
5
  SHA512:
6
- metadata.gz: 643e11e9a83a4c498ca2c042b59780eed0e95a26f07cb6f2624e294b78812c6abeb6669bc897cccb21b65119d906f751b3f671e4696b7d90b802df9a0399d933
7
- data.tar.gz: 1c9dc5162696b3b6de6110bee2d0cf183ceec3f913186bc7887554a6d716255212b4761514da3762866b1a668d41c0f55ac2d1a98183d5d86265d36948a82e58
6
+ metadata.gz: 947376b91089d8389c446101b06b0386e247aa4dd372eda9c88ebd399f15817c6326cc9b96a97311fd7b9635a4e9dc9dd0ed3216ae54f4b35a5617aa50868fb5
7
+ data.tar.gz: 9dfb88a50870a940da4454a1084bdc145af6ba30ff8e3c757e82e8382d22d6a0ec17ade3f356ed6d03c669ccdb27ecdb2c1b38498ccca6855fb23144cc6d4459
@@ -33,6 +33,53 @@ module DataMix
33
33
  offset -rows
34
34
  end
35
35
 
36
+ def resample(chunk_range, seed: nil)
37
+ generator = seed ? Random.new(seed) : Random.new
38
+
39
+ # Save min value, we will adjust the result to it later
40
+ min_value = min
41
+
42
+ # Split the array to chunks
43
+ chunks = []
44
+ clone = dup
45
+ while !clone.empty? do
46
+ seam = generator.rand(chunk_range) - 1
47
+ chunks.push clone.slice! 0..seam
48
+ end
49
+
50
+ # If the last chunk contains one element only, merge with the
51
+ # previous chunk
52
+ if chunks.last.size == 1
53
+ chunks[chunks.size-2].push chunks.last.first
54
+ chunks = chunks.first chunks.size-1
55
+ end
56
+
57
+ # Shuffle the chunks
58
+ chunks = chunks.sample chunks.size, random: generator
59
+
60
+ # Adjust each chunk so that its beginning connects with the previous
61
+ # chunk normally. For this we calculate the median change value of the
62
+ # array.
63
+ chunks.each_with_index do |chunk, i|
64
+ next if i == 0
65
+ connector = chunks[i-1].last
66
+
67
+ # Calculate an array of changes, and then take a random sample
68
+ diff = chunk.each_cons(2).map { |a,b| b-a }.sample random: generator
69
+
70
+ delta = diff + connector - chunk.first
71
+ chunks[i].map! { |val| val+delta }
72
+ end
73
+
74
+ # Merge chunks to a flat array
75
+ result = chunks.flatten
76
+
77
+ # Move the entire array up or down so that its min value is equal to
78
+ # the original min value recorded at the beginning.
79
+ diff = result.min - min_value
80
+ result.map! { |val| val-diff }
81
+ end
82
+
36
83
  def round(decimals=0)
37
84
  map { |val| val ? val.round(decimals) : nil }
38
85
  end
@@ -62,4 +109,4 @@ module DataMix
62
109
  end
63
110
 
64
111
  end
65
- end
112
+ end
@@ -76,6 +76,17 @@ module DataMix
76
76
  delete from
77
77
  end
78
78
 
79
+ # Create a similar data table with resampled data
80
+ def resample(range, except: [], seed: nil)
81
+ except = [except] unless except.is_a? Array
82
+ cols = headers.reject { |h| except.include? h }
83
+ seed ||= Time.now.to_f
84
+
85
+ cols.each do |col|
86
+ by_col[col] = by_col[col].resample range, seed: seed
87
+ end
88
+ end
89
+
79
90
  # Rounds all values in a column
80
91
  def round(col, decimals: 0)
81
92
  by_col[col] = by_col[col].map { |val| val ? val.round(decimals) : nil }
@@ -1,3 +1,3 @@
1
1
  module DataMix
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datamix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danny Ben Shitrit
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-10 00:00:00.000000000 Z
11
+ date: 2017-03-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: terminal-table
@@ -184,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
184
184
  version: '0'
185
185
  requirements: []
186
186
  rubyforge_project:
187
- rubygems_version: 2.6.6
187
+ rubygems_version: 2.5.1
188
188
  signing_key:
189
189
  specification_version: 4
190
190
  summary: DSL for manipulating tabular data