datamix 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +45 -0
- data/lib/datamix/exceptions.rb +3 -0
- data/lib/datamix/refinements/array.rb +73 -0
- data/lib/datamix/refinements/csv_table.rb +111 -0
- data/lib/datamix/refinements/nil.rb +8 -0
- data/lib/datamix/refinements/object.rb +7 -0
- data/lib/datamix/version.rb +3 -0
- data/lib/datamix.rb +8 -0
- metadata +191 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 26fd664e65a641fa048b990aca4858e10951d3e3
|
4
|
+
data.tar.gz: 5a92b9c11be1d677f6c41b8567fadfcbd3e15315
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e703a60fd3fd5baa210feef386c9f385c86043bfbe5bec9d1eb8a46454773cf09678da7abf3cd2098693b58bd7109b08fcf769de5832014acde5b1df7455ee77
|
7
|
+
data.tar.gz: 2d21ece4e2a523ae0e15b72d642610d342531a93f1ad3c492dfdf61d9180ccc197bba276c48b8f788c7d74977b82a0d376c23375b8e17efc4ea6fd542198ca7c
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
DataMix - DSL for manipulating tabular data
|
2
|
+
==================================================
|
3
|
+
|
4
|
+
[](https://rubygems.org/gems/datamix)
|
5
|
+
[](https://travis-ci.org/DannyBen/datamix)
|
6
|
+
[](https://codeclimate.com/github/DannyBen/datamix)
|
7
|
+
[](https://gemnasium.com/DannyBen/datamix)
|
8
|
+
|
9
|
+
---
|
10
|
+
|
11
|
+
This library refines Ruby's [`CSV::Table`][1] and `Array` objects to provide a DSL
|
12
|
+
for manipulating tabular data.
|
13
|
+
|
14
|
+
---
|
15
|
+
|
16
|
+
|
17
|
+
Install
|
18
|
+
--------------------------------------------------
|
19
|
+
|
20
|
+
```
|
21
|
+
$ gem install datamix
|
22
|
+
```
|
23
|
+
|
24
|
+
Or with bundler:
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
gem 'datamix'
|
28
|
+
```
|
29
|
+
|
30
|
+
|
31
|
+
|
32
|
+
Usage
|
33
|
+
--------------------------------------------------
|
34
|
+
|
35
|
+
Require the library and enable the refinements with `using DataMix`:
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
require 'datamix'
|
39
|
+
using DataMix
|
40
|
+
```
|
41
|
+
|
42
|
+
TODO: Complete documentation.
|
43
|
+
|
44
|
+
|
45
|
+
[1]: https://ruby-doc.org/stdlib-2.3.1/libdoc/csv/rdoc/CSV/Table.html
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module DataMix
  # Refinements that add column-style (element-wise) helpers to Array.
  #
  # NOTE: inside this refine block the arithmetic operators below are the
  # active ones for arrays, so the helpers use #concat rather than + when
  # they need to glue arrays together.
  refine Array do
    # Element-wise subtraction.
    #
    # When +other+ responds to #each, values are paired by index and the
    # result holds nil wherever +other+ has no truthy value at that index.
    # Otherwise +other+ is subtracted from every element.
    #
    # Always returns a new array; the receiver is left untouched.
    def -(other)
      if other.respond_to? :each
        each_with_index.map do |val, index|
          other[index] ? val - other[index] : nil
        end
      else
        map { |val| val - other }
      end
    end

    # Element-wise addition. Same pairing rules as #-.
    # Always returns a new array; the receiver is left untouched.
    def +(other)
      if other.respond_to? :each
        each_with_index.map do |val, index|
          other[index] ? val + other[index] : nil
        end
      else
        map { |val| val + other }
      end
    end

    # Element-wise multiplication. Same pairing rules as #-.
    # Always returns a new array; the receiver is left untouched.
    def *(other)
      if other.respond_to? :each
        each_with_index.map do |val, index|
          other[index] ? val * other[index] : nil
        end
      else
        map { |val| val * other }
      end
    end

    # Element-wise division. The divisor is converted with #to_f first, so
    # numeric results are floats. Same pairing rules as #-.
    # Always returns a new array; the receiver is left untouched.
    def /(other)
      if other.respond_to? :each
        each_with_index.map do |val, index|
          other[index] ? val / other[index].to_f : nil
        end
      else
        map { |val| val / other.to_f }
      end
    end

    # Shift the values by +rows+ positions, filling the vacated slots with
    # nil. A positive value shifts towards the end (nils at the start), a
    # negative value shifts towards the start (nils at the end).
    #
    # The shift is clamped to the array size so the result always has the
    # same length as the receiver.
    def offset(rows)
      shift = [rows.abs, size].min
      padding = Array.new shift

      if rows >= 0
        padding.concat first(size - shift)
      else
        drop(shift).concat padding
      end
    end

    # Shorthand for #offset with a default of one row.
    def prev(rows=1)
      offset rows
    end

    # Round every non-nil value to +decimals+ places; nil stays nil.
    def round(decimals=0)
      map { |val| val ? val.round(decimals) : nil }
    end

    # True when the array has no duplicate values.
    def uniq?
      length == uniq.length
    end

    # Yield every run of +window_size+ consecutive values to the block and
    # collect the block results. The result is left-padded with nil so that
    # each block result lines up with the last element of its window.
    #
    # The padding is clamped to the array size, so a window larger than the
    # array yields an all-nil array of the same length as the receiver.
    def window(window_size, &_block)
      result = (0..(size - window_size)).map do |index|
        yield self[index...(index + window_size)]
      end

      Array.new([window_size - 1, size].min).concat result
    end

  end
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require 'terminal-table'
|
2
|
+
|
3
|
+
module DataMix
  # Refinements that add a small data-manipulation DSL to CSV::Table.
  refine CSV::Table do

    # Delete all rows that have one or more empty or nil values.
    #
    # Field values are inspected directly: CSV::Row#include? tests header
    # names, not values, so it cannot be used to detect empty fields.
    def delete_empty_rows
      delete_if do |row|
        row.fields.any? { |value| value.nil? || value == '' }
      end
    end

    # Create a new column using a block. This method yields the given block
    # row by row, providing the index to the block and returns an array
    # suitable for assigning to a new column.
    def derive(&_block)
      by_row.each_with_index.map do |_value, index|
        yield index
      end
    end

    # Extract a regular expression pattern from a column and return a new
    # column (an array with one entry per row).
    def extract(pattern, from:)
      by_row.map { |row| row[from][pattern] }
    end

    # Iterate over all rows, providing the index to the block.
    def iterate(&_block)
      each_with_index do |_value, index|
        yield index
      end
    end

    # Join columns from another data table based on a mutual column.
    #
    # Every column of +other+ except +on+ is copied into the matching row
    # of this table. A column whose name already exists in this table is
    # stored with a leading underscore. Rows without a match in +other+
    # receive nil for the joined columns.
    #
    # Raises CSVError when +on+ is missing from either table or when its
    # values are not unique in either table.
    def join(other, on:)
      raise CSVError, "No such column '#{on}' in source" unless headers.include? on
      raise CSVError, "No such column '#{on}' in other" unless other.headers.include? on
      raise CSVError, "source[#{on}] is not unique" unless by_col[on].uniq?
      raise CSVError, "other[#{on}] is not unique" unless other.by_col[on].uniq?

      # Resolve the target column names once, before any row is modified.
      # #headers reflects the first row, which gains the joined columns as
      # soon as it is processed; checking per row would send the remaining
      # rows to the underscore-prefixed name.
      own_headers = headers
      targets = other.headers.reject { |col| col == on }.map do |col|
        [col, own_headers.include?(col) ? "_#{col}" : col]
      end

      by_row.each do |row|
        other_row = other.find { |r| r[on] == row[on] }
        targets.each do |col, new_col|
          row[new_col] = other_row ? other_row[col] : nil
        end
      end
    end

    # Keep one or more columns, and remove the rest
    def keep(*desired_cols)
      headers.each do |col|
        delete col unless desired_cols.include? col
      end
    end

    # Print the first 10 lines
    def preview
      show 10
    end

    # Remove one or more columns
    def remove(*desired_cols)
      desired_cols.each do |col|
        delete col
      end
    end

    # Rename a column: copy its values under the new name, then delete the
    # old column.
    def rename(from, to:)
      by_col[to] = by_col[from]
      delete from
    end

    # Rounds all non-nil values in a column to the given number of decimals
    def round(col, decimals: 0)
      by_col[col] = by_col[col].map { |val| val ? val.round(decimals) : nil }
    end

    # Save to a CSV or TSV file. A '.csv' extension (case insensitive)
    # writes CSV; any other extension writes TSV.
    def save_as(filename)
      ext = File.extname(filename).downcase
      data = ext == '.csv' ? to_s : to_tsv
      File.write filename, data
    end

    # Print some or all rows
    def show(rows=:all)
      puts to_ascii rows
    end

    # Returns a table string rendered with Terminal::Table. Pass a number
    # to limit the output to the first rows, or :all for everything.
    def to_ascii(rows=:all)
      table = rows == :all ? by_row : first(rows)
      rows = table.map { |row| row.fields }
      Terminal::Table.new(headings: headers, rows: rows).to_s
    end

    # Convert table to a TSV string: a header line followed by one line
    # per row, joined with newlines (no trailing newline).
    def to_tsv
      result = [headers.join("\t")]
      each do |row|
        result << row.fields.join("\t")
      end
      result.join "\n"
    end

  end
end
|
data/lib/datamix.rb
ADDED
metadata
ADDED
@@ -0,0 +1,191 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: datamix
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Danny Ben Shitrit
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-03-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: docopt
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: terminal-table
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.7'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.7'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: runfile
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.8'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.8'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: runfile-tasks
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.4'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.4'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '3.5'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '3.5'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rdoc
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '5.0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '5.0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: simplecov
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0.13'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0.13'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: yard
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0.8'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0.8'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: pry
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0.10'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0.10'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: pry-doc
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0.10'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0.10'
|
153
|
+
description: DSL for manipulating tabular data
|
154
|
+
email: db@dannyben.com
|
155
|
+
executables: []
|
156
|
+
extensions: []
|
157
|
+
extra_rdoc_files: []
|
158
|
+
files:
|
159
|
+
- README.md
|
160
|
+
- lib/datamix.rb
|
161
|
+
- lib/datamix/exceptions.rb
|
162
|
+
- lib/datamix/refinements/array.rb
|
163
|
+
- lib/datamix/refinements/csv_table.rb
|
164
|
+
- lib/datamix/refinements/nil.rb
|
165
|
+
- lib/datamix/refinements/object.rb
|
166
|
+
- lib/datamix/version.rb
|
167
|
+
homepage: https://github.com/DannyBen/datamix
|
168
|
+
licenses:
|
169
|
+
- MIT
|
170
|
+
metadata: {}
|
171
|
+
post_install_message:
|
172
|
+
rdoc_options: []
|
173
|
+
require_paths:
|
174
|
+
- lib
|
175
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
176
|
+
requirements:
|
177
|
+
- - ">="
|
178
|
+
- !ruby/object:Gem::Version
|
179
|
+
version: 2.0.0
|
180
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
181
|
+
requirements:
|
182
|
+
- - ">="
|
183
|
+
- !ruby/object:Gem::Version
|
184
|
+
version: '0'
|
185
|
+
requirements: []
|
186
|
+
rubyforge_project:
|
187
|
+
rubygems_version: 2.6.6
|
188
|
+
signing_key:
|
189
|
+
specification_version: 4
|
190
|
+
summary: DSL for manipulating tabular data
|
191
|
+
test_files: []
|