datamix 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 26fd664e65a641fa048b990aca4858e10951d3e3
4
+ data.tar.gz: 5a92b9c11be1d677f6c41b8567fadfcbd3e15315
5
+ SHA512:
6
+ metadata.gz: e703a60fd3fd5baa210feef386c9f385c86043bfbe5bec9d1eb8a46454773cf09678da7abf3cd2098693b58bd7109b08fcf769de5832014acde5b1df7455ee77
7
+ data.tar.gz: 2d21ece4e2a523ae0e15b72d642610d342531a93f1ad3c492dfdf61d9180ccc197bba276c48b8f788c7d74977b82a0d376c23375b8e17efc4ea6fd542198ca7c
data/README.md ADDED
@@ -0,0 +1,45 @@
1
+ DataMix - DSL for manipulating tabular data
2
+ ==================================================
3
+
4
+ [![Gem](https://img.shields.io/gem/v/datamix.svg?style=flat-square)](https://rubygems.org/gems/datamix)
5
+ [![Travis](https://img.shields.io/travis/DannyBen/datamix.svg?style=flat-square)](https://travis-ci.org/DannyBen/datamix)
6
+ [![Code Climate](https://img.shields.io/codeclimate/github/DannyBen/datamix.svg?style=flat-square)](https://codeclimate.com/github/DannyBen/datamix)
7
+ [![Gemnasium](https://img.shields.io/gemnasium/DannyBen/datamix.svg?style=flat-square)](https://gemnasium.com/DannyBen/datamix)
8
+
9
+ ---
10
+
11
+ This library refines Ruby's [`CSV::Table`][1] and `Array` objects to provide a DSL
12
+ for manipulating tabular data.
13
+
14
+ ---
15
+
16
+
17
+ Install
18
+ --------------------------------------------------
19
+
20
+ ```
21
+ $ gem install datamix
22
+ ```
23
+
24
+ Or with bundler:
25
+
26
+ ```ruby
27
+ gem 'datamix'
28
+ ```
29
+
30
+
31
+
32
+ Usage
33
+ --------------------------------------------------
34
+
35
+ Require the library and enable the refinements with `using DataMix`:
36
+
37
+ ```ruby
38
+ require 'datamix'
39
+ using DataMix
40
+ ```
41
+
42
+ TODO: Complete documentation.
43
+
44
+
45
+ [1]: https://ruby-doc.org/stdlib-2.3.1/libdoc/csv/rdoc/CSV/Table.html
@@ -0,0 +1,3 @@
1
module DataMix
  # Error type for table operations that cannot be carried out, such as
  # joining on a column that is missing or whose values are not unique.
  class CSVError < StandardError
  end
end
@@ -0,0 +1,73 @@
1
module DataMix
  # Treats an Array as a column of values: arithmetic operators work
  # element by element, and a few helpers shift, round and window the
  # series.
  #
  # Where this refinement is active, +, -, and * replace the built-in
  # Array concatenation, difference and repetition operators.
  #
  # Every method returns a new array and leaves the receiver untouched.
  # A nil element in the receiver always produces nil in the result.
  refine Array do
    # Element-wise subtraction.
    #
    # other - either a collection (anything responding to #each and
    #         indexable with #[]), or a single value applied to all
    #         elements.
    #
    # Returns a new array of the receiver's length. Positions where the
    # receiver holds nil, or where +other+ has no truthy counterpart,
    # are nil.
    def -(other)
      if other.respond_to? :each
        each_with_index.map do |val, index|
          operand = other[index]
          val.nil? || !operand ? nil : val - operand
        end
      else
        map { |val| val.nil? ? nil : val - other }
      end
    end

    # Element-wise addition. Same operand rules and nil handling as #-.
    def +(other)
      if other.respond_to? :each
        each_with_index.map do |val, index|
          operand = other[index]
          val.nil? || !operand ? nil : val + operand
        end
      else
        map { |val| val.nil? ? nil : val + other }
      end
    end

    # Element-wise multiplication. Same operand rules and nil handling
    # as #-.
    def *(other)
      if other.respond_to? :each
        each_with_index.map do |val, index|
          operand = other[index]
          val.nil? || !operand ? nil : val * operand
        end
      else
        map { |val| val.nil? ? nil : val * other }
      end
    end

    # Element-wise division. The divisor is converted with #to_f before
    # dividing, so integer operands are not integer-divided. Same
    # operand rules and nil handling as #-.
    def /(other)
      if other.respond_to? :each
        each_with_index.map do |val, index|
          operand = other[index]
          val.nil? || !operand ? nil : val / operand.to_f
        end
      else
        map { |val| val.nil? ? nil : val / other.to_f }
      end
    end

    # Shifts the values by +rows+ positions while keeping the length.
    #
    # rows - positive to move values towards the end (nil padding at the
    #        start), negative to move them towards the start (nil
    #        padding at the end).
    #
    # Shifting by more than the array length yields an array of nils of
    # the same length.
    def offset(rows)
      shift = [rows.abs, size].min
      padding = Array.new(shift)

      # concat is used on purpose: inside this refinement `+` means
      # element-wise addition, not concatenation.
      if rows >= 0
        padding.concat first(size - shift)
      else
        drop(shift).concat padding
      end
    end

    # Returns the values shifted forward by +rows+ positions (default 1),
    # so each position holds the value that was +rows+ places before it.
    def prev(rows=1)
      offset rows
    end

    # Rounds every non-nil value to +decimals+ places; nil and false
    # entries become nil.
    def round(decimals=0)
      map { |val| val ? val.round(decimals) : nil }
    end

    # True when the array has no duplicate values.
    def uniq?
      length == uniq.length
    end

    # Applies the block to each run of +window_size+ consecutive values.
    #
    # window_size - number of values per window; must be at least 1.
    #
    # Yields each window as an array. Returns the block results, with
    # nil padding at the front for the positions that do not yet have a
    # complete window, so the result has the receiver's length.
    #
    # Raises ArgumentError if +window_size+ is smaller than 1.
    def window(window_size, &_block)
      raise ArgumentError, 'window_size must be at least 1' if window_size < 1

      result = (0..(size - window_size)).map do |index|
        yield self[index, window_size]
      end

      # Never pad beyond the receiver's own length (window larger than
      # the array means no complete window exists at all).
      Array.new([window_size - 1, size].min).concat result
    end
  end
end
@@ -0,0 +1,111 @@
1
+ require 'terminal-table'
2
+
3
module DataMix
  # Column-oriented helpers for CSV::Table: cleaning, deriving, joining,
  # renaming, printing and saving.
  refine CSV::Table do

    # Delete all rows that have one or more empty or nil values.
    def delete_empty_rows
      delete_if do |row|
        # Inspect the field values; CSV::Row#include? would look at the
        # header names instead.
        row.fields.any? { |field| field.nil? || field == '' }
      end
    end

    # Create a new column using a block. This method yields the given block
    # row by row, providing the index to the block and returns an array
    # suitable for assigning to a new column.
    def derive(&_block)
      by_row.each_with_index.map do |_value, index|
        yield index
      end
    end

    # Extract a regular expression pattern from a column and return a new
    # column. Values are converted to strings before matching; nil values
    # and values that do not match yield nil.
    def extract(pattern, from:)
      by_row.map do |row|
        value = row[from]
        value.nil? ? nil : value.to_s[pattern]
      end
    end

    # Iterate over all rows, providing the index to the block.
    def iterate(&_block)
      each_with_index do |_value, index|
        yield index
      end
    end

    # Join columns from another data table based on a mutual column.
    #
    # other - the table to take columns from.
    # on    - header present in both tables, with unique values in each.
    #
    # Every column of +other+ except +on+ is added to each row. A column
    # whose name already exists in this table is added as "_<name>".
    # Rows without a match in +other+ receive nil for the added columns.
    #
    # Raises CSVError when +on+ is missing from either table or its
    # values are not unique.
    def join(other, on:)
      raise CSVError, "No such column '#{on}' in source" unless headers.include? on
      raise CSVError, "No such column '#{on}' in other" unless other.headers.include? on
      raise CSVError, "source[#{on}] is not unique" unless by_col[on].uniq?
      raise CSVError, "other[#{on}] is not unique" unless other.by_col[on].uniq?

      # Decide the target column names once, before any row is touched.
      # The table's headers are read from its first row, so checking them
      # inside the loop would see the columns just added to that row and
      # wrongly prefix them for every following row.
      own_headers = headers
      targets = other.headers.reject { |col| col == on }.map do |col|
        [col, own_headers.include?(col) ? "_#{col}" : col]
      end
      other_rows = other.by_row

      by_row.each do |row|
        other_row = other_rows.find { |r| r[on] == row[on] }
        targets.each do |col, new_col|
          row[new_col] = other_row ? other_row[col] : nil
        end
      end
    end

    # Keep one or more columns, and remove the rest
    def keep(*desired_cols)
      headers.each do |col|
        delete col unless desired_cols.include? col
      end
    end

    # Print the first 10 lines
    def preview
      show 10
    end

    # Remove one or more columns
    def remove(*desired_cols)
      desired_cols.each do |col|
        delete col
      end
    end

    # Rename a column. The values are copied to a column named +to+ and
    # the +from+ column is deleted. Renaming a column to its own name
    # leaves the table unchanged.
    def rename(from, to:)
      # Without this guard the copy would be a no-op and the delete would
      # drop the column entirely.
      return by_col[from] if from == to

      by_col[to] = by_col[from]
      delete from
    end

    # Rounds all values in a column; nil values stay nil.
    def round(col, decimals: 0)
      by_col[col] = by_col[col].map { |val| val ? val.round(decimals) : nil }
    end

    # Save to a CSV or TSV file. A ".csv" extension (any letter case)
    # writes CSV; every other extension writes the #to_tsv output.
    def save_as(filename)
      ext = File.extname(filename).downcase
      data = ext == '.csv' ? to_s : to_tsv
      File.write filename, data
    end

    # Print some or all rows
    def show(rows=:all)
      puts to_ascii rows
    end

    # Returns a table string. Pass a number to limit the output to that
    # many leading rows, or :all (the default) for every row.
    def to_ascii(rows=:all)
      table = rows == :all ? by_row : first(rows)
      body = table.map { |row| row.fields }
      Terminal::Table.new(headings: headers, rows: body).to_s
    end

    # Convert table to a TSV string: a header line followed by one line
    # per row, joined with newlines and without a trailing newline.
    # Values are joined as-is; tabs or newlines inside values are not
    # escaped.
    def to_tsv
      lines = [headers.join("\t")]
      each do |row|
        lines << row.fields.join("\t")
      end
      lines.join "\n"
    end

  end
end
@@ -0,0 +1,8 @@
1
module DataMix
  # Lets nil take part in arithmetic: each of the four basic operators,
  # called on nil, ignores its operand and returns nil.
  refine NilClass do
    def *(other)
      nil
    end

    def +(other)
      nil
    end

    def -(other)
      nil
    end

    def /(other)
      nil
    end
  end
end
@@ -0,0 +1,7 @@
1
module DataMix
  refine Object do
    # Loads the CSV file at +filename+ and returns it as a CSV::Table.
    # The first line is used as headers, header names are converted to
    # symbols, and numeric-looking fields are converted to numbers.
    def file(filename)
      # Spelled-out form of the CSV.table shortcut.
      CSV.read(filename, headers: true, converters: :numeric, header_converters: :symbol)
    end
  end
end
@@ -0,0 +1,3 @@
1
module DataMix
  # Version string of this library.
  VERSION = '0.0.1'
end
data/lib/datamix.rb ADDED
@@ -0,0 +1,8 @@
1
+ require 'csv'
2
+
3
+ require 'datamix/version'
4
+ require 'datamix/exceptions'
5
+ require 'datamix/refinements/array'
6
+ require 'datamix/refinements/csv_table'
7
+ require 'datamix/refinements/nil'
8
+ require 'datamix/refinements/object'
metadata ADDED
@@ -0,0 +1,191 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: datamix
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Danny Ben Shitrit
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-03-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: docopt
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.5'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: terminal-table
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.7'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.7'
41
+ - !ruby/object:Gem::Dependency
42
+ name: runfile
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.8'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.8'
55
+ - !ruby/object:Gem::Dependency
56
+ name: runfile-tasks
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.4'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.4'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.5'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.5'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rdoc
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '5.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '5.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: simplecov
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.13'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.13'
111
+ - !ruby/object:Gem::Dependency
112
+ name: yard
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.8'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '0.8'
125
+ - !ruby/object:Gem::Dependency
126
+ name: pry
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '0.10'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '0.10'
139
+ - !ruby/object:Gem::Dependency
140
+ name: pry-doc
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '0.10'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '0.10'
153
+ description: DSL for manipulating tabular data
154
+ email: db@dannyben.com
155
+ executables: []
156
+ extensions: []
157
+ extra_rdoc_files: []
158
+ files:
159
+ - README.md
160
+ - lib/datamix.rb
161
+ - lib/datamix/exceptions.rb
162
+ - lib/datamix/refinements/array.rb
163
+ - lib/datamix/refinements/csv_table.rb
164
+ - lib/datamix/refinements/nil.rb
165
+ - lib/datamix/refinements/object.rb
166
+ - lib/datamix/version.rb
167
+ homepage: https://github.com/DannyBen/datamix
168
+ licenses:
169
+ - MIT
170
+ metadata: {}
171
+ post_install_message:
172
+ rdoc_options: []
173
+ require_paths:
174
+ - lib
175
+ required_ruby_version: !ruby/object:Gem::Requirement
176
+ requirements:
177
+ - - ">="
178
+ - !ruby/object:Gem::Version
179
+ version: 2.0.0
180
+ required_rubygems_version: !ruby/object:Gem::Requirement
181
+ requirements:
182
+ - - ">="
183
+ - !ruby/object:Gem::Version
184
+ version: '0'
185
+ requirements: []
186
+ rubyforge_project:
187
+ rubygems_version: 2.6.6
188
+ signing_key:
189
+ specification_version: 4
190
+ summary: DSL for manipulating tabular data
191
+ test_files: []