datamix 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 26fd664e65a641fa048b990aca4858e10951d3e3
4
+ data.tar.gz: 5a92b9c11be1d677f6c41b8567fadfcbd3e15315
5
+ SHA512:
6
+ metadata.gz: e703a60fd3fd5baa210feef386c9f385c86043bfbe5bec9d1eb8a46454773cf09678da7abf3cd2098693b58bd7109b08fcf769de5832014acde5b1df7455ee77
7
+ data.tar.gz: 2d21ece4e2a523ae0e15b72d642610d342531a93f1ad3c492dfdf61d9180ccc197bba276c48b8f788c7d74977b82a0d376c23375b8e17efc4ea6fd542198ca7c
data/README.md ADDED
@@ -0,0 +1,45 @@
1
+ DataMix - DSL for manipulating tabular data
2
+ ==================================================
3
+
4
+ [![Gem](https://img.shields.io/gem/v/datamix.svg?style=flat-square)](https://rubygems.org/gems/datamix)
5
+ [![Travis](https://img.shields.io/travis/DannyBen/datamix.svg?style=flat-square)](https://travis-ci.org/DannyBen/datamix)
6
+ [![Code Climate](https://img.shields.io/codeclimate/github/DannyBen/datamix.svg?style=flat-square)](https://codeclimate.com/github/DannyBen/datamix)
7
+ [![Gemnasium](https://img.shields.io/gemnasium/DannyBen/datamix.svg?style=flat-square)](https://gemnasium.com/DannyBen/datamix)
8
+
9
+ ---
10
+
11
+ This library refines Ruby's [`CSV::Table`][1] and `Array` classes to provide a DSL
12
+ for manipulating tabular data.
13
+
14
+ ---
15
+
16
+
17
+ Install
18
+ --------------------------------------------------
19
+
20
+ ```
21
+ $ gem install datamix
22
+ ```
23
+
24
+ Or with bundler:
25
+
26
+ ```ruby
27
+ gem 'datamix'
28
+ ```
29
+
30
+
31
+
32
+ Usage
33
+ --------------------------------------------------
34
+
35
+ Require the library and enable the refinements with `using DataMix`:
36
+
37
+ ```ruby
38
+ require 'datamix'
39
+ using DataMix
40
+ ```
41
+
42
+ TODO: Complete documentation.
43
+
44
+
45
+ [1]: https://ruby-doc.org/stdlib-2.3.1/libdoc/csv/rdoc/CSV/Table.html
@@ -0,0 +1,3 @@
1
module DataMix
  # Error raised by the CSV table helpers when a request cannot be honoured
  # (for example a join on a column that is missing or not unique).
  class CSVError < StandardError
  end
end
@@ -0,0 +1,73 @@
1
module DataMix
  # Array refinement: element-wise arithmetic plus a few helpers for treating
  # an Array as a column of values.
  #
  # While this refinement is active the built-in Array#+, #-, #* and #/
  # are replaced by the element-wise versions below, so the helper methods in
  # this block deliberately use #concat, #first and #drop instead of
  # those operators.
  #
  # Arithmetic rules shared by the four operators:
  # * If +other+ responds to +each+ it is treated as a parallel list and every
  #   element is combined with the element of +other+ at the same index.
  #   Otherwise +other+ is applied to every element.
  # * A nil (or false) on either side of a pair produces nil at that position.
  #   The guard is explicit so the result does not depend on any other
  #   refinement being active inside this block.
  # * A new Array of the receiver's size is returned; the receiver is never
  #   modified.
  refine Array do
    # Element-wise subtraction. See the rules on the refinement.
    def -(other)
      if other.respond_to? :each
        each_with_index.map do |val, index|
          val && other[index] ? val - other[index] : nil
        end
      else
        map { |val| val ? val - other : nil }
      end
    end

    # Element-wise addition. See the rules on the refinement.
    def +(other)
      if other.respond_to? :each
        each_with_index.map do |val, index|
          val && other[index] ? val + other[index] : nil
        end
      else
        map { |val| val ? val + other : nil }
      end
    end

    # Element-wise multiplication. See the rules on the refinement.
    def *(other)
      if other.respond_to? :each
        each_with_index.map do |val, index|
          val && other[index] ? val * other[index] : nil
        end
      else
        map { |val| val ? val * other : nil }
      end
    end

    # Element-wise division. The divisor is converted with +to_f+, so the
    # division is done against a Float divisor.
    def /(other)
      if other.respond_to? :each
        each_with_index.map do |val, index|
          val && other[index] ? val / other[index].to_f : nil
        end
      else
        map { |val| val ? val / other.to_f : nil }
      end
    end

    # Shift the values by +rows+ positions, padding the vacated slots with
    # nil. A positive +rows+ moves values toward higher indexes, a negative
    # one toward lower indexes.
    #
    # The result always has the same size as the receiver: the shift is
    # capped at the receiver's size, so shifting further than the array is
    # long yields an all-nil array instead of a wrong-sized one or an error.
    def offset(rows)
      shift = [rows.abs, size].min
      padding = Array.new(shift)
      if rows >= 0
        padding.concat first(size - shift)
      else
        drop(shift).concat padding
      end
    end

    # Values of the preceding row(s); equivalent to +offset(rows)+.
    def prev(rows=1)
      offset rows
    end

    # Round every element to +decimals+ places, leaving nil elements as nil.
    def round(decimals=0)
      map { |val| val ? val.round(decimals) : nil }
    end

    # True when the array holds no duplicate values.
    def uniq?
      length == uniq.length
    end

    # Yield every run of +window_size+ consecutive elements to the block and
    # collect the block's results. The result is left-padded with nil so each
    # value lines up with the last element of its window.
    #
    # The padding is capped at the receiver's size, so a window larger than
    # the array yields an all-nil array of the receiver's size.
    #
    # Raises ArgumentError when +window_size+ is smaller than 1.
    def window(window_size, &_block)
      raise ArgumentError, 'window size must be at least 1' if window_size < 1

      result = each_cons(window_size).map { |slice| yield slice }
      Array.new([window_size - 1, size].min).concat result
    end
  end
end
@@ -0,0 +1,111 @@
1
+ require 'terminal-table'
2
+
3
module DataMix
  # CSV::Table refinement: helpers for cleaning, deriving, joining and
  # exporting tabular data.
  #
  # Array operators (+, -, * and /) are deliberately not used on arrays
  # inside this block, because the Array refinement declared in this same
  # module redefines them element-wise.
  refine CSV::Table do

    # Delete all rows that have one or more empty or nil values.
    # A value counts as empty when the field itself is nil or ''.
    def delete_empty_rows
      delete_if do |row|
        row.fields.any? { |field| field.nil? || field == '' }
      end
    end

    # Create a new column using a block. This method yields the given block
    # row by row, providing the index to the block and returns an array
    # suitable for assigning to a new column.
    def derive(&_block)
      by_row.each_with_index.map do |_row, index|
        yield index
      end
    end

    # Extract a regular expression pattern from a column and return a new
    # column. Rows whose +from+ field is nil produce nil.
    def extract(pattern, from:)
      by_row.map do |row|
        value = row[from]
        value && value[pattern]
      end
    end

    # Iterate over all rows, providing the index to the block.
    def iterate(&_block)
      each_with_index do |_value, index|
        yield index
      end
    end

    # Join columns from another data table based on a mutual column.
    #
    # Every column of +other+ except +on+ is copied into the matching row of
    # this table; rows without a match receive nil. A column whose name
    # already exists in this table is stored as "_<name>" instead.
    #
    # Raises CSVError when +on+ is missing from either table or when its
    # values are not unique in either table.
    def join(other, on:)
      raise CSVError, "No such column '#{on}' in source" unless headers.include? on
      raise CSVError, "No such column '#{on}' in other" unless other.headers.include? on

      # Uniqueness is compared inline so this method does not depend on the
      # Array#uniq? refinement being active in this block.
      own_keys = by_col[on]
      other_keys = other.by_col[on]
      raise CSVError, "source[#{on}] is not unique" unless own_keys.uniq.length == own_keys.length
      raise CSVError, "other[#{on}] is not unique" unless other_keys.uniq.length == other_keys.length

      # Decide the target column names once, before touching any row.
      # #headers reads the first row, so after the first row has received a
      # new column a per-row check would see it as "already present" and
      # store the value under "_<name>" for every following row.
      existing = headers
      targets = {}
      other.headers.each do |col|
        next if col == on
        targets[col] = existing.include?(col) ? "_#{col}" : col
      end

      by_row.each do |row|
        other_row = other.find { |r| r[on] == row[on] }
        targets.each do |col, new_col|
          row[new_col] = other_row ? other_row[col] : nil
        end
      end
    end

    # Keep one or more columns, and remove the rest
    def keep(*desired_cols)
      headers.each do |col|
        delete col unless desired_cols.include? col
      end
    end

    # Print the first 10 lines
    def preview
      show 10
    end

    # Remove one or more columns
    def remove(*desired_cols)
      desired_cols.each do |col|
        delete col
      end
    end

    # Rename a column.
    # Renaming a column to its own name is a no-op; without this guard the
    # column would be copied onto itself and then deleted.
    def rename(from, to:)
      return by_col[from] if from == to

      by_col[to] = by_col[from]
      delete from
    end

    # Rounds all values in a column, leaving nil values as nil
    def round(col, decimals: 0)
      by_col[col] = by_col[col].map { |val| val ? val.round(decimals) : nil }
    end

    # Save to a CSV or TSV file.
    # A '.csv' extension (case-insensitive) writes CSV; any other extension
    # writes tab-separated values.
    def save_as(filename)
      ext = File.extname(filename).downcase
      data = ext == '.csv' ? to_s : to_tsv
      File.write filename, data
    end

    # Print some or all rows
    def show(rows=:all)
      puts to_ascii(rows)
    end

    # Returns a table string rendered with Terminal::Table.
    # Pass a number to limit the output to that many leading rows.
    def to_ascii(rows=:all)
      table = rows == :all ? by_row : first(rows)
      body = table.map { |row| row.fields }
      Terminal::Table.new(headings: headers, rows: body).to_s
    end

    # Convert table to a TSV string: one header line followed by one line
    # per row, joined with "\n" and without a trailing newline. Fields are
    # written as-is (no quoting or escaping).
    def to_tsv
      lines = [headers.join("\t")]
      by_row.each do |row|
        lines << row.fields.join("\t")
      end
      lines.join "\n"
    end

  end
end
@@ -0,0 +1,8 @@
1
module DataMix
  # NilClass refinement: the four arithmetic operators are defined on nil
  # and always answer nil, whatever the operand is.
  refine NilClass do
    def *(other)
      nil
    end

    def +(other)
      nil
    end

    def -(other)
      nil
    end

    def /(other)
      nil
    end
  end
end
@@ -0,0 +1,7 @@
1
module DataMix
  # Object refinement: makes +file+ callable from any object while the
  # refinement is active.
  refine Object do
    # Load the file at +filename+ through CSV.table and return the result.
    def file(filename)
      CSV.table(filename)
    end
  end
end
@@ -0,0 +1,3 @@
1
module DataMix
  # Version string of the gem.
  VERSION = "0.0.1"
end
data/lib/datamix.rb ADDED
@@ -0,0 +1,8 @@
1
+ require 'csv'
2
+
3
+ require 'datamix/version'
4
+ require 'datamix/exceptions'
5
+ require 'datamix/refinements/array'
6
+ require 'datamix/refinements/csv_table'
7
+ require 'datamix/refinements/nil'
8
+ require 'datamix/refinements/object'
metadata ADDED
@@ -0,0 +1,191 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: datamix
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Danny Ben Shitrit
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-03-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: docopt
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.5'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: terminal-table
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.7'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.7'
41
+ - !ruby/object:Gem::Dependency
42
+ name: runfile
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.8'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.8'
55
+ - !ruby/object:Gem::Dependency
56
+ name: runfile-tasks
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.4'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.4'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.5'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.5'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rdoc
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '5.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '5.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: simplecov
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.13'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.13'
111
+ - !ruby/object:Gem::Dependency
112
+ name: yard
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.8'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '0.8'
125
+ - !ruby/object:Gem::Dependency
126
+ name: pry
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '0.10'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '0.10'
139
+ - !ruby/object:Gem::Dependency
140
+ name: pry-doc
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: '0.10'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: '0.10'
153
+ description: DSL for manipulating tabular data
154
+ email: db@dannyben.com
155
+ executables: []
156
+ extensions: []
157
+ extra_rdoc_files: []
158
+ files:
159
+ - README.md
160
+ - lib/datamix.rb
161
+ - lib/datamix/exceptions.rb
162
+ - lib/datamix/refinements/array.rb
163
+ - lib/datamix/refinements/csv_table.rb
164
+ - lib/datamix/refinements/nil.rb
165
+ - lib/datamix/refinements/object.rb
166
+ - lib/datamix/version.rb
167
+ homepage: https://github.com/DannyBen/datamix
168
+ licenses:
169
+ - MIT
170
+ metadata: {}
171
+ post_install_message:
172
+ rdoc_options: []
173
+ require_paths:
174
+ - lib
175
+ required_ruby_version: !ruby/object:Gem::Requirement
176
+ requirements:
177
+ - - ">="
178
+ - !ruby/object:Gem::Version
179
+ version: 2.0.0
180
+ required_rubygems_version: !ruby/object:Gem::Requirement
181
+ requirements:
182
+ - - ">="
183
+ - !ruby/object:Gem::Version
184
+ version: '0'
185
+ requirements: []
186
+ rubyforge_project:
187
+ rubygems_version: 2.6.6
188
+ signing_key:
189
+ specification_version: 4
190
+ summary: DSL for manipulating tabular data
191
+ test_files: []