slicing 0.1.0.pre → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -6
- data/lib/slicing.rb +117 -17
- data/lib/slicing/version.rb +1 -1
- metadata +4 -5
- data/slicing-0.1.0.gem +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 014cfbd59f2b2e16e160d0188b59984f949eb0f6
|
4
|
+
data.tar.gz: b42b46be39904cbf248cfea88bc1b8eee2e0ee1a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 49dcfb2d5407fc30018d660bfaa07ea56c82a6d848a5b8de585450d4555ba16415c393cc9cfa01537801ec2a0eae3b48b024d38803799e14e2616579fdf985de
|
7
|
+
data.tar.gz: a7717fc4df45630346a4c9e6003ae34f13617ec0d542631bc61e04c0a7d6caed253182d1ff61bb560b2ec4ddc2dfefd9c873aafb5d5ec28295fef7e84be5c9bb
|
data/README.md
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# Slicing
|
2
|
-
:v: Slice and dice your csv files for further analysis
|
2
|
+
:v: Slice and dice your csv files for further analysis.
|
3
|
+
|
4
|
+
# Reason for making slicing
|
5
|
+
No database required, mom!
|
6
|
+
|
7
|
+
Data slicing and cleaning usually happen inside a database such as MySQL or PostgreSQL, which means writing SQL commands to manipulate the data.
|
8
|
+
|
9
|
+
I wondered why I couldn't slice and dice the data on the go: take a subset of it and peek at it in Excel or a text editor.
|
10
|
+
|
11
|
+
Hence, slicing was created to make it easy to slice CSV files from the terminal.
|
3
12
|
|
4
13
|
## Installation
|
5
14
|
|
@@ -18,16 +27,17 @@ Or install it yourself as:
|
|
18
27
|
$ gem install slicing
|
19
28
|
|
20
29
|
## Usage
|
30
|
+
`slicing mask` - mask the column with md5.
|
21
31
|
|
22
|
-
`slicing subset` -
|
32
|
+
`slicing subset` - return a subset of the first lines of a bigger csv file (1000 lines by default; change it with `-l`).
|
23
33
|
|
24
|
-
`slicing head` -
|
34
|
+
`slicing head` - return the header of the csv file.
|
25
35
|
|
26
|
-
`slicing rm` -
|
36
|
+
`slicing rm` - remove the column from csv file.
|
27
37
|
|
28
|
-
`slicing first` -
|
38
|
+
`slicing first` - print the first lines of the csv file (100 lines by default; change it with `-l`).
|
29
39
|
|
30
|
-
`slicing count` -
|
40
|
+
`slicing count` - return the total number of rows and columns of the csv file.
|
31
41
|
|
32
42
|
## Development
|
33
43
|
|
data/lib/slicing.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "slicing/version"
|
2
|
+
require 'digest/md5'
|
2
3
|
require 'thor'
|
3
4
|
require 'csv'
|
4
5
|
|
@@ -6,23 +7,72 @@ module Slicing
|
|
6
7
|
class Base < Thor
|
7
8
|
check_unknown_options!
|
8
9
|
package_name 'slicing'
|
9
|
-
default_task :
|
10
|
+
default_task :help
|
10
11
|
|
11
|
-
desc :
|
12
|
+
desc :sample, "create a sample output"
|
13
|
+
# Appends a random sample of rows from a CSV file to another CSV file.
#
# path        - path of the source CSV; its first line is read as the header.
# output_path - file the header and the sampled rows are appended to.
# size        - number of rows to draw, as an Integer or a numeric String
#               (command-line arguments arrive as Strings).
#
# Raises ArgumentError when size cannot be converted to an Integer.
def sample(path, output_path, size)
  header, *rows = CSV.read(path, headers: true, encoding: "ISO8859-1:utf-8").to_a
  # CSV::Table does not offer #sample, so the draw is made on plain row arrays.
  picked = rows.sample(Integer(size))
  CSV.open(output_path, "a+") do |csv|
    # Keep the header so the sample is a usable CSV on its own.
    csv << header unless header.nil? || header.empty?
    picked.each { |row| csv << row }
  end
end
|
22
|
+
|
23
|
+
desc :freq, "calculate item frequencies"
|
24
|
+
# Counts how often each value occurs in one column and writes the tallies out.
#
# path        - the source CSV. Used as given when such a file exists;
#               otherwise the older form "./<path>.csv" is read.
# column_name - header of the column whose values are counted.
# output_path - file that receives one "value,count" row per distinct value
#               (opened in append mode).
def freq(path, column_name, output_path)
  source = File.exist?(path) ? path : "./#{path}.csv"
  table = CSV.read(source, headers: true, encoding: "ISO8859-1:utf-8")
  # With headers: true (and no return_headers) the header cell is not part of
  # the column, so every entry is data and none may be skipped.
  counts = score(table[column_name])
  CSV.open(output_path, "a+") do |csv|
    counts.each { |value, occurrences| csv << [value, occurrences] }
  end
end
|
41
|
+
|
42
|
+
|
43
|
+
desc :mask, "mask a particular column"
|
44
|
+
# Copies a CSV file while replacing every value of one column with the
# result of the masking helper.
#
# path        - path of the source CSV; its first line is read as the header.
# column_name - header of the column to mask.
# output_path - file the header and the masked rows are appended to.
#
# Raises ArgumentError when the file has no column called column_name, so a
# mistyped name cannot silently produce an unmasked copy.
def mask(path, column_name, output_path)
  table = CSV.read(path, headers: true, return_headers: true, encoding: "ISO8859-1:utf-8")
  unless table.headers.include?(column_name)
    raise ArgumentError, "no column named #{column_name.inspect} in #{path}"
  end

  CSV.open(output_path, 'a+') do |csv|
    table.each do |row|
      # The header row is written through unchanged; only data cells are masked.
      row[column_name] = masking(row[column_name]) unless row.header_row?
      csv << row
    end
  end
end
|
52
|
+
|
53
|
+
desc :rm, "remove a column"
|
54
|
+
method_option :utf, type: :string, aliases: '-u', default: "ISO8859-1:utf-8"
|
55
|
+
method_option :headers, type: :boolean, aliases: '-h', default: true
|
56
|
+
method_option :rowsep, type: :string, aliases: '-r', default: nil
|
12
57
|
# Copies a CSV file without one of its columns.
#
# path        - path of the source CSV.
# column_name - header of the column to drop.
# output      - file the remaining columns are appended to.
#
# Reads the :headers, :rowsep and :utf command options.
# NOTE(review): removing by name relies on :headers being on; with it off the
# rows are plain arrays and appear to be copied through unchanged - confirm.
def rm(path, column_name, output)
  read_options = {
    headers: options[:headers],
    return_headers: options[:headers],
    encoding: options[:utf]
  }
  # Pass :row_sep only when one was supplied, leaving CSV's default otherwise.
  read_options[:row_sep] = options[:rowsep] unless options[:rowsep].nil?

  # Keyword arguments: a positional options hash is rejected by current CSV.
  table = CSV.read(path, **read_options)
  table.delete(column_name)
  CSV.open(output, 'a+') do |csv|
    table.each { |row| csv << row }
  end
end
|
21
71
|
|
22
|
-
|
23
|
-
|
72
|
+
desc :first, "display the first numbers of line"
|
73
|
+
method_option :line, type: :numeric, aliases: '-l', default: 100
|
24
74
|
def first csv_file #, value=100
|
25
|
-
stop =
|
75
|
+
stop = options[:line]
|
26
76
|
counter = 0
|
27
77
|
CSV.foreach(csv_file, :headers => false, encoding: "ISO8859-1:utf-8") do |row|
|
28
78
|
exit if counter == stop
|
@@ -31,11 +81,10 @@ module Slicing
|
|
31
81
|
puts row
|
32
82
|
rescue
|
33
83
|
end
|
34
|
-
|
35
84
|
end
|
36
85
|
end
|
37
86
|
|
38
|
-
desc :head, ""
|
87
|
+
desc :head, "show the headers"
|
39
88
|
def head csv_file
|
40
89
|
CSV.foreach(csv_file, :headers => false, encoding: "ISO8859-1:utf-8") do |row|
|
41
90
|
puts row
|
@@ -45,18 +94,29 @@ module Slicing
|
|
45
94
|
end
|
46
95
|
end
|
47
96
|
|
97
|
+
desc :unique, "calculate number of unique values in column"
|
98
|
+
# Prints the number of distinct values found in one column of a CSV file.
#
# path        - path of the CSV; its first line is read as the header.
# column_name - header of the column to inspect.
#
# Prints nothing when the file has no column called column_name.
def unique(path, column_name)
  table = CSV.read(path, headers: true, return_headers: true, encoding: "ISO8859-1:utf-8")
  return unless table.headers.include?(column_name)

  # return_headers puts the header cell at the front of the column; drop it
  # so the column's own name is not counted as a value.
  values = table[column_name].drop(1)
  puts values.uniq.count
end
|
103
|
+
|
48
104
|
|
49
|
-
desc :count, ""
|
105
|
+
desc :count, "count the number of rows and columns"
|
50
106
|
# Prints the size of a CSV file followed by its first row.
#
# csv_file - path of the CSV to inspect.
#
# Every line is counted as a row, the first one included; the column count
# is taken from the first row.
def count(csv_file)
  rows = CSV.read(csv_file, headers: false, encoding: "ISO8859-1:utf-8")
  # An empty file has no first row; use an empty one rather than calling
  # #count on nil.
  first_row = rows.first || []
  puts "#{rows.count} rows #{first_row.count} columns"
  puts "---"
  puts first_row.to_s
end
|
54
112
|
|
55
|
-
desc :subset, ""
|
56
|
-
|
113
|
+
desc :subset, "create a subset of the data"
|
114
|
+
method_option :line, type: :numeric, aliases: '-l', default: 1000
|
115
|
+
def subset(csv_file, output)
|
57
116
|
path = csv_file
|
58
117
|
output_directory = output #"/Users/ytbryan/Desktop/output/subset-2015.csv" #output directory
|
59
|
-
stop =
|
118
|
+
# options[:num] == nil ? (stop = 10) : (stop = options[:num])
|
119
|
+
stop = options[:line]
|
60
120
|
counter = 0
|
61
121
|
CSV.foreach(path, :headers => false, encoding: "ISO8859-1:utf-8") do |row|
|
62
122
|
exit if counter == stop
|
@@ -70,5 +130,45 @@ module Slicing
|
|
70
130
|
end
|
71
131
|
end
|
72
132
|
|
133
|
+
# desc :subsetagain, ""
|
134
|
+
# def subsetagain csv_file, output, value=10
|
135
|
+
# path = csv_file
|
136
|
+
# output_directory = output #"/Users/ytbryan/Desktop/output/subset-2015.csv" #output directory
|
137
|
+
# stop = value
|
138
|
+
# counter = 0
|
139
|
+
# CSV.foreach(path, :headers => false, :row_sep => "\r\n", encoding: "ISO8859-1:utf-8") do |row|
|
140
|
+
# exit if counter == stop
|
141
|
+
# begin
|
142
|
+
# counter = counter + 1
|
143
|
+
# CSV.open(output_directory, "a+") do |csv|
|
144
|
+
# csv << row
|
145
|
+
# end
|
146
|
+
# rescue
|
147
|
+
# end
|
148
|
+
# end
|
149
|
+
# end
|
150
|
+
|
151
|
+
private
|
152
|
+
|
153
|
+
# Fills in defaults for the CSV reading options.
#
# headers - whether the first line is a header; nil means true.
# rowsep  - row separator; nil means "\r\n".
# utf     - encoding specification; nil means "ISO8859-1:utf-8".
#
# Returns a three-element Array: [headers, rowsep, utf]. Supplied values are
# passed through; only nil entries are replaced by their default.
def process_options(headers, rowsep, utf)
  [
    # An explicit false must survive, so test for nil rather than falsiness.
    headers.nil? ? true : headers,
    rowsep || "\r\n",
    utf || "ISO8859-1:utf-8"
  ]
end
|
161
|
+
|
162
|
+
# Returns the hexadecimal MD5 digest of value, or nil when value is nil.
def masking(value)
  return nil if value.nil?

  Digest::MD5.hexdigest(value)
end
|
165
|
+
|
166
|
+
# Tallies the elements of array.
#
# Returns a Hash mapping each distinct element to its number of occurrences,
# in order of first appearance; looking up an absent key yields 0.
def score(array)
  array.each_with_object(Hash.new(0)) do |item, tally|
    tally[item] += 1
  end
end
|
171
|
+
|
172
|
+
|
73
173
|
end
|
74
174
|
end
|
data/lib/slicing/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slicing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bryan Lim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -58,7 +58,6 @@ files:
|
|
58
58
|
- bin/slicing
|
59
59
|
- lib/slicing.rb
|
60
60
|
- lib/slicing/version.rb
|
61
|
-
- slicing-0.1.0.gem
|
62
61
|
- slicing.gemspec
|
63
62
|
homepage: http://github.com/ytbryan/slicing
|
64
63
|
licenses:
|
@@ -75,9 +74,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
75
74
|
version: '0'
|
76
75
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
76
|
requirements:
|
78
|
-
- - "
|
77
|
+
- - ">="
|
79
78
|
- !ruby/object:Gem::Version
|
80
|
-
version:
|
79
|
+
version: '0'
|
81
80
|
requirements: []
|
82
81
|
rubyforge_project:
|
83
82
|
rubygems_version: 2.5.1
|
data/slicing-0.1.0.gem
DELETED
Binary file
|