slicing 0.1.0.pre → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -6
- data/lib/slicing.rb +117 -17
- data/lib/slicing/version.rb +1 -1
- metadata +4 -5
- data/slicing-0.1.0.gem +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 014cfbd59f2b2e16e160d0188b59984f949eb0f6
|
4
|
+
data.tar.gz: b42b46be39904cbf248cfea88bc1b8eee2e0ee1a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 49dcfb2d5407fc30018d660bfaa07ea56c82a6d848a5b8de585450d4555ba16415c393cc9cfa01537801ec2a0eae3b48b024d38803799e14e2616579fdf985de
|
7
|
+
data.tar.gz: a7717fc4df45630346a4c9e6003ae34f13617ec0d542631bc61e04c0a7d6caed253182d1ff61bb560b2ec4ddc2dfefd9c873aafb5d5ec28295fef7e84be5c9bb
|
data/README.md
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# Slicing
|
2
|
-
:v: Slice and dice your csv files for further analysis
|
2
|
+
:v: Slice and dice your csv files for further analysis.
|
3
|
+
|
4
|
+
# Reason for making slicing
|
5
|
+
No database required, mom!
|
6
|
+
|
7
|
+
Data slicing and cleaning usually happen within a database like MySQL or PostgreSQL, and you need to use SQL commands to manipulate the data.
|
8
|
+
|
9
|
+
I thought: why can't I slice and dice the data on the go, then take a subset of it and peek at it in Excel or a text editor?
|
10
|
+
|
11
|
+
Hence, slicing was created to make it easy to slice csv files from the terminal.
|
3
12
|
|
4
13
|
## Installation
|
5
14
|
|
@@ -18,16 +27,17 @@ Or install it yourself as:
|
|
18
27
|
$ gem install slicing
|
19
28
|
|
20
29
|
## Usage
|
30
|
+
`slicing mask` - mask the column with md5.
|
21
31
|
|
22
|
-
`slicing subset` -
|
32
|
+
`slicing subset` - return a subset of the bigger csv file (the first 1000 lines by default; change with `-l`).
|
23
33
|
|
24
|
-
`slicing head` -
|
34
|
+
`slicing head` - return the header of the csv file.
|
25
35
|
|
26
|
-
`slicing rm` -
|
36
|
+
`slicing rm` - remove a column from the csv file.
|
27
37
|
|
28
|
-
`slicing first` -
|
38
|
+
`slicing first` - return the first lines of the csv file (100 by default; change with `-l`).
|
29
39
|
|
30
|
-
`slicing count` -
|
40
|
+
`slicing count` - return the total number of rows and columns of the csv file.
|
31
41
|
|
32
42
|
## Development
|
33
43
|
|
data/lib/slicing.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "slicing/version"
|
2
|
+
require 'digest/md5'
|
2
3
|
require 'thor'
|
3
4
|
require 'csv'
|
4
5
|
|
@@ -6,23 +7,72 @@ module Slicing
|
|
6
7
|
class Base < Thor
|
7
8
|
check_unknown_options!
|
8
9
|
package_name 'slicing'
|
9
|
-
default_task :
|
10
|
+
default_task :help
|
10
11
|
|
11
|
-
desc :
|
12
|
+
desc :sample, "create a sample output"
|
13
|
+
def sample path, output_path, size
|
14
|
+
file_csv = CSV.read(path,:headers=> true, :encoding => "ISO8859-1:utf-8")
|
15
|
+
sample = file_csv.sample(size)
|
16
|
+
CSV.open(output_path, "a+") do |csv|
|
17
|
+
sample.each do |value|
|
18
|
+
csv << value
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
desc :freq, "calculate item frequencies"
|
24
|
+
def freq path, column_name, output_path
|
25
|
+
file_to_count = "./#{path}.csv"
|
26
|
+
output = "./#{path}-counted.csv"
|
27
|
+
file_to_count_csv = CSV.read(file_to_count,:headers=> true, :encoding => "ISO8859-1:utf-8")
|
28
|
+
unique_nric_array = file_to_count_csv[column_name]
|
29
|
+
unique_nric = []
|
30
|
+
unique_nric_array.each_with_index do |value, index|
|
31
|
+
unique_nric.push(value) if index !=0
|
32
|
+
end
|
33
|
+
|
34
|
+
final_hash = score(unique_nric)
|
35
|
+
CSV.open(output, "a+") do |csv|
|
36
|
+
final_hash.each do |value|
|
37
|
+
csv << [value[0], value[1]]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
desc :mask, "mask a particular column"
|
44
|
+
def mask path, column_name, output_path
|
45
|
+
original = CSV.read(path, { headers: true, return_headers: true, :encoding => "ISO8859-1:utf-8"})
|
46
|
+
CSV.open(output_path, 'a+') do |csv|
|
47
|
+
original.each do |row|
|
48
|
+
csv << array
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
desc :rm, "remove a column"
|
54
|
+
method_option :utf, type: :string, aliases: '-u', default: "ISO8859-1:utf-8"
|
55
|
+
method_option :headers, type: :boolean, aliases: '-h', default: true
|
56
|
+
method_option :rowsep, type: :string, aliases: '-r', default: nil
|
12
57
|
def rm path, column_name, output
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
58
|
+
# headers, rowsep, utf = process_options(options[:headers], options[:rowsep], options[:utf])
|
59
|
+
if options[:rowsep] != nil
|
60
|
+
original = CSV.read(path, { headers: options[:headers], return_headers: options[:headers], :row_sep=> options[:rowsep], :encoding => options[:utf]})
|
61
|
+
else
|
62
|
+
original = CSV.read(path, { headers: options[:headers], return_headers: options[:headers], :encoding => options[:utf]})
|
63
|
+
end
|
64
|
+
original.delete(column_name)
|
65
|
+
CSV.open(output, 'a+') do |csv|
|
66
|
+
original.each do |row|
|
17
67
|
csv << row
|
18
68
|
end
|
19
69
|
end
|
20
70
|
end
|
21
71
|
|
22
|
-
|
23
|
-
|
72
|
+
desc :first, "display the first numbers of line"
|
73
|
+
method_option :line, type: :numeric, aliases: '-l', default: 100
|
24
74
|
def first csv_file #, value=100
|
25
|
-
stop =
|
75
|
+
stop = options[:line]
|
26
76
|
counter = 0
|
27
77
|
CSV.foreach(csv_file, :headers => false, encoding: "ISO8859-1:utf-8") do |row|
|
28
78
|
exit if counter == stop
|
@@ -31,11 +81,10 @@ module Slicing
|
|
31
81
|
puts row
|
32
82
|
rescue
|
33
83
|
end
|
34
|
-
|
35
84
|
end
|
36
85
|
end
|
37
86
|
|
38
|
-
desc :head, ""
|
87
|
+
desc :head, "show the headers"
|
39
88
|
def head csv_file
|
40
89
|
CSV.foreach(csv_file, :headers => false, encoding: "ISO8859-1:utf-8") do |row|
|
41
90
|
puts row
|
@@ -45,18 +94,29 @@ module Slicing
|
|
45
94
|
end
|
46
95
|
end
|
47
96
|
|
97
|
+
desc :unique, "calculate number of unique values in column"
|
98
|
+
def unique path, column_name
|
99
|
+
data = CSV.read(path, :headers => true, return_headers: true, encoding: "ISO8859-1:utf-8")
|
100
|
+
array = data[column_name]
|
101
|
+
puts array.uniq.count if array != nil
|
102
|
+
end
|
103
|
+
|
48
104
|
|
49
|
-
desc :count, ""
|
105
|
+
desc :count, "count the number of rows and columns"
|
50
106
|
def count csv_file
|
51
|
-
data = CSV.read(csv_file)
|
52
|
-
puts "#{data.count} rows"
|
107
|
+
data = CSV.read(csv_file, :headers => false, encoding: "ISO8859-1:utf-8")
|
108
|
+
puts "#{data.count} rows #{data[0].count} columns"
|
109
|
+
puts "---"
|
110
|
+
puts "#{data[0]}"
|
53
111
|
end
|
54
112
|
|
55
|
-
desc :subset, ""
|
56
|
-
|
113
|
+
desc :subset, "create a subset of the data"
|
114
|
+
method_option :line, type: :numeric, aliases: '-l', default: 1000
|
115
|
+
def subset(csv_file, output)
|
57
116
|
path = csv_file
|
58
117
|
output_directory = output #"/Users/ytbryan/Desktop/output/subset-2015.csv" #output directory
|
59
|
-
stop =
|
118
|
+
# options[:num] == nil ? (stop = 10) : (stop = options[:num])
|
119
|
+
stop = options[:line]
|
60
120
|
counter = 0
|
61
121
|
CSV.foreach(path, :headers => false, encoding: "ISO8859-1:utf-8") do |row|
|
62
122
|
exit if counter == stop
|
@@ -70,5 +130,45 @@ module Slicing
|
|
70
130
|
end
|
71
131
|
end
|
72
132
|
|
133
|
+
# desc :subsetagain, ""
|
134
|
+
# def subsetagain csv_file, output, value=10
|
135
|
+
# path = csv_file
|
136
|
+
# output_directory = output #"/Users/ytbryan/Desktop/output/subset-2015.csv" #output directory
|
137
|
+
# stop = value
|
138
|
+
# counter = 0
|
139
|
+
# CSV.foreach(path, :headers => false, :row_sep => "\r\n", encoding: "ISO8859-1:utf-8") do |row|
|
140
|
+
# exit if counter == stop
|
141
|
+
# begin
|
142
|
+
# counter = counter + 1
|
143
|
+
# CSV.open(output_directory, "a+") do |csv|
|
144
|
+
# csv << row
|
145
|
+
# end
|
146
|
+
# rescue
|
147
|
+
# end
|
148
|
+
# end
|
149
|
+
# end
|
150
|
+
|
151
|
+
private
|
152
|
+
|
153
|
+
def process_options headers, rowsep, utf
|
154
|
+
if headers == nil
|
155
|
+
headers = true
|
156
|
+
else
|
157
|
+
headers = headers
|
158
|
+
end
|
159
|
+
return true, "\r\n" , "ISO8859-1:utf-8"
|
160
|
+
end
|
161
|
+
|
162
|
+
def masking(value)
|
163
|
+
value != nil ? answer = Digest::MD5.hexdigest(value) : answer
|
164
|
+
end
|
165
|
+
|
166
|
+
def score( array )
|
167
|
+
hash = Hash.new(0)
|
168
|
+
array.each{|key| hash[key] += 1}
|
169
|
+
hash
|
170
|
+
end
|
171
|
+
|
172
|
+
|
73
173
|
end
|
74
174
|
end
|
data/lib/slicing/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slicing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bryan Lim
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -58,7 +58,6 @@ files:
|
|
58
58
|
- bin/slicing
|
59
59
|
- lib/slicing.rb
|
60
60
|
- lib/slicing/version.rb
|
61
|
-
- slicing-0.1.0.gem
|
62
61
|
- slicing.gemspec
|
63
62
|
homepage: http://github.com/ytbryan/slicing
|
64
63
|
licenses:
|
@@ -75,9 +74,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
75
74
|
version: '0'
|
76
75
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
76
|
requirements:
|
78
|
-
- - "
|
77
|
+
- - ">="
|
79
78
|
- !ruby/object:Gem::Version
|
80
|
-
version:
|
79
|
+
version: '0'
|
81
80
|
requirements: []
|
82
81
|
rubyforge_project:
|
83
82
|
rubygems_version: 2.5.1
|
data/slicing-0.1.0.gem
DELETED
Binary file
|