slicing 0.1.0.pre → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ac5e311db2eb6d863920974446dabd370e217a71
4
- data.tar.gz: 5a6b337cc80ad664fdd23a41af0b6a406755f203
3
+ metadata.gz: 014cfbd59f2b2e16e160d0188b59984f949eb0f6
4
+ data.tar.gz: b42b46be39904cbf248cfea88bc1b8eee2e0ee1a
5
5
  SHA512:
6
- metadata.gz: b45eb3134ff3c96fbe7da6f76ccb5b987abb7caab0d55e6cf3cc972e2474132f14559d9fa41111df5557ced0952dad12e9cddd4f55c9fd6dead6f8958c35b5c4
7
- data.tar.gz: 386af6723a8e33bacff56a6e45f31b57316c79c03a815d4533c73e51b6c2f31323c62dc1b1cae5ac18916018834181a37ebd7a38826282946895298a3b6b9e20
6
+ metadata.gz: 49dcfb2d5407fc30018d660bfaa07ea56c82a6d848a5b8de585450d4555ba16415c393cc9cfa01537801ec2a0eae3b48b024d38803799e14e2616579fdf985de
7
+ data.tar.gz: a7717fc4df45630346a4c9e6003ae34f13617ec0d542631bc61e04c0a7d6caed253182d1ff61bb560b2ec4ddc2dfefd9c873aafb5d5ec28295fef7e84be5c9bb
data/README.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # Slicing
2
- :v: Slice and dice your csv files for further analysis
2
+ :v: Slice and dice your csv files for further analysis.
3
+
4
+ # Reason for making slicing
5
+ No database required, mom!
6
+
7
+ Data slicing and cleaning usually happen within a database like MySQL or PostgreSQL, and you need to use SQL commands to manipulate the data.
8
+
9
+ I thought: why can't I slice and dice the data on the go, then take a subset of it and peek at it in Excel or a text editor?
10
+
11
+ Hence, slicing was created to make it easy to slice CSV files from the terminal.
3
12
 
4
13
  ## Installation
5
14
 
@@ -18,16 +27,17 @@ Or install it yourself as:
18
27
  $ gem install slicing
19
28
 
20
29
  ## Usage
30
+ `slicing mask` - mask the column with md5.
21
31
 
22
- `slicing subset` -
32
+ `slicing subset` - write a subset of the first lines (1000 by default, set with `-l`) of a bigger csv file to an output file.
23
33
 
24
- `slicing head` -
34
+ `slicing head` - return the header of the csv file.
25
35
 
26
- `slicing rm` -
36
+ `slicing rm` - remove the column from csv file.
27
37
 
28
- `slicing first` -
38
+ `slicing first` - print the first lines (100 by default, set with `-l`) of the csv file.
29
39
 
30
- `slicing count` -
40
+ `slicing count` - return the total number of rows and columns of the csv file.
31
41
 
32
42
  ## Development
33
43
 
@@ -1,4 +1,5 @@
1
1
  require "slicing/version"
2
+ require 'digest/md5'
2
3
  require 'thor'
3
4
  require 'csv'
4
5
 
@@ -6,23 +7,72 @@ module Slicing
6
7
  class Base < Thor
7
8
  check_unknown_options!
8
9
  package_name 'slicing'
9
- default_task :hello
10
+ default_task :help
10
11
 
11
- desc :rm, ""
12
+ desc :sample, "create a sample output"
13
+ def sample path, output_path, size
14
+ file_csv = CSV.read(path,:headers=> true, :encoding => "ISO8859-1:utf-8")
15
+ sample = file_csv.sample(size)
16
+ CSV.open(output_path, "a+") do |csv|
17
+ sample.each do |value|
18
+ csv << value
19
+ end
20
+ end
21
+ end
22
+
23
+ desc :freq, "calculate item frequencies"
24
+ def freq path, column_name, output_path
25
+ file_to_count = "./#{path}.csv"
26
+ output = "./#{path}-counted.csv"
27
+ file_to_count_csv = CSV.read(file_to_count,:headers=> true, :encoding => "ISO8859-1:utf-8")
28
+ unique_nric_array = file_to_count_csv[column_name]
29
+ unique_nric = []
30
+ unique_nric_array.each_with_index do |value, index|
31
+ unique_nric.push(value) if index !=0
32
+ end
33
+
34
+ final_hash = score(unique_nric)
35
+ CSV.open(output, "a+") do |csv|
36
+ final_hash.each do |value|
37
+ csv << [value[0], value[1]]
38
+ end
39
+ end
40
+ end
41
+
42
+
43
+ desc :mask, "mask a particular column"
44
+ def mask path, column_name, output_path
45
+ original = CSV.read(path, { headers: true, return_headers: true, :encoding => "ISO8859-1:utf-8"})
46
+ CSV.open(output_path, 'a+') do |csv|
47
+ original.each do |row|
48
+ csv << array
49
+ end
50
+ end
51
+ end
52
+
53
+ desc :rm, "remove a column"
54
+ method_option :utf, type: :string, aliases: '-u', default: "ISO8859-1:utf-8"
55
+ method_option :headers, type: :boolean, aliases: '-h', default: true
56
+ method_option :rowsep, type: :string, aliases: '-r', default: nil
12
57
  def rm path, column_name, output
13
- data = CSV.read(path, :headers=> false, :encoding => "ISO8859-1:utf-8") #2014
14
- data.delete(column_name)
15
- CSV.open(output,"a+") do |csv|
16
- data.each_with_index do |row,index|
58
+ # headers, rowsep, utf = process_options(options[:headers], options[:rowsep], options[:utf])
59
+ if options[:rowsep] != nil
60
+ original = CSV.read(path, { headers: options[:headers], return_headers: options[:headers], :row_sep=> options[:rowsep], :encoding => options[:utf]})
61
+ else
62
+ original = CSV.read(path, { headers: options[:headers], return_headers: options[:headers], :encoding => options[:utf]})
63
+ end
64
+ original.delete(column_name)
65
+ CSV.open(output, 'a+') do |csv|
66
+ original.each do |row|
17
67
  csv << row
18
68
  end
19
69
  end
20
70
  end
21
71
 
22
-
23
- desc :first, ""
72
+ desc :first, "display the first numbers of line"
73
+ method_option :line, type: :numeric, aliases: '-l', default: 100
24
74
  def first csv_file #, value=100
25
- stop = 100
75
+ stop = options[:line]
26
76
  counter = 0
27
77
  CSV.foreach(csv_file, :headers => false, encoding: "ISO8859-1:utf-8") do |row|
28
78
  exit if counter == stop
@@ -31,11 +81,10 @@ module Slicing
31
81
  puts row
32
82
  rescue
33
83
  end
34
-
35
84
  end
36
85
  end
37
86
 
38
- desc :head, ""
87
+ desc :head, "show the headers"
39
88
  def head csv_file
40
89
  CSV.foreach(csv_file, :headers => false, encoding: "ISO8859-1:utf-8") do |row|
41
90
  puts row
@@ -45,18 +94,29 @@ module Slicing
45
94
  end
46
95
  end
47
96
 
97
+ desc :unique, "calculate number of unique values in column"
98
+ def unique path, column_name
99
+ data = CSV.read(path, :headers => true, return_headers: true, encoding: "ISO8859-1:utf-8")
100
+ array = data[column_name]
101
+ puts array.uniq.count if array != nil
102
+ end
103
+
48
104
 
49
- desc :count, ""
105
+ desc :count, "count the number of rows and columns"
50
106
  def count csv_file
51
- data = CSV.read(csv_file)
52
- puts "#{data.count} rows"
107
+ data = CSV.read(csv_file, :headers => false, encoding: "ISO8859-1:utf-8")
108
+ puts "#{data.count} rows #{data[0].count} columns"
109
+ puts "---"
110
+ puts "#{data[0]}"
53
111
  end
54
112
 
55
- desc :subset, ""
56
- def subset csv_file, output, value=10
113
+ desc :subset, "create a subset of the data"
114
+ method_option :line, type: :numeric, aliases: '-l', default: 1000
115
+ def subset(csv_file, output)
57
116
  path = csv_file
58
117
  output_directory = output #"/Users/ytbryan/Desktop/output/subset-2015.csv" #output directory
59
- stop = value
118
+ # options[:num] == nil ? (stop = 10) : (stop = options[:num])
119
+ stop = options[:line]
60
120
  counter = 0
61
121
  CSV.foreach(path, :headers => false, encoding: "ISO8859-1:utf-8") do |row|
62
122
  exit if counter == stop
@@ -70,5 +130,45 @@ module Slicing
70
130
  end
71
131
  end
72
132
 
133
+ # desc :subsetagain, ""
134
+ # def subsetagain csv_file, output, value=10
135
+ # path = csv_file
136
+ # output_directory = output #"/Users/ytbryan/Desktop/output/subset-2015.csv" #output directory
137
+ # stop = value
138
+ # counter = 0
139
+ # CSV.foreach(path, :headers => false, :row_sep => "\r\n", encoding: "ISO8859-1:utf-8") do |row|
140
+ # exit if counter == stop
141
+ # begin
142
+ # counter = counter + 1
143
+ # CSV.open(output_directory, "a+") do |csv|
144
+ # csv << row
145
+ # end
146
+ # rescue
147
+ # end
148
+ # end
149
+ # end
150
+
151
+ private
152
+
153
+ def process_options headers, rowsep, utf
154
+ if headers == nil
155
+ headers = true
156
+ else
157
+ headers = headers
158
+ end
159
+ return true, "\r\n" , "ISO8859-1:utf-8"
160
+ end
161
+
162
+ def masking(value)
163
+ value != nil ? answer = Digest::MD5.hexdigest(value) : answer
164
+ end
165
+
166
+ def score( array )
167
+ hash = Hash.new(0)
168
+ array.each{|key| hash[key] += 1}
169
+ hash
170
+ end
171
+
172
+
73
173
  end
74
174
  end
@@ -1,3 +1,3 @@
1
1
  module Slicing
2
- VERSION = "0.1.0.pre"
2
+ VERSION = "0.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slicing
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.pre
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bryan Lim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-24 00:00:00.000000000 Z
11
+ date: 2016-07-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -58,7 +58,6 @@ files:
58
58
  - bin/slicing
59
59
  - lib/slicing.rb
60
60
  - lib/slicing/version.rb
61
- - slicing-0.1.0.gem
62
61
  - slicing.gemspec
63
62
  homepage: http://github.com/ytbryan/slicing
64
63
  licenses:
@@ -75,9 +74,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
75
74
  version: '0'
76
75
  required_rubygems_version: !ruby/object:Gem::Requirement
77
76
  requirements:
78
- - - ">"
77
+ - - ">="
79
78
  - !ruby/object:Gem::Version
80
- version: 1.3.1
79
+ version: '0'
81
80
  requirements: []
82
81
  rubyforge_project:
83
82
  rubygems_version: 2.5.1
Binary file