slicing 0.1.0.pre → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ac5e311db2eb6d863920974446dabd370e217a71
4
- data.tar.gz: 5a6b337cc80ad664fdd23a41af0b6a406755f203
3
+ metadata.gz: 014cfbd59f2b2e16e160d0188b59984f949eb0f6
4
+ data.tar.gz: b42b46be39904cbf248cfea88bc1b8eee2e0ee1a
5
5
  SHA512:
6
- metadata.gz: b45eb3134ff3c96fbe7da6f76ccb5b987abb7caab0d55e6cf3cc972e2474132f14559d9fa41111df5557ced0952dad12e9cddd4f55c9fd6dead6f8958c35b5c4
7
- data.tar.gz: 386af6723a8e33bacff56a6e45f31b57316c79c03a815d4533c73e51b6c2f31323c62dc1b1cae5ac18916018834181a37ebd7a38826282946895298a3b6b9e20
6
+ metadata.gz: 49dcfb2d5407fc30018d660bfaa07ea56c82a6d848a5b8de585450d4555ba16415c393cc9cfa01537801ec2a0eae3b48b024d38803799e14e2616579fdf985de
7
+ data.tar.gz: a7717fc4df45630346a4c9e6003ae34f13617ec0d542631bc61e04c0a7d6caed253182d1ff61bb560b2ec4ddc2dfefd9c873aafb5d5ec28295fef7e84be5c9bb
data/README.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # Slicing
2
- :v: Slice and dice your csv files for further analysis
2
+ :v: Slice and dice your csv files for further analysis.
3
+
4
+ # Reason for making slicing
5
+ No database required, mom!
6
+
7
+ Data slicing and cleaning usually happen inside a database such as MySQL or PostgreSQL, where you need to use SQL commands to manipulate the data.
8
+
9
+ I wondered why I couldn't slice and dice the files on the go, then take a subset of the data and peek at it in Excel or a text editor.
10
+
11
+ Hence, slicing was created to make it easy to slice csv files from the terminal.
3
12
 
4
13
  ## Installation
5
14
 
@@ -18,16 +27,17 @@ Or install it yourself as:
18
27
  $ gem install slicing
19
28
 
20
29
  ## Usage
30
+ `slicing mask` - mask the column with md5.
21
31
 
22
- `slicing subset` -
32
+ `slicing subset` - return a subset of the bigger csv file (the first 1000 lines by default; change with `-l`).
23
33
 
24
- `slicing head` -
34
+ `slicing head` - return the header of the csv file.
25
35
 
26
- `slicing rm` -
36
+ `slicing rm` - remove the column from csv file.
27
37
 
28
- `slicing first` -
38
+ `slicing first` - return the first line of data of the csv file.
29
39
 
30
- `slicing count` -
40
+ `slicing count` - return the total number of rows and columns of the csv file.
31
41
 
32
42
  ## Development
33
43
 
@@ -1,4 +1,5 @@
1
1
  require "slicing/version"
2
+ require 'digest/md5'
2
3
  require 'thor'
3
4
  require 'csv'
4
5
 
@@ -6,23 +7,72 @@ module Slicing
6
7
  class Base < Thor
7
8
  check_unknown_options!
8
9
  package_name 'slicing'
9
- default_task :hello
10
+ default_task :help
10
11
 
11
- desc :rm, ""
12
+ desc :sample, "create a sample output"
13
+ def sample path, output_path, size
14
+ file_csv = CSV.read(path,:headers=> true, :encoding => "ISO8859-1:utf-8")
15
+ sample = file_csv.sample(size)
16
+ CSV.open(output_path, "a+") do |csv|
17
+ sample.each do |value|
18
+ csv << value
19
+ end
20
+ end
21
+ end
22
+
23
+ desc :freq, "calculate item frequencies"
24
+ def freq path, column_name, output_path
25
+ file_to_count = "./#{path}.csv"
26
+ output = "./#{path}-counted.csv"
27
+ file_to_count_csv = CSV.read(file_to_count,:headers=> true, :encoding => "ISO8859-1:utf-8")
28
+ unique_nric_array = file_to_count_csv[column_name]
29
+ unique_nric = []
30
+ unique_nric_array.each_with_index do |value, index|
31
+ unique_nric.push(value) if index !=0
32
+ end
33
+
34
+ final_hash = score(unique_nric)
35
+ CSV.open(output, "a+") do |csv|
36
+ final_hash.each do |value|
37
+ csv << [value[0], value[1]]
38
+ end
39
+ end
40
+ end
41
+
42
+
43
+ desc :mask, "mask a particular column"
44
+ def mask path, column_name, output_path
45
+ original = CSV.read(path, { headers: true, return_headers: true, :encoding => "ISO8859-1:utf-8"})
46
+ CSV.open(output_path, 'a+') do |csv|
47
+ original.each do |row|
48
+ csv << array
49
+ end
50
+ end
51
+ end
52
+
53
+ desc :rm, "remove a column"
54
+ method_option :utf, type: :string, aliases: '-u', default: "ISO8859-1:utf-8"
55
+ method_option :headers, type: :boolean, aliases: '-h', default: true
56
+ method_option :rowsep, type: :string, aliases: '-r', default: nil
12
57
  def rm path, column_name, output
13
- data = CSV.read(path, :headers=> false, :encoding => "ISO8859-1:utf-8") #2014
14
- data.delete(column_name)
15
- CSV.open(output,"a+") do |csv|
16
- data.each_with_index do |row,index|
58
+ # headers, rowsep, utf = process_options(options[:headers], options[:rowsep], options[:utf])
59
+ if options[:rowsep] != nil
60
+ original = CSV.read(path, { headers: options[:headers], return_headers: options[:headers], :row_sep=> options[:rowsep], :encoding => options[:utf]})
61
+ else
62
+ original = CSV.read(path, { headers: options[:headers], return_headers: options[:headers], :encoding => options[:utf]})
63
+ end
64
+ original.delete(column_name)
65
+ CSV.open(output, 'a+') do |csv|
66
+ original.each do |row|
17
67
  csv << row
18
68
  end
19
69
  end
20
70
  end
21
71
 
22
-
23
- desc :first, ""
72
+ desc :first, "display the first numbers of line"
73
+ method_option :line, type: :numeric, aliases: '-l', default: 100
24
74
  def first csv_file #, value=100
25
- stop = 100
75
+ stop = options[:line]
26
76
  counter = 0
27
77
  CSV.foreach(csv_file, :headers => false, encoding: "ISO8859-1:utf-8") do |row|
28
78
  exit if counter == stop
@@ -31,11 +81,10 @@ module Slicing
31
81
  puts row
32
82
  rescue
33
83
  end
34
-
35
84
  end
36
85
  end
37
86
 
38
- desc :head, ""
87
+ desc :head, "show the headers"
39
88
  def head csv_file
40
89
  CSV.foreach(csv_file, :headers => false, encoding: "ISO8859-1:utf-8") do |row|
41
90
  puts row
@@ -45,18 +94,29 @@ module Slicing
45
94
  end
46
95
  end
47
96
 
97
+ desc :unique, "calculate number of unique values in column"
98
+ def unique path, column_name
99
+ data = CSV.read(path, :headers => true, return_headers: true, encoding: "ISO8859-1:utf-8")
100
+ array = data[column_name]
101
+ puts array.uniq.count if array != nil
102
+ end
103
+
48
104
 
49
- desc :count, ""
105
+ desc :count, "count the number of rows and columns"
50
106
  def count csv_file
51
- data = CSV.read(csv_file)
52
- puts "#{data.count} rows"
107
+ data = CSV.read(csv_file, :headers => false, encoding: "ISO8859-1:utf-8")
108
+ puts "#{data.count} rows #{data[0].count} columns"
109
+ puts "---"
110
+ puts "#{data[0]}"
53
111
  end
54
112
 
55
- desc :subset, ""
56
- def subset csv_file, output, value=10
113
+ desc :subset, "create a subset of the data"
114
+ method_option :line, type: :numeric, aliases: '-l', default: 1000
115
+ def subset(csv_file, output)
57
116
  path = csv_file
58
117
  output_directory = output #"/Users/ytbryan/Desktop/output/subset-2015.csv" #output directory
59
- stop = value
118
+ # options[:num] == nil ? (stop = 10) : (stop = options[:num])
119
+ stop = options[:line]
60
120
  counter = 0
61
121
  CSV.foreach(path, :headers => false, encoding: "ISO8859-1:utf-8") do |row|
62
122
  exit if counter == stop
@@ -70,5 +130,45 @@ module Slicing
70
130
  end
71
131
  end
72
132
 
133
+ # desc :subsetagain, ""
134
+ # def subsetagain csv_file, output, value=10
135
+ # path = csv_file
136
+ # output_directory = output #"/Users/ytbryan/Desktop/output/subset-2015.csv" #output directory
137
+ # stop = value
138
+ # counter = 0
139
+ # CSV.foreach(path, :headers => false, :row_sep => "\r\n", encoding: "ISO8859-1:utf-8") do |row|
140
+ # exit if counter == stop
141
+ # begin
142
+ # counter = counter + 1
143
+ # CSV.open(output_directory, "a+") do |csv|
144
+ # csv << row
145
+ # end
146
+ # rescue
147
+ # end
148
+ # end
149
+ # end
150
+
151
+ private
152
+
153
+ def process_options headers, rowsep, utf
154
+ if headers == nil
155
+ headers = true
156
+ else
157
+ headers = headers
158
+ end
159
+ return true, "\r\n" , "ISO8859-1:utf-8"
160
+ end
161
+
162
+ def masking(value)
163
+ value != nil ? answer = Digest::MD5.hexdigest(value) : answer
164
+ end
165
+
166
+ def score( array )
167
+ hash = Hash.new(0)
168
+ array.each{|key| hash[key] += 1}
169
+ hash
170
+ end
171
+
172
+
73
173
  end
74
174
  end
@@ -1,3 +1,3 @@
1
1
  module Slicing
2
- VERSION = "0.1.0.pre"
2
+ VERSION = "0.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slicing
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.pre
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bryan Lim
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-24 00:00:00.000000000 Z
11
+ date: 2016-07-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -58,7 +58,6 @@ files:
58
58
  - bin/slicing
59
59
  - lib/slicing.rb
60
60
  - lib/slicing/version.rb
61
- - slicing-0.1.0.gem
62
61
  - slicing.gemspec
63
62
  homepage: http://github.com/ytbryan/slicing
64
63
  licenses:
@@ -75,9 +74,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
75
74
  version: '0'
76
75
  required_rubygems_version: !ruby/object:Gem::Requirement
77
76
  requirements:
78
- - - ">"
77
+ - - ">="
79
78
  - !ruby/object:Gem::Version
80
- version: 1.3.1
79
+ version: '0'
81
80
  requirements: []
82
81
  rubyforge_project:
83
82
  rubygems_version: 2.5.1
Binary file