sycsvpro 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.rspec +1 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +51 -0
- data/LICENSE +20 -0
- data/README.md +188 -0
- data/README.rdoc +44 -0
- data/Rakefile +44 -0
- data/bin/sycsvpro +208 -0
- data/features/step_definitions/sycsvpro_steps.rb +6 -0
- data/features/support/env.rb +15 -0
- data/features/sycsvpro.feature +8 -0
- data/html/Dsl.html +201 -0
- data/html/Object.html +116 -0
- data/html/README_rdoc.html +178 -0
- data/html/Sycsvpro/Analyzer.html +239 -0
- data/html/Sycsvpro/Calculator.html +354 -0
- data/html/Sycsvpro/Collector.html +281 -0
- data/html/Sycsvpro/ColumnFilter.html +165 -0
- data/html/Sycsvpro/Counter.html +397 -0
- data/html/Sycsvpro/Extractor.html +269 -0
- data/html/Sycsvpro/Filter.html +349 -0
- data/html/Sycsvpro/Header.html +228 -0
- data/html/Sycsvpro/Mapper.html +288 -0
- data/html/Sycsvpro/Profiler.html +234 -0
- data/html/Sycsvpro/RowFilter.html +162 -0
- data/html/Sycsvpro.html +141 -0
- data/html/created.rid +17 -0
- data/html/fonts/Lato-Light.ttf +0 -0
- data/html/fonts/Lato-LightItalic.ttf +0 -0
- data/html/fonts/Lato-Regular.ttf +0 -0
- data/html/fonts/Lato-RegularItalic.ttf +0 -0
- data/html/fonts/SourceCodePro-Bold.ttf +0 -0
- data/html/fonts/SourceCodePro-Regular.ttf +0 -0
- data/html/fonts.css +167 -0
- data/html/images/add.png +0 -0
- data/html/images/arrow_up.png +0 -0
- data/html/images/brick.png +0 -0
- data/html/images/brick_link.png +0 -0
- data/html/images/bug.png +0 -0
- data/html/images/bullet_black.png +0 -0
- data/html/images/bullet_toggle_minus.png +0 -0
- data/html/images/bullet_toggle_plus.png +0 -0
- data/html/images/date.png +0 -0
- data/html/images/delete.png +0 -0
- data/html/images/find.png +0 -0
- data/html/images/loadingAnimation.gif +0 -0
- data/html/images/macFFBgHack.png +0 -0
- data/html/images/package.png +0 -0
- data/html/images/page_green.png +0 -0
- data/html/images/page_white_text.png +0 -0
- data/html/images/page_white_width.png +0 -0
- data/html/images/plugin.png +0 -0
- data/html/images/ruby.png +0 -0
- data/html/images/tag_blue.png +0 -0
- data/html/images/tag_green.png +0 -0
- data/html/images/transparent.png +0 -0
- data/html/images/wrench.png +0 -0
- data/html/images/wrench_orange.png +0 -0
- data/html/images/zoom.png +0 -0
- data/html/index.html +202 -0
- data/html/js/darkfish.js +140 -0
- data/html/js/jquery.js +18 -0
- data/html/js/navigation.js +142 -0
- data/html/js/search.js +109 -0
- data/html/js/search_index.js +1 -0
- data/html/js/searcher.js +228 -0
- data/html/rdoc.css +580 -0
- data/html/table_of_contents.html +236 -0
- data/lib/sycsvpro/analyzer.rb +40 -0
- data/lib/sycsvpro/calculator.rb +94 -0
- data/lib/sycsvpro/collector.rb +60 -0
- data/lib/sycsvpro/column_filter.rb +23 -0
- data/lib/sycsvpro/counter.rb +74 -0
- data/lib/sycsvpro/dsl.rb +37 -0
- data/lib/sycsvpro/extractor.rb +39 -0
- data/lib/sycsvpro/filter.rb +98 -0
- data/lib/sycsvpro/header.rb +29 -0
- data/lib/sycsvpro/mapper.rb +53 -0
- data/lib/sycsvpro/profiler.rb +26 -0
- data/lib/sycsvpro/row_filter.rb +20 -0
- data/lib/sycsvpro/version.rb +5 -0
- data/lib/sycsvpro.rb +9 -0
- data/spec/sycsvpro/analyze_spec.rb +23 -0
- data/spec/sycsvpro/calculator_spec.rb +45 -0
- data/spec/sycsvpro/collector_spec.rb +27 -0
- data/spec/sycsvpro/counter_spec.rb +51 -0
- data/spec/sycsvpro/extractor_spec.rb +27 -0
- data/spec/sycsvpro/files/mappings +6 -0
- data/spec/sycsvpro/files/profile.rb +42 -0
- data/spec/sycsvpro/mapper_spec.rb +33 -0
- data/spec/sycsvpro/profiler_spec.rb +32 -0
- data/sycsvpro.gemspec +24 -0
- data/sycsvpro.rdoc +29 -0
- metadata +215 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: c635bce6d7a54564795bf018de60a62a8c1efec2
|
|
4
|
+
data.tar.gz: 6ac41a7ed53e1305632ec183b2f9d10a0d536fac
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 7584ba89eb1ba64a658aa610f7d4972a47df95a6acb8d5b8ad90b8b994cf7af43464c480d79a296052c50f990b69115386ac1a447ddba055e370d44e767047ed
|
|
7
|
+
data.tar.gz: 3ae8b59e1a79b4c9decb7de253b32d8df9b3dc4b463b856d6f7e8353169b5336777e2cbfe92bcf7fb7ddaa24d38f1aa52c840924403ba43dae1ec08c64af3469
|
data/.gitignore
ADDED
data/.rspec
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
--color
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
sycsvpro (0.0.1)
|
|
5
|
+
gli (= 2.9.0)
|
|
6
|
+
|
|
7
|
+
GEM
|
|
8
|
+
remote: https://rubygems.org/
|
|
9
|
+
specs:
|
|
10
|
+
aruba (0.5.4)
|
|
11
|
+
childprocess (>= 0.3.6)
|
|
12
|
+
cucumber (>= 1.1.1)
|
|
13
|
+
rspec-expectations (>= 2.7.0)
|
|
14
|
+
builder (3.2.2)
|
|
15
|
+
childprocess (0.4.0)
|
|
16
|
+
ffi (~> 1.0, >= 1.0.11)
|
|
17
|
+
cucumber (1.3.10)
|
|
18
|
+
builder (>= 2.1.2)
|
|
19
|
+
diff-lcs (>= 1.1.3)
|
|
20
|
+
gherkin (~> 2.12)
|
|
21
|
+
multi_json (>= 1.7.5, < 2.0)
|
|
22
|
+
multi_test (>= 0.0.2)
|
|
23
|
+
diff-lcs (1.2.5)
|
|
24
|
+
ffi (1.9.3)
|
|
25
|
+
gherkin (2.12.2)
|
|
26
|
+
multi_json (~> 1.3)
|
|
27
|
+
gli (2.9.0)
|
|
28
|
+
json (1.8.1)
|
|
29
|
+
multi_json (1.8.4)
|
|
30
|
+
multi_test (0.0.3)
|
|
31
|
+
rake (10.1.1)
|
|
32
|
+
rdoc (4.1.1)
|
|
33
|
+
json (~> 1.4)
|
|
34
|
+
rspec (2.14.1)
|
|
35
|
+
rspec-core (~> 2.14.0)
|
|
36
|
+
rspec-expectations (~> 2.14.0)
|
|
37
|
+
rspec-mocks (~> 2.14.0)
|
|
38
|
+
rspec-core (2.14.7)
|
|
39
|
+
rspec-expectations (2.14.4)
|
|
40
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
|
41
|
+
rspec-mocks (2.14.4)
|
|
42
|
+
|
|
43
|
+
PLATFORMS
|
|
44
|
+
ruby
|
|
45
|
+
|
|
46
|
+
DEPENDENCIES
|
|
47
|
+
aruba
|
|
48
|
+
rake
|
|
49
|
+
rdoc
|
|
50
|
+
rspec
|
|
51
|
+
sycsvpro!
|
data/LICENSE
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2014 Pierre Sugar
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
|
7
|
+
the Software without restriction, including without limitation the rights to
|
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
|
10
|
+
subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
syc-svpro
|
|
2
|
+
=========
|
|
3
|
+
|
|
4
|
+
Processing of csv files. *sycsvpro* offers the following functions
|
|
5
|
+
|
|
6
|
+
* analyze csv file
|
|
7
|
+
* extract rows and columns from a file
|
|
8
|
+
* collect values of rows and assign them to categories
|
|
9
|
+
* map column values to new values
|
|
10
|
+
* count values in columns and use the value as column name
|
|
11
|
+
* arithmetic operations on values of columns
|
|
12
|
+
* execute a ruby script file that operates on a csv file
|
|
13
|
+
|
|
14
|
+
To get help type
|
|
15
|
+
|
|
16
|
+
$ sycsvpro -h
|
|
17
|
+
|
|
18
|
+
In the following examples we assume the following file
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
customer;machine;control;drive;motor;date;contract
|
|
22
|
+
hello;h1;con123;dri120;mot100;1.12.3013;1
|
|
23
|
+
hello;h2;con123;dri130;mot110;1.12.3013;1
|
|
24
|
+
indix;i1;con456;dri130;mot090;1.12.3013;1
|
|
25
|
+
chiro;c1;con333;dri110;mot100;1.12.3013;1
|
|
26
|
+
chiro;c2;con331;dri100;mot130;1.12.3013;1
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Analyze
|
|
30
|
+
-------
|
|
31
|
+
|
|
32
|
+
Analyze the content of the provided file *in.csv*
|
|
33
|
+
|
|
34
|
+
$ sycsvpro -f in.csv analyze
|
|
35
|
+
Analysis of in.csv
|
|
36
|
+
7 columns: ["customer", "machine", "control", "drive", "motor", "date", "contract"]
|
|
37
|
+
10393 rows
|
|
38
|
+
0: customer
|
|
39
|
+
1: machine
|
|
40
|
+
2: control
|
|
41
|
+
3: drive
|
|
42
|
+
4: motor
|
|
43
|
+
5: date
|
|
44
|
+
6: contract
|
|
45
|
+
Row sample data:
|
|
46
|
+
hello;h1;con123;dri120;mot100;16.02.2014;1
|
|
47
|
+
|
|
48
|
+
Extract
|
|
49
|
+
-------
|
|
50
|
+
|
|
51
|
+
Extract rows 1, 2 and 10-20 as well as columns 4 and 6-7
|
|
52
|
+
|
|
53
|
+
$ sycsvpro -f in.csv -o out.csv extract -r 1,2,10-20 -c 4,6-7
|
|
54
|
+
|
|
55
|
+
Collect
|
|
56
|
+
-------
|
|
57
|
+
|
|
58
|
+
Collect the values of all product columns (2, 3 and 4) into the category products
|
|
59
|
+
|
|
60
|
+
$ sycsvpro -f in.csv -o out.csv collect -r 2-20 -c products:2-4
|
|
61
|
+
$ cat out.csv
|
|
62
|
+
[products]
|
|
63
|
+
con123
|
|
64
|
+
con331
|
|
65
|
+
con333
|
|
66
|
+
con456
|
|
67
|
+
dri100
|
|
68
|
+
dri110
|
|
69
|
+
dri120
|
|
70
|
+
dri130
|
|
71
|
+
mot090
|
|
72
|
+
mot100
|
|
73
|
+
mot110
|
|
74
|
+
mot130
|
|
75
|
+
|
|
76
|
+
Map
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
Map the product names to new names
|
|
80
|
+
|
|
81
|
+
The mapping file (mapping) uses the result from the collect command above
|
|
82
|
+
|
|
83
|
+
con123:control123
|
|
84
|
+
con331:control331
|
|
85
|
+
con333:control333
|
|
86
|
+
con456:control456
|
|
87
|
+
dri100:drive100
|
|
88
|
+
dri110:drive110
|
|
89
|
+
dri120:drive120
|
|
90
|
+
dri130:drive130
|
|
91
|
+
mot090:motor090
|
|
92
|
+
mot100:motor100
|
|
93
|
+
mot110:motor110
|
|
94
|
+
mot130:motor130
|
|
95
|
+
|
|
96
|
+
$ sycsvpro -f in.csv -o out.csv map mapping -c 2-4
|
|
97
|
+
|
|
98
|
+
Count
|
|
99
|
+
-----
|
|
100
|
+
|
|
101
|
+
Count all customers (key column) in rows 2 to 20 that have machines that start with *h* and have a contract valid beginning after 1.1.2000
|
|
102
|
+
|
|
103
|
+
$ sycsvpro -f in.csv -o out.csv count -r 2-20 -k 0 -c 1:/^h/,5:">1.1.2000" --df "%d.%m.%Y"
|
|
104
|
+
|
|
105
|
+
The result in file out.csv is
|
|
106
|
+
|
|
107
|
+
$ cat out.csv
|
|
108
|
+
customer;>1.1.2000;^h
|
|
109
|
+
hello;2;2
|
|
110
|
+
indix;1;0
|
|
111
|
+
chiro;2;0
|
|
112
|
+
|
|
113
|
+
Calc
|
|
114
|
+
----
|
|
115
|
+
|
|
116
|
+
Process arithmetic operations on the contract count and create a target column
|
|
117
|
+
|
|
118
|
+
$ sycsvpro -f in.csv -o out.csv calc -r 2-20 -h *,target -c 6:*2,7:target=c6*10
|
|
119
|
+
|
|
120
|
+
$ cat out.csv
|
|
121
|
+
customer;machine;control;drive;motor;date;contract;target
|
|
122
|
+
hello;h1;con123;dri120;mot100;1.12.3013;2;20
|
|
123
|
+
hello;h2;con123;dri130;mot110;1.12.3013;2;20
|
|
124
|
+
indix;i1;con456;dri130;mot090;1.12.3013;2;20
|
|
125
|
+
chiro;c1;con333;dri110;mot100;1.12.3013;2;20
|
|
126
|
+
chiro;c2;con331;dri100;mot130;1.12.3013;2;20
|
|
127
|
+
|
|
128
|
+
Execute
|
|
129
|
+
-------
|
|
130
|
+
|
|
131
|
+
Execute takes a Ruby script file as an argument and processes the script. The following command executes the script *script.rb* and invokes the method *calc*
|
|
132
|
+
|
|
133
|
+
$ sycsvpro execute ./script.rb calc
|
|
134
|
+
|
|
135
|
+
Below is an example script file that is ultimately doing the same as the count command
|
|
136
|
+
|
|
137
|
+
$ sycsvpro -f in.csv -o out.csv count -r 1-20 -k 0 -c 4,5
|
|
138
|
+
|
|
139
|
+
```
|
|
140
|
+
def calc
|
|
141
|
+
|
|
142
|
+
customers = {}
|
|
143
|
+
heading = []
|
|
144
|
+
|
|
145
|
+
rows infile: "./spec/sycsvpro/files/in.csv",
|
|
146
|
+
row_filter: "1-20",
|
|
147
|
+
key_column: 0,
|
|
148
|
+
machine_column: 3,
|
|
149
|
+
data_columns: [4,5] do |key, machine, columns|
|
|
150
|
+
customer = customers[key] || customers[key] = { name: key, products: Hash.new(0) }
|
|
151
|
+
columns.each do |column|
|
|
152
|
+
heading << column if heading.index(column).nil?
|
|
153
|
+
customer[:products][column] += 1
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
write_to "./spec/sycsvpro/files/out.csv" do |out|
|
|
158
|
+
out.puts (["customer"] + heading.sort).join(';')
|
|
159
|
+
customers.each do |k,v|
|
|
160
|
+
line = [k]
|
|
161
|
+
heading.sort.each do |h|
|
|
162
|
+
line << v[:products][h]
|
|
163
|
+
end
|
|
164
|
+
out.puts line.join(';')
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
*rows* and *write_to* are convenience methods provided by sycsvpro that can be used in script files to operate on files.
|
|
171
|
+
|
|
172
|
+
*rows* will return values at the specified columns in the order they are provided in the call to
|
|
173
|
+
rows. The columns to be returned in the block have to end with _column_ or _columns_ depending on whether a single value or an array should be returned. You can find the *rows* and *write_to* methods at _lib/sycsvpro/dsl.rb_.
|
|
174
|
+
|
|
175
|
+
Working with sycsvpro
|
|
176
|
+
=====================
|
|
177
|
+
|
|
178
|
+
sycsvpro emerged from my daily work when cleaning and analyzing data. If you want to dig deeper, I would recommend [R](http://www.r-project.org/).
|
|
179
|
+
|
|
180
|
+
A work flow could be as follows
|
|
181
|
+
|
|
182
|
+
* Analyze the file `analyze`
|
|
183
|
+
* Clean the data `map`
|
|
184
|
+
* Extract rows and columns of interest `extract`
|
|
185
|
+
* Count values `count`
|
|
186
|
+
* Do arithmetic operations on the values `calc`
|
|
187
|
+
|
|
188
|
+
When I have analyzed the data I use _Microsoft Excel_ or _LibreOffice Calc_ to create nice graphs. To create more sophisticated analyses *R* is the right tool to use.
|
data/README.rdoc
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
= sycsvpro
|
|
2
|
+
|
|
3
|
+
Author:: Pierre Sugar (mailto:pierre@sugaryourcoffee.de)
|
|
4
|
+
Copyright:: Copyright (c) 2014 by Pierre Sugar
|
|
5
|
+
License:: Distributed under the MIT license, see LICENSE in the source distro
|
|
6
|
+
|
|
7
|
+
The application provides an interface for analyzing, cleaning and operating on csv files
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
== Install
|
|
11
|
+
|
|
12
|
+
Install:
|
|
13
|
+
|
|
14
|
+
gem install sycsvpro
|
|
15
|
+
|
|
16
|
+
== Use
|
|
17
|
+
|
|
18
|
+
sycsvpro --help
|
|
19
|
+
|
|
20
|
+
== Developing for `sycsvpro`
|
|
21
|
+
|
|
22
|
+
First install bundler
|
|
23
|
+
|
|
24
|
+
gem install bundler
|
|
25
|
+
|
|
26
|
+
Get the development dependencies
|
|
27
|
+
|
|
28
|
+
bundle install
|
|
29
|
+
|
|
30
|
+
The code is in
|
|
31
|
+
|
|
32
|
+
lib/sycsvpro
|
|
33
|
+
bin
|
|
34
|
+
|
|
35
|
+
Tests are in
|
|
36
|
+
|
|
37
|
+
spec/sycsvpro
|
|
38
|
+
|
|
39
|
+
Test files are in
|
|
40
|
+
|
|
41
|
+
spec/sycsvpro/files
|
|
42
|
+
|
|
43
|
+
:include:sycsvpro.rdoc
|
|
44
|
+
|
data/Rakefile
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
require 'rake/clean'
|
|
2
|
+
require 'rubygems'
|
|
3
|
+
require 'rubygems/package_task'
|
|
4
|
+
require 'rdoc/task'
|
|
5
|
+
require 'cucumber'
|
|
6
|
+
require 'cucumber/rake/task'
|
|
7
|
+
Rake::RDocTask.new do |rd|
|
|
8
|
+
rd.main = "README.rdoc"
|
|
9
|
+
rd.rdoc_files.include("README.rdoc","lib/**/*.rb","bin/**/*")
|
|
10
|
+
rd.title = 'Your application title'
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
spec = eval(File.read('sycsvpro.gemspec'))
|
|
14
|
+
|
|
15
|
+
Gem::PackageTask.new(spec) do |pkg|
|
|
16
|
+
end
|
|
17
|
+
CUKE_RESULTS = 'results.html'
|
|
18
|
+
CLEAN << CUKE_RESULTS
|
|
19
|
+
desc 'Run features'
|
|
20
|
+
Cucumber::Rake::Task.new(:features) do |t|
|
|
21
|
+
opts = "features --format html -o #{CUKE_RESULTS} --format progress -x"
|
|
22
|
+
opts += " --tags #{ENV['TAGS']}" if ENV['TAGS']
|
|
23
|
+
t.cucumber_opts = opts
|
|
24
|
+
t.fork = false
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
desc 'Run features tagged as work-in-progress (@wip)'
|
|
28
|
+
Cucumber::Rake::Task.new('features:wip') do |t|
|
|
29
|
+
tag_opts = ' --tags ~@pending'
|
|
30
|
+
tag_opts = ' --tags @wip'
|
|
31
|
+
t.cucumber_opts = "features --format html -o #{CUKE_RESULTS} --format pretty -x -s#{tag_opts}"
|
|
32
|
+
t.fork = false
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
task :cucumber => :features
|
|
36
|
+
task 'cucumber:wip' => 'features:wip'
|
|
37
|
+
task :wip => 'features:wip'
|
|
38
|
+
require 'rake/testtask'
|
|
39
|
+
Rake::TestTask.new do |t|
|
|
40
|
+
t.libs << "test"
|
|
41
|
+
t.test_files = FileList['test/*_test.rb']
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
task :default => [:test,:features]
|
data/bin/sycsvpro
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'gli'
|
|
3
|
+
begin # XXX: Remove this begin/rescue before distributing your app
|
|
4
|
+
require 'sycsvpro'
|
|
5
|
+
rescue LoadError
|
|
6
|
+
STDERR.puts "In development, you need to use `bundle exec bin/sycsvpro` to run your app"
|
|
7
|
+
STDERR.puts "At install-time, RubyGems will make sure lib, etc. are in the load path"
|
|
8
|
+
STDERR.puts "Feel free to remove this message from bin/sycsvpro now"
|
|
9
|
+
exit 64
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
include GLI::App
|
|
13
|
+
|
|
14
|
+
program_desc 'Processing CSV files'
|
|
15
|
+
|
|
16
|
+
version Sycsvpro::VERSION
|
|
17
|
+
|
|
18
|
+
desc 'CSV file to operate on'
|
|
19
|
+
arg_name 'FILE'
|
|
20
|
+
flag [:f,:file]
|
|
21
|
+
|
|
22
|
+
desc 'CSV file to write the result to'
|
|
23
|
+
arg_name 'OUT_FILE'
|
|
24
|
+
flag [:o, :out]
|
|
25
|
+
|
|
26
|
+
desc 'Analyze the CSV file regarding columns, rows and content'
|
|
27
|
+
command :analyze do |c|
|
|
28
|
+
|
|
29
|
+
c.action do |global_options,options,args|
|
|
30
|
+
help_now! "You need to provide a file to analyze '-f FILE'" if global_options[:f].nil?
|
|
31
|
+
analyzer = Sycsvpro::Analyzer.new(global_options[:f])
|
|
32
|
+
result = analyzer.result
|
|
33
|
+
puts "Analysis of #{global_options[:f]}"
|
|
34
|
+
puts "#{result.col_count} columns: #{result.cols}"
|
|
35
|
+
puts "#{result.row_count} rows"
|
|
36
|
+
result.cols.each_with_index do |col, index|
|
|
37
|
+
puts "#{index}: #{col}"
|
|
38
|
+
end
|
|
39
|
+
puts "Row sample data:"
|
|
40
|
+
puts "#{result.sample_row}"
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
desc 'Extract specified rows and columns from the file'
|
|
45
|
+
command :extract do |c|
|
|
46
|
+
c.desc 'Rows to extract'
|
|
47
|
+
c.arg_name '1,2,10-30|REGEXP'
|
|
48
|
+
c.flag [:r, :row], :must_match => /\d+(?:,\d+|-\d+|,\/.*\/)*|\/.*\/(?:,\/.*\/|\d+)*/
|
|
49
|
+
|
|
50
|
+
c.desc 'Columns to extract'
|
|
51
|
+
c.arg_name '1,2,10-30'
|
|
52
|
+
c.flag [:c, :col], :must_match => /\d+(?:,\d+|-\d+)*/
|
|
53
|
+
|
|
54
|
+
c.action do |global_options,options,args|
|
|
55
|
+
help_now! "You need to provide a file to extract data from '-f FILE'" if global_options[:f].nil?
|
|
56
|
+
help_now! "You need to provide a result file '-o OUT_FILE'" if global_options[:o].nil?
|
|
57
|
+
|
|
58
|
+
puts "Extracting ..."
|
|
59
|
+
extractor = Sycsvpro::Extractor.new(infile: global_options[:f], outfile: global_options[:o],
|
|
60
|
+
rows: options[:r], cols: options[:c])
|
|
61
|
+
extractor.execute
|
|
62
|
+
puts "extract done"
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
desc 'Collect values of specified rows and columns from the file and group them in categories'
|
|
67
|
+
command :collect do |c|
|
|
68
|
+
|
|
69
|
+
c.desc 'Rows to consider for collection'
|
|
70
|
+
c.arg_name 'ROW1,ROW2,ROW10-ROW30|REGEXP'
|
|
71
|
+
c.flag [:r, :row], :must_match => /\d+(?:,\d+|-\d+|,\/.*\/)*|\/.*\/(?:,\/.*\/|\d+)*/
|
|
72
|
+
|
|
73
|
+
c.desc 'Columns to collect values from'
|
|
74
|
+
c.arg_name 'CATEGORY1:ROW1,ROW2,ROW10-ROW30+CATEGORY2:ROW3-ROW9'
|
|
75
|
+
c.flag [:c, :col], :must_match => /^\w*:\d+(?:,\d+|-\d+|\+\w*:\d+(?:,\d+|-\d+)*)*/
|
|
76
|
+
|
|
77
|
+
c.action do |global_options,options,args|
|
|
78
|
+
help_now! "You need to provide a file to collect data from '-f FILE'" if global_options[:f].nil?
|
|
79
|
+
help_now! "You need to provide a result file '-o OUT_FILE'" if global_options[:o].nil?
|
|
80
|
+
|
|
81
|
+
puts "Collecting ..."
|
|
82
|
+
collector = Sycsvpro::Collector.new(infile: global_options[:f], outfile: global_options[:o],
|
|
83
|
+
rows: options[:r], cols: options[:c])
|
|
84
|
+
collector.execute
|
|
85
|
+
puts "collect done"
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
desc 'Executes the code provided in a file'
|
|
90
|
+
arg_name 'PRO_FILE METHOD'
|
|
91
|
+
command :execute do |c|
|
|
92
|
+
c.action do |global_options,options,args|
|
|
93
|
+
help_now! "You need to provide a script FILE and a METHOD to call" if args.size < 2
|
|
94
|
+
profiler = Sycsvpro::Profiler.new(args[0])
|
|
95
|
+
profiler.execute(args[1])
|
|
96
|
+
puts "execute done"
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
desc 'Counts the occurences of column values. Uses column values as headings with count as ' +
|
|
101
|
+
'values. Columns with a condition will be added as new columns and the condition will ' +
|
|
102
|
+
'be set as column name'
|
|
103
|
+
|
|
104
|
+
command :count do |c|
|
|
105
|
+
|
|
106
|
+
c.desc 'Key column that is assigned the count of column values to'
|
|
107
|
+
c.arg_name 'KEY_COLUMN'
|
|
108
|
+
c.flag [:k, :key], :must_match => /^\d+/
|
|
109
|
+
|
|
110
|
+
c.desc 'Rows to consider'
|
|
111
|
+
c.arg_name '1,2,10-30|REGEXP'
|
|
112
|
+
c.flag [:r, :row], :must_match => /\d+(?:,\d+|-\d+|,\/.*\/)*|\/.*\/(?:,\/.*\/|\d+)*/
|
|
113
|
+
|
|
114
|
+
c.desc 'Columns to count where column 2 is counted conditionally'
|
|
115
|
+
c.arg_name '1,2:<14.2.2014,10-30'
|
|
116
|
+
c.flag [:c, :col], :must_match => /^\d+(?:,\d+|(?::[<=>]\d+.\d+.\d+|:\d+.\d+.\d+-\d+.\d+.\d+|:\/.*?\/|-\d+)*)*/
|
|
117
|
+
|
|
118
|
+
c.desc 'Format of date values'
|
|
119
|
+
c.arg_name '%d.%m.%Y|%m/%d/%Y|...'
|
|
120
|
+
c.flag [:df]
|
|
121
|
+
|
|
122
|
+
c.action do |global_options,options,args|
|
|
123
|
+
help_now! "You need to provide a file to count data from '-f FILE'" if global_options[:f].nil?
|
|
124
|
+
help_now! "You need to provide a result file '-o OUT_FILE'" if global_options[:o].nil?
|
|
125
|
+
|
|
126
|
+
counter = Sycsvpro::Counter.new(infile: global_options[:f], outfile: global_options[:o],
|
|
127
|
+
key: options[:k], rows: options[:r], cols: options[:c],
|
|
128
|
+
df: options[:df])
|
|
129
|
+
counter.execute
|
|
130
|
+
puts "count done"
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
desc 'Map values in columns to new values'
|
|
135
|
+
arg_name 'MAPPINGS-FILE'
|
|
136
|
+
command :map do |c|
|
|
137
|
+
c.desc 'Rows to consider'
|
|
138
|
+
c.arg_name 'ROW1,ROW2,ROW10-ROW30|REGEXP'
|
|
139
|
+
c.flag [:r, :row], :must_match => /\d+(?:,\d+|-\d+|,\/.*\/)*|\/.*\/(?:,\/.*\/|\d+)*/
|
|
140
|
+
|
|
141
|
+
c.desc 'Columns to consider for mapping'
|
|
142
|
+
c.arg_name 'COL1,COL2,COL10-COL30'
|
|
143
|
+
c.flag [:c, :col], :must_match => /\d+(?:,\d+|-\d+)*/
|
|
144
|
+
|
|
145
|
+
c.action do |global_options,options,args|
|
|
146
|
+
help_now! "You need to provide a file to map data from '-f FILE'" if global_options[:f].nil?
|
|
147
|
+
help_now! "You need to provide a result file '-o OUT_FILE'" if global_options[:o].nil?
|
|
148
|
+
help_now! "You need to provide a mapping file" if args.size == 0
|
|
149
|
+
|
|
150
|
+
mapper = Sycsvpro::Mapper.new(infile: global_options[:f], outfile: global_options[:o],
|
|
151
|
+
mapping: args[0], rows: options[:r], cols: options[:c])
|
|
152
|
+
mapper.execute
|
|
153
|
+
puts "mapping done"
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
desc 'Process math operations on columns'
|
|
158
|
+
command :calc do |c|
|
|
159
|
+
|
|
160
|
+
c.desc 'The first non-empty column is considered the header. '+
|
|
161
|
+
'If additional columns are created then *,COL1,COL2 will create the additional header '+
|
|
162
|
+
'columns COL1 and COL2'
|
|
163
|
+
# Placeholder shown in the help output for -h/--header: '*' keeps the
# existing header, followed by the names of any additional columns.
c.arg_name '*,COL1,COL2'
|
|
164
|
+
# Declare the default on the command (`c`) so that it applies to the
# -h/--header flag defined next. A bare `default_value` is sent to the
# app-level DSL instead and is not picked up by `c.flag`.
c.default_value '*'
|
|
165
|
+
c.flag [:h, :header], :must_match => /\*(?:,\w+)*/
|
|
166
|
+
|
|
167
|
+
# Help text for the -r/--row flag declared below (row numbers, ranges or
# a regular expression selecting the rows to calculate on).
c.desc 'Rows to consider for calculations'
|
|
168
|
+
c.arg_name 'ROW1,ROW2-ROW10|REGEXP'
|
|
169
|
+
c.flag [:r, :row], :must_match => /\d+(?:,\d+|-\d+|,\/.*\/)*|\/.*\/(?:,\/.*\/|\d+)*/
|
|
170
|
+
|
|
171
|
+
c.desc 'Column to do calculations on'
|
|
172
|
+
c.arg_name 'COL1:*2,COL2:-C3,COL3:*2+(4+C5),COL6:NEW_COL=C1+5'
|
|
173
|
+
c.flag [:c, :col], :must_match => /\d+:(?:[\*\/\+\-]|\w+=[\d|(]*)[\*\/\+\-\dc()]*(?:,\d+:(?:[\*\/\+\-]|\w+=[\d|(]*)[\*\/\+\-\dc()]*)*/
|
|
174
|
+
|
|
175
|
+
c.action do |global_options,options,args|
|
|
176
|
+
help_now! "You need to provide a file to calculate data at '-f FILE'" if global_options[:f].nil?
|
|
177
|
+
help_now! "You need to provide a result file '-o OUT_FILE'" if global_options[:o].nil?
|
|
178
|
+
help_now! "You need to provide the column flag" if options[:c].nil?
|
|
179
|
+
|
|
180
|
+
calculator = Sycsvpro::Calculator.new(infile: global_options[:f], outfile: global_options[:o],
|
|
181
|
+
header: options[:h], rows: options[:r], cols: options[:c])
|
|
182
|
+
calculator.execute
|
|
183
|
+
puts "calc done"
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
pre do |global,command,options,args|
|
|
188
|
+
# Pre logic here
|
|
189
|
+
# Return true to proceed; false to abort and not call the
|
|
190
|
+
# chosen command
|
|
191
|
+
# Use skips_pre before a command to skip this block
|
|
192
|
+
# on that command only
|
|
193
|
+
true
|
|
194
|
+
end
|
|
195
|
+
|
|
196
|
+
post do |global,command,options,args|
|
|
197
|
+
# Post logic here
|
|
198
|
+
# Use skips_post before a command to skip this
|
|
199
|
+
# block on that command only
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
on_error do |exception|
|
|
203
|
+
# Error logic here
|
|
204
|
+
# return false to skip default error handling
|
|
205
|
+
true
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
exit run(ARGV)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
require 'aruba/cucumber'
|
|
2
|
+
|
|
3
|
+
ENV['PATH'] = "#{File.expand_path(File.dirname(__FILE__) + '/../../bin')}#{File::PATH_SEPARATOR}#{ENV['PATH']}"
|
|
4
|
+
LIB_DIR = File.join(File.expand_path(File.dirname(__FILE__)),'..','..','lib')
|
|
5
|
+
|
|
6
|
+
Before do
|
|
7
|
+
# Using "announce" causes massive warnings on 1.9.2
|
|
8
|
+
@puts = true
|
|
9
|
+
@original_rubylib = ENV['RUBYLIB']
|
|
10
|
+
ENV['RUBYLIB'] = LIB_DIR + File::PATH_SEPARATOR + ENV['RUBYLIB'].to_s
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
After do
|
|
14
|
+
ENV['RUBYLIB'] = @original_rubylib
|
|
15
|
+
end
|