mode 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +63 -9
- data/lib/mode.rb +9 -3
- data/lib/mode/cli.rb +2 -1
- data/lib/mode/cli/analyze.rb +3 -3
- data/lib/mode/cli/package.rb +9 -6
- data/lib/mode/commands/analyze_schema.rb +5 -5
- data/lib/mode/commands/package.rb +13 -17
- data/lib/mode/package_builder.rb +57 -0
- data/lib/mode/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6bfab7e5e64907e822459c3d0750038e99ea0ce7
|
4
|
+
data.tar.gz: 6dfeb3b3138921cf3b95439d3b5d7775cee76025
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e69bfae6c5ba1335c3755c376871e2406a1e1615427d5578913fd41799c9808d606034727691bf87f71b6affe0c82a137f6e665e52e32c640256c12e4c6b7c18
|
7
|
+
data.tar.gz: 38a4242c9208533c02730ce09382810104b00769c9eec976f137de5a1accb5e7f2f53d55a1f692c154b9ad52be591efa81c027b6638bc0f2cd2302081023cbb7
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -6,23 +6,77 @@ This package provides command line tools for managing datasets and connecting da
|
|
6
6
|
* Personal data warehouse connectivity
|
7
7
|
* Dataset formatting and importing (CSV)
|
8
8
|
|
9
|
-
##
|
9
|
+
## Prerequisites
|
10
10
|
|
11
|
-
|
11
|
+
This package requires Ruby 1.9 or later; Ruby 2.0 is recommended.
|
12
12
|
|
13
|
-
|
13
|
+
If you don't have an up-to-date version of Ruby, or you're not sure, use the instructions below to get going.
|
14
14
|
|
15
|
-
|
15
|
+
### Mac OSX
|
16
16
|
|
17
|
-
|
17
|
+
To install Ruby on OSX, complete the 4 steps below.
|
18
18
|
|
19
|
-
|
19
|
+
1\. Install Homebrew
|
20
20
|
|
21
|
-
|
21
|
+
```
|
22
|
+
ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)"
|
23
|
+
```
|
24
|
+
|
25
|
+
2\. Install rbenv
|
26
|
+
|
27
|
+
```
|
28
|
+
brew update
|
29
|
+
brew install rbenv ruby-build
|
30
|
+
echo 'eval "$(rbenv init -)"' >> ~/.bash_profile
|
31
|
+
source ~/.bash_profile
|
32
|
+
```
|
33
|
+
|
34
|
+
3\. Install Ruby 2.0
|
35
|
+
|
36
|
+
Note: This usually takes several minutes
|
37
|
+
|
38
|
+
```
|
39
|
+
rbenv install 2.0.0-p353
|
40
|
+
rbenv global 2.0.0-p353
|
41
|
+
rbenv rehash
|
42
|
+
```
|
43
|
+
|
44
|
+
4\. Install the mode gem
|
45
|
+
|
46
|
+
```
|
47
|
+
gem install mode
|
48
|
+
rbenv rehash
|
49
|
+
```
|
50
|
+
|
51
|
+
### OSX Combined
|
52
|
+
|
53
|
+
For convenience, you can copy and paste all of the lines below into your terminal at once.
|
54
|
+
|
55
|
+
```
|
56
|
+
ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)"
|
57
|
+
brew update
|
58
|
+
brew install rbenv ruby-build
|
59
|
+
echo 'eval "$(rbenv init -)"' >> ~/.bash_profile
|
60
|
+
source ~/.bash_profile
|
61
|
+
rbenv install 2.0.0-p353
|
62
|
+
rbenv global 2.0.0-p353
|
63
|
+
gem install mode
|
64
|
+
rbenv rehash
|
65
|
+
```
|
22
66
|
|
23
|
-
|
67
|
+
### Windows
|
24
68
|
|
25
|
-
|
69
|
+
To install a current version of Ruby on Windows, complete the 2 steps below.
|
70
|
+
|
71
|
+
1\. Install Ruby 2.0
|
72
|
+
|
73
|
+
You can download the latest Ruby version from [RubyInstaller](http://rubyinstaller.org/downloads/).
|
74
|
+
|
75
|
+
2\. Install the mode gem
|
76
|
+
|
77
|
+
```
|
78
|
+
$ gem install mode
|
79
|
+
```
|
26
80
|
|
27
81
|
## Setup
|
28
82
|
|
data/lib/mode.rb
CHANGED
@@ -1,13 +1,19 @@
|
|
1
1
|
require 'thor'
|
2
2
|
require 'data_kit'
|
3
|
+
require 'data_package'
|
3
4
|
|
4
|
-
|
5
|
-
|
5
|
+
require 'mode/version'
|
6
|
+
|
7
|
+
# Config
|
6
8
|
require 'mode/config'
|
7
9
|
|
8
10
|
# Commands
|
9
11
|
require 'mode/commands/helpers'
|
10
12
|
require 'mode/commands/setup'
|
11
13
|
require 'mode/commands/import'
|
14
|
+
require 'mode/commands/package'
|
12
15
|
require 'mode/commands/analyze_field'
|
13
|
-
require 'mode/commands/analyze_schema'
|
16
|
+
require 'mode/commands/analyze_schema'
|
17
|
+
|
18
|
+
# Utilities
|
19
|
+
require 'mode/package_builder'
|
data/lib/mode/cli.rb
CHANGED
data/lib/mode/cli/analyze.rb
CHANGED
@@ -2,9 +2,9 @@ module Mode
|
|
2
2
|
module CLI
|
3
3
|
class Base < Thor
|
4
4
|
desc "analyze PATH [--field=POSITION] [--match-type=TYPE]", "Analyzes a dataset or a field "
|
5
|
-
option :sample, :banner => 'RATE
|
6
|
-
option :field, :banner => 'POSITION
|
7
|
-
option :match_type, :banner => 'TYPE
|
5
|
+
option :sample, :banner => 'RATE', :desc => "Proportion of rows to inspect. Example: 0.5"
|
6
|
+
option :field, :banner => 'POSITION', :desc => "Field positions begin at 0. Example: 10"
|
7
|
+
option :match_type, :banner => 'TYPE', :desc => "One of the following: string, number, integer, datetime, boolean"
|
8
8
|
# option :keys, :banner => 'POSITIONS (ex: 0,2)', :default => String.new
|
9
9
|
def analyze(path = nil)
|
10
10
|
#keys = options[:keys].split(',').collect(&:strip).collect(&:to_i)
|
data/lib/mode/cli/package.rb
CHANGED
@@ -1,12 +1,15 @@
|
|
1
1
|
module Mode
|
2
2
|
module CLI
|
3
3
|
class Base < Thor
|
4
|
-
desc "package
|
5
|
-
option :
|
6
|
-
option :
|
7
|
-
def package(
|
8
|
-
|
9
|
-
|
4
|
+
desc "package SOURCE PACKAGEPATH [--name=NAME] [--resource-name=NAME]", "Creates a new data package from a csv file"
|
5
|
+
option :name, :desc => 'The name of the package'
|
6
|
+
option :resource_name, :default => 'data', :desc => 'The name of the resource in package'
|
7
|
+
def package(source, dest)
|
8
|
+
parts = dest.split(File::Separator)
|
9
|
+
|
10
|
+
name = options[:name] || parts.last
|
11
|
+
package_path = parts.length == 1 ? parts.first : parts[0, parts.length - 1]
|
12
|
+
Mode::Commands::Package.new(source, package_path, name, options[:resource_name]).execute
|
10
13
|
end
|
11
14
|
end
|
12
15
|
end
|
@@ -22,10 +22,10 @@ module Mode
|
|
22
22
|
|
23
23
|
csv = DataKit::CSV::Parser.new(path)
|
24
24
|
|
25
|
-
say "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 *
|
25
|
+
say "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 * sampling_rate)}%)..."
|
26
26
|
|
27
27
|
analysis, total_time = timer_block do
|
28
|
-
DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate =>
|
28
|
+
DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sampling_rate)
|
29
29
|
end
|
30
30
|
|
31
31
|
puts analysis.use_type_hints
|
@@ -67,7 +67,7 @@ module Mode
|
|
67
67
|
elsif analysis.has_only_numeric_types?(field_name)
|
68
68
|
row << field_type
|
69
69
|
else
|
70
|
-
row << '** ' + field_type
|
70
|
+
row << '** ' + field_type.to_s
|
71
71
|
end
|
72
72
|
|
73
73
|
DataKit::Dataset::Field::Types.each do |type|
|
@@ -81,9 +81,9 @@ module Mode
|
|
81
81
|
say table
|
82
82
|
end
|
83
83
|
|
84
|
-
def
|
84
|
+
def sampling_rate
|
85
85
|
file_size = File.size(path)
|
86
|
-
options[:sample]
|
86
|
+
(options[:sample] || DataKit::CSV::SchemaAnalyzer.sampling_rate(file_size)).to_f
|
87
87
|
end
|
88
88
|
end
|
89
89
|
end
|
@@ -4,15 +4,15 @@ module Mode
|
|
4
4
|
module Commands
|
5
5
|
class Package < Thor
|
6
6
|
attr_accessor :data
|
7
|
-
attr_accessor :
|
8
|
-
attr_accessor :
|
9
|
-
attr_accessor :
|
7
|
+
attr_accessor :base_path
|
8
|
+
attr_accessor :package_name
|
9
|
+
attr_accessor :resource_name
|
10
10
|
|
11
|
-
def initialize(data,
|
11
|
+
def initialize(data, base_path, package_name, resource_name)
|
12
12
|
@data = data
|
13
|
-
@
|
14
|
-
@
|
15
|
-
@
|
13
|
+
@base_path = base_path
|
14
|
+
@package_name = package_name
|
15
|
+
@resource_name = resource_name
|
16
16
|
end
|
17
17
|
|
18
18
|
no_commands do
|
@@ -22,20 +22,20 @@ module Mode
|
|
22
22
|
return
|
23
23
|
end
|
24
24
|
|
25
|
-
unless valid_name?(
|
25
|
+
unless valid_name?(package_name)
|
26
26
|
error "Error: Data package names can only contain letters, numbers, hyphens and underscores"
|
27
27
|
return
|
28
28
|
end
|
29
29
|
|
30
|
-
if
|
31
|
-
error "Error: A data package already exists at #{
|
30
|
+
if DataPackage::Package.exist?(base_path)
|
31
|
+
error "Error: A data package already exists at #{base_path}"
|
32
32
|
return
|
33
33
|
else
|
34
|
-
FileUtils.mkdir_p(
|
34
|
+
FileUtils.mkdir_p(File.join(base_path, package_name))
|
35
35
|
end
|
36
36
|
|
37
|
-
csv =
|
38
|
-
package = Mode::
|
37
|
+
csv = DataKit::CSV::Parser.new(data)
|
38
|
+
package = Mode::PackageBuilder.new(csv, base_path, package_name, resource_name).execute
|
39
39
|
|
40
40
|
say "Finished packaging #{name}!"
|
41
41
|
say "Use `mode table create <account>/<tablename> #{name}` to create a table in the public data warehouse"
|
@@ -51,10 +51,6 @@ module Mode
|
|
51
51
|
def valid_data?(data)
|
52
52
|
!data.nil? && File.exist?(data)
|
53
53
|
end
|
54
|
-
|
55
|
-
def sample_rate
|
56
|
-
Mode::CSV::Analyzer.sample_rate(File.size(data))
|
57
|
-
end
|
58
54
|
end
|
59
55
|
end
|
60
56
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Mode
|
2
|
+
class PackageBuilder
|
3
|
+
attr_accessor :data
|
4
|
+
attr_accessor :base_path
|
5
|
+
attr_accessor :package_name
|
6
|
+
attr_accessor :resource_name
|
7
|
+
|
8
|
+
def initialize(data, base_path, package_name, resource_name)
|
9
|
+
@data = data
|
10
|
+
@base_path = base_path
|
11
|
+
@package_name = package_name
|
12
|
+
@resource_name = resource_name
|
13
|
+
end
|
14
|
+
|
15
|
+
def execute
|
16
|
+
package = init_package
|
17
|
+
analysis = build_analysis(data)
|
18
|
+
converter = convert_data(data, analysis, package)
|
19
|
+
package.resources << build_resource(analysis, package, resource_name, "#{resource_name}.csv")
|
20
|
+
package.dump && package
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def valid_name?(name)
|
26
|
+
name =~ /\A[\w\d\-\_]+\z/
|
27
|
+
end
|
28
|
+
|
29
|
+
def md5hash(path)
|
30
|
+
Digest::MD5.hexdigest(File.read(data_path))
|
31
|
+
end
|
32
|
+
|
33
|
+
def init_package
|
34
|
+
DataPackage::Package.init(base_path, package_name)
|
35
|
+
end
|
36
|
+
|
37
|
+
def build_analysis(data)
|
38
|
+
DataKit::CSV::Analyzer.analyze(data, :sampling_rate => 1)
|
39
|
+
end
|
40
|
+
|
41
|
+
def convert_data(data, analysis, package)
|
42
|
+
# say "Converting data into standard format..."
|
43
|
+
dest_path = File.join(package.base_path, "#{resource_name}.csv")
|
44
|
+
converter = DataKit::CSV::Converter.convert(data, analysis, dest_path)
|
45
|
+
end
|
46
|
+
|
47
|
+
def build_resource(analysis, package, name)
|
48
|
+
Mode::Package::Resource.build(
|
49
|
+
analysis.schema.to_package, path, {
|
50
|
+
'name' => name, 'format' => 'csv',
|
51
|
+
'dialect' => Mode::Package::Dialect.new,
|
52
|
+
'hash' => md5hash(File.join(package.base_path, "#{name}.csv"))
|
53
|
+
}
|
54
|
+
)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
data/lib/mode/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mode
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mode Analytics
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -181,6 +181,7 @@ files:
|
|
181
181
|
- lib/mode/commands/package.rb
|
182
182
|
- lib/mode/commands/setup.rb
|
183
183
|
- lib/mode/config.rb
|
184
|
+
- lib/mode/package_builder.rb
|
184
185
|
- lib/mode/version.rb
|
185
186
|
- mode.gemspec
|
186
187
|
- spec/commands/analyze_schema_spec.rb
|