mode 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +63 -9
- data/lib/mode.rb +9 -3
- data/lib/mode/cli.rb +2 -1
- data/lib/mode/cli/analyze.rb +3 -3
- data/lib/mode/cli/package.rb +9 -6
- data/lib/mode/commands/analyze_schema.rb +5 -5
- data/lib/mode/commands/package.rb +13 -17
- data/lib/mode/package_builder.rb +57 -0
- data/lib/mode/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6bfab7e5e64907e822459c3d0750038e99ea0ce7
|
4
|
+
data.tar.gz: 6dfeb3b3138921cf3b95439d3b5d7775cee76025
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e69bfae6c5ba1335c3755c376871e2406a1e1615427d5578913fd41799c9808d606034727691bf87f71b6affe0c82a137f6e665e52e32c640256c12e4c6b7c18
|
7
|
+
data.tar.gz: 38a4242c9208533c02730ce09382810104b00769c9eec976f137de5a1accb5e7f2f53d55a1f692c154b9ad52be591efa81c027b6638bc0f2cd2302081023cbb7
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -6,23 +6,77 @@ This package provides command line tools for managing datasets and connecting da
|
|
6
6
|
* Personal data warehouse connectivity
|
7
7
|
* Dataset formatting and importing (CSV)
|
8
8
|
|
9
|
-
##
|
9
|
+
## Prerequisites
|
10
10
|
|
11
|
-
|
11
|
+
This package requires at least Ruby 1.9 and Ruby 2.0 is recommended.
|
12
12
|
|
13
|
-
|
13
|
+
If you don't have an up to date version of Ruby or you're not sure then use the instructions below to get going.
|
14
14
|
|
15
|
-
|
15
|
+
### Mac OSX
|
16
16
|
|
17
|
-
|
17
|
+
To install Ruby on OSX complete the 4 steps below.
|
18
18
|
|
19
|
-
|
19
|
+
1\. Install Homebrew
|
20
20
|
|
21
|
-
|
21
|
+
```
|
22
|
+
ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)"
|
23
|
+
```
|
24
|
+
|
25
|
+
2\. Install rbenv
|
26
|
+
|
27
|
+
```
|
28
|
+
brew update
|
29
|
+
brew install rbenv ruby-build
|
30
|
+
echo 'eval "$(rbenv init -)"' >> ~/.bash_profile
|
31
|
+
source ~/.bash_profile
|
32
|
+
```
|
33
|
+
|
34
|
+
3\. Install Ruby 2.0
|
35
|
+
|
36
|
+
Note: This usually takes several minutes
|
37
|
+
|
38
|
+
```
|
39
|
+
rbenv install 2.0.0-p353
|
40
|
+
rbenv global 2.0.0-p353
|
41
|
+
rbenv rehash
|
42
|
+
```
|
43
|
+
|
44
|
+
4\. Install the mode gem
|
45
|
+
|
46
|
+
```
|
47
|
+
gem install mode
|
48
|
+
rbenv rehash
|
49
|
+
```
|
50
|
+
|
51
|
+
### OSX Combined
|
52
|
+
|
53
|
+
For convenience you can just copy and paste all the lines at once into your terminal
|
54
|
+
|
55
|
+
```
|
56
|
+
ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)"
|
57
|
+
brew update
|
58
|
+
brew install rbenv ruby-build
|
59
|
+
echo 'eval "$(rbenv init -)"' >> ~/.bash_profile
|
60
|
+
source ~/.bash_profile
|
61
|
+
rbenv install 2.0.0-p353
|
62
|
+
rbenv global 2.0.0-p353
|
63
|
+
gem install mode
|
64
|
+
rbenv rehash
|
65
|
+
```
|
22
66
|
|
23
|
-
|
67
|
+
### Windows
|
24
68
|
|
25
|
-
|
69
|
+
To install a current version of Ruby on windows complete the 2 steps below.
|
70
|
+
|
71
|
+
1\. Install Ruby 2.0
|
72
|
+
|
73
|
+
You can download the latest ruby version from [RubyInstaller](http://rubyinstaller.org/downloads/).
|
74
|
+
|
75
|
+
2\. Install the mode gem
|
76
|
+
|
77
|
+
```
|
78
|
+
$ gem install mode
|
79
|
+
```
|
26
80
|
|
27
81
|
## Setup
|
28
82
|
|
data/lib/mode.rb
CHANGED
@@ -1,13 +1,19 @@
|
|
1
1
|
require 'thor'
|
2
2
|
require 'data_kit'
|
3
|
+
require 'data_package'
|
3
4
|
|
4
|
-
|
5
|
-
|
5
|
+
require 'mode/version'
|
6
|
+
|
7
|
+
# Config
|
6
8
|
require 'mode/config'
|
7
9
|
|
8
10
|
# Commands
|
9
11
|
require 'mode/commands/helpers'
|
10
12
|
require 'mode/commands/setup'
|
11
13
|
require 'mode/commands/import'
|
14
|
+
require 'mode/commands/package'
|
12
15
|
require 'mode/commands/analyze_field'
|
13
|
-
require 'mode/commands/analyze_schema'
|
16
|
+
require 'mode/commands/analyze_schema'
|
17
|
+
|
18
|
+
# Utilities
|
19
|
+
require 'mode/package_builder'
|
data/lib/mode/cli.rb
CHANGED
data/lib/mode/cli/analyze.rb
CHANGED
@@ -2,9 +2,9 @@ module Mode
|
|
2
2
|
module CLI
|
3
3
|
class Base < Thor
|
4
4
|
desc "analyze PATH [--field=POSITION] [--match-type=TYPE]", "Analyzes a dataset or a field "
|
5
|
-
option :sample, :banner => 'RATE
|
6
|
-
option :field, :banner => 'POSITION
|
7
|
-
option :match_type, :banner => 'TYPE
|
5
|
+
option :sample, :banner => 'RATE', :desc => "Proportion of rows to inspect. Example: 0.5"
|
6
|
+
option :field, :banner => 'POSITION', :desc => "Field positions begin at 0. Example: 10"
|
7
|
+
option :match_type, :banner => 'TYPE', :desc => "One of the following: string, number, integer, datetime, boolean"
|
8
8
|
# option :keys, :banner => 'POSITIONS (ex: 0,2)', :default => String.new
|
9
9
|
def analyze(path = nil)
|
10
10
|
#keys = options[:keys].split(',').collect(&:strip).collect(&:to_i)
|
data/lib/mode/cli/package.rb
CHANGED
@@ -1,12 +1,15 @@
|
|
1
1
|
module Mode
|
2
2
|
module CLI
|
3
3
|
class Base < Thor
|
4
|
-
desc "package
|
5
|
-
option :
|
6
|
-
option :
|
7
|
-
def package(
|
8
|
-
|
9
|
-
|
4
|
+
desc "package SOURCE PACKAGEPATH [--name=NAME] [--resource-name=NAME]", "Creates a new data package from a csv file"
|
5
|
+
option :name, :desc => 'The name of the package'
|
6
|
+
option :resource_name, :default => 'data', :desc => 'The name of the resource in package'
|
7
|
+
def package(source, dest)
|
8
|
+
parts = dest.split(File::Separator)
|
9
|
+
|
10
|
+
name = options[:name] || parts.last
|
11
|
+
package_path = parts.length == 1 ? parts.first : parts[0, parts.length - 1]
|
12
|
+
Mode::Commands::Package.new(source, package_path, name, options[:resource_name]).execute
|
10
13
|
end
|
11
14
|
end
|
12
15
|
end
|
@@ -22,10 +22,10 @@ module Mode
|
|
22
22
|
|
23
23
|
csv = DataKit::CSV::Parser.new(path)
|
24
24
|
|
25
|
-
say "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 *
|
25
|
+
say "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 * sampling_rate)}%)..."
|
26
26
|
|
27
27
|
analysis, total_time = timer_block do
|
28
|
-
DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate =>
|
28
|
+
DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sampling_rate)
|
29
29
|
end
|
30
30
|
|
31
31
|
puts analysis.use_type_hints
|
@@ -67,7 +67,7 @@ module Mode
|
|
67
67
|
elsif analysis.has_only_numeric_types?(field_name)
|
68
68
|
row << field_type
|
69
69
|
else
|
70
|
-
row << '** ' + field_type
|
70
|
+
row << '** ' + field_type.to_s
|
71
71
|
end
|
72
72
|
|
73
73
|
DataKit::Dataset::Field::Types.each do |type|
|
@@ -81,9 +81,9 @@ module Mode
|
|
81
81
|
say table
|
82
82
|
end
|
83
83
|
|
84
|
-
def
|
84
|
+
def sampling_rate
|
85
85
|
file_size = File.size(path)
|
86
|
-
options[:sample]
|
86
|
+
(options[:sample] || DataKit::CSV::SchemaAnalyzer.sampling_rate(file_size)).to_f
|
87
87
|
end
|
88
88
|
end
|
89
89
|
end
|
@@ -4,15 +4,15 @@ module Mode
|
|
4
4
|
module Commands
|
5
5
|
class Package < Thor
|
6
6
|
attr_accessor :data
|
7
|
-
attr_accessor :
|
8
|
-
attr_accessor :
|
9
|
-
attr_accessor :
|
7
|
+
attr_accessor :base_path
|
8
|
+
attr_accessor :package_name
|
9
|
+
attr_accessor :resource_name
|
10
10
|
|
11
|
-
def initialize(data,
|
11
|
+
def initialize(data, base_path, package_name, resource_name)
|
12
12
|
@data = data
|
13
|
-
@
|
14
|
-
@
|
15
|
-
@
|
13
|
+
@base_path = base_path
|
14
|
+
@package_name = package_name
|
15
|
+
@resource_name = resource_name
|
16
16
|
end
|
17
17
|
|
18
18
|
no_commands do
|
@@ -22,20 +22,20 @@ module Mode
|
|
22
22
|
return
|
23
23
|
end
|
24
24
|
|
25
|
-
unless valid_name?(
|
25
|
+
unless valid_name?(package_name)
|
26
26
|
error "Error: Data package names can only contain letters, numbers, hyphens and underscores"
|
27
27
|
return
|
28
28
|
end
|
29
29
|
|
30
|
-
if
|
31
|
-
error "Error: A data package already exists at #{
|
30
|
+
if DataPackage::Package.exist?(base_path)
|
31
|
+
error "Error: A data package already exists at #{base_path}"
|
32
32
|
return
|
33
33
|
else
|
34
|
-
FileUtils.mkdir_p(
|
34
|
+
FileUtils.mkdir_p(File.join(base_path, package_name))
|
35
35
|
end
|
36
36
|
|
37
|
-
csv =
|
38
|
-
package = Mode::
|
37
|
+
csv = DataKit::CSV::Parser.new(data)
|
38
|
+
package = Mode::PackageBuilder.new(csv, base_path, package_name, resource_name).execute
|
39
39
|
|
40
40
|
say "Finished packaging #{name}!"
|
41
41
|
say "Use `mode table create <account>/<tablename> #{name}` to create a table in the public data warehouse"
|
@@ -51,10 +51,6 @@ module Mode
|
|
51
51
|
def valid_data?(data)
|
52
52
|
!data.nil? && File.exist?(data)
|
53
53
|
end
|
54
|
-
|
55
|
-
def sample_rate
|
56
|
-
Mode::CSV::Analyzer.sample_rate(File.size(data))
|
57
|
-
end
|
58
54
|
end
|
59
55
|
end
|
60
56
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Mode
|
2
|
+
class PackageBuilder
|
3
|
+
attr_accessor :data
|
4
|
+
attr_accessor :base_path
|
5
|
+
attr_accessor :package_name
|
6
|
+
attr_accessor :resource_name
|
7
|
+
|
8
|
+
def initialize(data, base_path, package_name, resource_name)
|
9
|
+
@data = data
|
10
|
+
@base_path = base_path
|
11
|
+
@package_name = package_name
|
12
|
+
@resource_name = resource_name
|
13
|
+
end
|
14
|
+
|
15
|
+
def execute
|
16
|
+
package = init_package
|
17
|
+
analysis = build_analysis(data)
|
18
|
+
converter = convert_data(data, analysis, package)
|
19
|
+
package.resources << build_resource(analysis, package, resource_name, "#{resource_name}.csv")
|
20
|
+
package.dump && package
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def valid_name?(name)
|
26
|
+
name =~ /\A[\w\d\-\_]+\z/
|
27
|
+
end
|
28
|
+
|
29
|
+
def md5hash(path)
|
30
|
+
Digest::MD5.hexdigest(File.read(data_path))
|
31
|
+
end
|
32
|
+
|
33
|
+
def init_package
|
34
|
+
DataPackage::Package.init(base_path, package_name)
|
35
|
+
end
|
36
|
+
|
37
|
+
def build_analysis(data)
|
38
|
+
DataKit::CSV::Analyzer.analyze(data, :sampling_rate => 1)
|
39
|
+
end
|
40
|
+
|
41
|
+
def convert_data(data, analysis, package)
|
42
|
+
# say "Converting data into standard format..."
|
43
|
+
dest_path = File.join(package.base_path, "#{resource_name}.csv")
|
44
|
+
converter = DataKit::CSV::Converter.convert(data, analysis, dest_path)
|
45
|
+
end
|
46
|
+
|
47
|
+
def build_resource(analysis, package, name)
|
48
|
+
Mode::Package::Resource.build(
|
49
|
+
analysis.schema.to_package, path, {
|
50
|
+
'name' => name, 'format' => 'csv',
|
51
|
+
'dialect' => Mode::Package::Dialect.new,
|
52
|
+
'hash' => md5hash(File.join(package.base_path, "#{name}.csv"))
|
53
|
+
}
|
54
|
+
)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
data/lib/mode/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mode
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mode Analytics
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -181,6 +181,7 @@ files:
|
|
181
181
|
- lib/mode/commands/package.rb
|
182
182
|
- lib/mode/commands/setup.rb
|
183
183
|
- lib/mode/config.rb
|
184
|
+
- lib/mode/package_builder.rb
|
184
185
|
- lib/mode/version.rb
|
185
186
|
- mode.gemspec
|
186
187
|
- spec/commands/analyze_schema_spec.rb
|