mode 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: efdff2f30299e26fcb53225a2ecc9d067b276d00
4
- data.tar.gz: c7d5c3e22b7cde0a38f51cf2e01d65d60f199542
3
+ metadata.gz: 6bfab7e5e64907e822459c3d0750038e99ea0ce7
4
+ data.tar.gz: 6dfeb3b3138921cf3b95439d3b5d7775cee76025
5
5
  SHA512:
6
- metadata.gz: ba7761578619bfb884531acfc5d836b26b493db2135b496b98f8b52294b84a495251ad27cd9616a489d2618a2f59791defe381987199fa245b67697a5df77e90
7
- data.tar.gz: 49f72b8cdc4fdc5e48299ac4806e7b4eb2e1cae25cedfb29063558001874c9395affa99048e040f65933520924f362dac6f5db374aaed1da6100b0a37b988020
6
+ metadata.gz: e69bfae6c5ba1335c3755c376871e2406a1e1615427d5578913fd41799c9808d606034727691bf87f71b6affe0c82a137f6e665e52e32c640256c12e4c6b7c18
7
+ data.tar.gz: 38a4242c9208533c02730ce09382810104b00769c9eec976f137de5a1accb5e7f2f53d55a1f692c154b9ad52be591efa81c027b6638bc0f2cd2302081023cbb7
data/.gitignore CHANGED
@@ -1,5 +1,6 @@
1
1
  *.gem
2
2
  *.rbc
3
+ *.ruby-version
3
4
  .bundle
4
5
  .config
5
6
  .yardoc
data/README.md CHANGED
@@ -6,23 +6,77 @@ This package provides command line tools for managing datasets and connecting da
6
6
  * Personal data warehouse connectivity
7
7
  * Dataset formatting and importing (CSV)
8
8
 
9
- ## Installation
9
+ ## Prerequisites
10
10
 
11
- ### Standalone
11
+ This package requires Ruby 1.9 or later; Ruby 2.0 is recommended.
12
12
 
13
- From your terminal run:
13
+ If you don't have an up-to-date version of Ruby, or you're not sure, use the instructions below to get going.
14
14
 
15
- $ gem install mode
15
+ ### Mac OSX
16
16
 
17
- ### Bundler
17
+ To install Ruby on OS X, complete the 4 steps below.
18
18
 
19
- Add this line to your application's Gemfile:
19
+ 1\. Install Homebrew
20
20
 
21
- gem 'mode'
21
+ ```
22
+ ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)"
23
+ ```
24
+
25
+ 2\. Install rbenv
26
+
27
+ ```
28
+ brew update
29
+ brew install rbenv ruby-build
30
+ echo 'eval "$(rbenv init -)"' >> ~/.bash_profile
31
+ source ~/.bash_profile
32
+ ```
33
+
34
+ 3\. Install Ruby 2.0
35
+
36
+ Note: This usually takes several minutes
37
+
38
+ ```
39
+ rbenv install 2.0.0-p353
40
+ rbenv global 2.0.0-p353
41
+ rbenv rehash
42
+ ```
43
+
44
+ 4\. Install the mode gem
45
+
46
+ ```
47
+ gem install mode
48
+ rbenv rehash
49
+ ```
50
+
51
+ ### OSX Combined
52
+
53
+ For convenience, you can copy and paste all the lines at once into your terminal:
54
+
55
+ ```
56
+ ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)"
57
+ brew update
58
+ brew install rbenv ruby-build
59
+ echo 'eval "$(rbenv init -)"' >> ~/.bash_profile
60
+ source ~/.bash_profile
61
+ rbenv install 2.0.0-p353
62
+ rbenv global 2.0.0-p353
63
+ gem install mode
64
+ rbenv rehash
65
+ ```
22
66
 
23
- And then execute:
67
+ ### Windows
24
68
 
25
- $ bundle
69
+ To install a current version of Ruby on Windows, complete the 2 steps below.
70
+
71
+ 1\. Install Ruby 2.0
72
+
73
+ You can download the latest Ruby version from [RubyInstaller](http://rubyinstaller.org/downloads/).
74
+
75
+ 2\. Install the mode gem
76
+
77
+ ```
78
+ $ gem install mode
79
+ ```
26
80
 
27
81
  ## Setup
28
82
 
@@ -1,13 +1,19 @@
1
1
  require 'thor'
2
2
  require 'data_kit'
3
+ require 'data_package'
3
4
 
4
- # Base
5
- require "mode/version"
5
+ require 'mode/version'
6
+
7
+ # Config
6
8
  require 'mode/config'
7
9
 
8
10
  # Commands
9
11
  require 'mode/commands/helpers'
10
12
  require 'mode/commands/setup'
11
13
  require 'mode/commands/import'
14
+ require 'mode/commands/package'
12
15
  require 'mode/commands/analyze_field'
13
- require 'mode/commands/analyze_schema'
16
+ require 'mode/commands/analyze_schema'
17
+
18
+ # Utilities
19
+ require 'mode/package_builder'
@@ -3,4 +3,5 @@ require 'mode/cli/helpers'
3
3
  require 'mode/cli/base'
4
4
  require 'mode/cli/setup'
5
5
  require 'mode/cli/import'
6
- require 'mode/cli/analyze'
6
+ require 'mode/cli/analyze'
7
+ require 'mode/cli/package'
@@ -2,9 +2,9 @@ module Mode
2
2
  module CLI
3
3
  class Base < Thor
4
4
  desc "analyze PATH [--field=POSITION] [--match-type=TYPE]", "Analyzes a dataset or a field "
5
- option :sample, :banner => 'RATE (Example: 0.5)'
6
- option :field, :banner => 'POSITION (Beginning at 0, Example: 10)'
7
- option :match_type, :banner => 'TYPE (One of: string, number, integer, datetime, boolean)'
5
+ option :sample, :banner => 'RATE', :desc => "Proportion of rows to inspect. Example: 0.5"
6
+ option :field, :banner => 'POSITION', :desc => "Field positions begin at 0. Example: 10"
7
+ option :match_type, :banner => 'TYPE', :desc => "One of the following: string, number, integer, datetime, boolean"
8
8
  # option :keys, :banner => 'POSITIONS (ex: 0,2)', :default => String.new
9
9
  def analyze(path = nil)
10
10
  #keys = options[:keys].split(',').collect(&:strip).collect(&:to_i)
@@ -1,12 +1,15 @@
1
1
  module Mode
2
2
  module CLI
3
3
  class Base < Thor
4
- desc "package <csv path> <package path>", "Creates a new data package from a csv file"
5
- option :sample, :banner => '<sampling rate> (ex: 0.5)'
6
- option :keys, :banner => '<positions> (ex: 0,2)', :default => String.new
7
- def package(csv, path)
8
- keys = options[:keys].split(',').collect(&:strip).collect(&:to_i)
9
- Mode::Commands::Package.new(csv, path, options.merge(keys: keys)).execute
4
+ desc "package SOURCE PACKAGEPATH [--name=NAME] [--resource-name=NAME]", "Creates a new data package from a csv file"
5
+ option :name, :desc => 'The name of the package'
6
+ option :resource_name, :default => 'data', :desc => 'The name of the resource in package'
7
+ def package(source, dest)
8
+ parts = dest.split(File::Separator)
9
+
10
+ name = options[:name] || parts.last
11
+ package_path = parts.length == 1 ? parts.first : parts[0, parts.length - 1]
12
+ Mode::Commands::Package.new(source, package_path, name, options[:resource_name]).execute
10
13
  end
11
14
  end
12
15
  end
@@ -22,10 +22,10 @@ module Mode
22
22
 
23
23
  csv = DataKit::CSV::Parser.new(path)
24
24
 
25
- say "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 * sample_rate)}%)..."
25
+ say "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 * sampling_rate)}%)..."
26
26
 
27
27
  analysis, total_time = timer_block do
28
- DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sample_rate)
28
+ DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sampling_rate)
29
29
  end
30
30
 
31
31
  puts analysis.use_type_hints
@@ -67,7 +67,7 @@ module Mode
67
67
  elsif analysis.has_only_numeric_types?(field_name)
68
68
  row << field_type
69
69
  else
70
- row << '** ' + field_type
70
+ row << '** ' + field_type.to_s
71
71
  end
72
72
 
73
73
  DataKit::Dataset::Field::Types.each do |type|
@@ -81,9 +81,9 @@ module Mode
81
81
  say table
82
82
  end
83
83
 
84
- def sample_rate
84
+ def sampling_rate
85
85
  file_size = File.size(path)
86
- options[:sample].to_f || DataKit::CSV::SchemaAnalyzer.sampling_rate(file_size)
86
+ (options[:sample] || DataKit::CSV::SchemaAnalyzer.sampling_rate(file_size)).to_f
87
87
  end
88
88
  end
89
89
  end
@@ -4,15 +4,15 @@ module Mode
4
4
  module Commands
5
5
  class Package < Thor
6
6
  attr_accessor :data
7
- attr_accessor :path
8
- attr_accessor :name
9
- attr_accessor :options
7
+ attr_accessor :base_path
8
+ attr_accessor :package_name
9
+ attr_accessor :resource_name
10
10
 
11
- def initialize(data, name, options = {})
11
+ def initialize(data, base_path, package_name, resource_name)
12
12
  @data = data
13
- @path = name
14
- @name = name
15
- @options = options
13
+ @base_path = base_path
14
+ @package_name = package_name
15
+ @resource_name = resource_name
16
16
  end
17
17
 
18
18
  no_commands do
@@ -22,20 +22,20 @@ module Mode
22
22
  return
23
23
  end
24
24
 
25
- unless valid_name?(name)
25
+ unless valid_name?(package_name)
26
26
  error "Error: Data package names can only contain letters, numbers, hyphens and underscores"
27
27
  return
28
28
  end
29
29
 
30
- if Mode::Package::Base.exist?(name)
31
- error "Error: A data package already exists at #{name}"
30
+ if DataPackage::Package.exist?(base_path)
31
+ error "Error: A data package already exists at #{base_path}"
32
32
  return
33
33
  else
34
- FileUtils.mkdir_p(name)
34
+ FileUtils.mkdir_p(File.join(base_path, package_name))
35
35
  end
36
36
 
37
- csv = Mode::CSV::Parser.new(data)
38
- package = Mode::Package::Builder.new(csv, path, name, sample_rate).execute
37
+ csv = DataKit::CSV::Parser.new(data)
38
+ package = Mode::PackageBuilder.new(csv, base_path, package_name, resource_name).execute
39
39
 
40
40
  say "Finished packaging #{name}!"
41
41
  say "Use `mode table create <account>/<tablename> #{name}` to create a table in the public data warehouse"
@@ -51,10 +51,6 @@ module Mode
51
51
  def valid_data?(data)
52
52
  !data.nil? && File.exist?(data)
53
53
  end
54
-
55
- def sample_rate
56
- Mode::CSV::Analyzer.sample_rate(File.size(data))
57
- end
58
54
  end
59
55
  end
60
56
  end
@@ -0,0 +1,57 @@
1
+ module Mode
2
+ class PackageBuilder
3
+ attr_accessor :data
4
+ attr_accessor :base_path
5
+ attr_accessor :package_name
6
+ attr_accessor :resource_name
7
+
8
+ def initialize(data, base_path, package_name, resource_name)
9
+ @data = data
10
+ @base_path = base_path
11
+ @package_name = package_name
12
+ @resource_name = resource_name
13
+ end
14
+
15
+ def execute
16
+ package = init_package
17
+ analysis = build_analysis(data)
18
+ converter = convert_data(data, analysis, package)
19
+ package.resources << build_resource(analysis, package, resource_name, "#{resource_name}.csv")
20
+ package.dump && package
21
+ end
22
+
23
+ private
24
+
25
+ def valid_name?(name)
26
+ name =~ /\A[\w\d\-\_]+\z/
27
+ end
28
+
29
+ def md5hash(path)
30
+ Digest::MD5.hexdigest(File.read(data_path))
31
+ end
32
+
33
+ def init_package
34
+ DataPackage::Package.init(base_path, package_name)
35
+ end
36
+
37
+ def build_analysis(data)
38
+ DataKit::CSV::Analyzer.analyze(data, :sampling_rate => 1)
39
+ end
40
+
41
+ def convert_data(data, analysis, package)
42
+ # say "Converting data into standard format..."
43
+ dest_path = File.join(package.base_path, "#{resource_name}.csv")
44
+ converter = DataKit::CSV::Converter.convert(data, analysis, dest_path)
45
+ end
46
+
47
+ def build_resource(analysis, package, name)
48
+ Mode::Package::Resource.build(
49
+ analysis.schema.to_package, path, {
50
+ 'name' => name, 'format' => 'csv',
51
+ 'dialect' => Mode::Package::Dialect.new,
52
+ 'hash' => md5hash(File.join(package.base_path, "#{name}.csv"))
53
+ }
54
+ )
55
+ end
56
+ end
57
+ end
@@ -1,3 +1,3 @@
1
1
  module Mode
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mode
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mode Analytics
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-17 00:00:00.000000000 Z
11
+ date: 2013-12-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -181,6 +181,7 @@ files:
181
181
  - lib/mode/commands/package.rb
182
182
  - lib/mode/commands/setup.rb
183
183
  - lib/mode/config.rb
184
+ - lib/mode/package_builder.rb
184
185
  - lib/mode/version.rb
185
186
  - mode.gemspec
186
187
  - spec/commands/analyze_schema_spec.rb