mode 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: efdff2f30299e26fcb53225a2ecc9d067b276d00
4
- data.tar.gz: c7d5c3e22b7cde0a38f51cf2e01d65d60f199542
3
+ metadata.gz: 6bfab7e5e64907e822459c3d0750038e99ea0ce7
4
+ data.tar.gz: 6dfeb3b3138921cf3b95439d3b5d7775cee76025
5
5
  SHA512:
6
- metadata.gz: ba7761578619bfb884531acfc5d836b26b493db2135b496b98f8b52294b84a495251ad27cd9616a489d2618a2f59791defe381987199fa245b67697a5df77e90
7
- data.tar.gz: 49f72b8cdc4fdc5e48299ac4806e7b4eb2e1cae25cedfb29063558001874c9395affa99048e040f65933520924f362dac6f5db374aaed1da6100b0a37b988020
6
+ metadata.gz: e69bfae6c5ba1335c3755c376871e2406a1e1615427d5578913fd41799c9808d606034727691bf87f71b6affe0c82a137f6e665e52e32c640256c12e4c6b7c18
7
+ data.tar.gz: 38a4242c9208533c02730ce09382810104b00769c9eec976f137de5a1accb5e7f2f53d55a1f692c154b9ad52be591efa81c027b6638bc0f2cd2302081023cbb7
data/.gitignore CHANGED
@@ -1,5 +1,6 @@
1
1
  *.gem
2
2
  *.rbc
3
+ *.ruby-version
3
4
  .bundle
4
5
  .config
5
6
  .yardoc
data/README.md CHANGED
@@ -6,23 +6,77 @@ This package provides command line tools for managing datasets and connecting da
6
6
  * Personal data warehouse connectivity
7
7
  * Dataset formatting and importing (CSV)
8
8
 
9
- ## Installation
9
+ ## Prerequisites
10
10
 
11
- ### Standalone
11
+ This package requires at least Ruby 1.9; Ruby 2.0 is recommended.
12
12
 
13
- From your terminal run:
13
+ If you don't have an up-to-date version of Ruby, or you're not sure, use the instructions below to get going.
14
14
 
15
- $ gem install mode
15
+ ### Mac OSX
16
16
 
17
- ### Bundler
17
+ To install Ruby on OSX, complete the 4 steps below.
18
18
 
19
- Add this line to your application's Gemfile:
19
+ 1\. Install Homebrew
20
20
 
21
- gem 'mode'
21
+ ```
22
+ ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)"
23
+ ```
24
+
25
+ 2\. Install rbenv
26
+
27
+ ```
28
+ brew update
29
+ brew install rbenv ruby-build
30
+ echo 'eval "$(rbenv init -)"' >> ~/.bash_profile
31
+ source ~/.bash_profile
32
+ ```
33
+
34
+ 3\. Install Ruby 2.0
35
+
36
+ Note: This usually takes several minutes
37
+
38
+ ```
39
+ rbenv install 2.0.0-p353
40
+ rbenv global 2.0.0-p353
41
+ rbenv rehash
42
+ ```
43
+
44
+ 4\. Install the mode gem
45
+
46
+ ```
47
+ gem install mode
48
+ rbenv rehash
49
+ ```
50
+
51
+ ### OSX Combined
52
+
53
+ For convenience, you can just copy and paste all the lines at once into your terminal.
54
+
55
+ ```
56
+ ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)"
57
+ brew update
58
+ brew install rbenv ruby-build
59
+ echo 'eval "$(rbenv init -)"' >> ~/.bash_profile
60
+ source ~/.bash_profile
61
+ rbenv install 2.0.0-p353
62
+ rbenv global 2.0.0-p353
63
+ gem install mode
64
+ rbenv rehash
65
+ ```
22
66
 
23
- And then execute:
67
+ ### Windows
24
68
 
25
- $ bundle
69
+ To install a current version of Ruby on Windows, complete the 2 steps below.
70
+
71
+ 1\. Install Ruby 2.0
72
+
73
+ You can download the latest Ruby version from [RubyInstaller](http://rubyinstaller.org/downloads/).
74
+
75
+ 2\. Install the mode gem
76
+
77
+ ```
78
+ $ gem install mode
79
+ ```
26
80
 
27
81
  ## Setup
28
82
 
@@ -1,13 +1,19 @@
1
1
  require 'thor'
2
2
  require 'data_kit'
3
+ require 'data_package'
3
4
 
4
- # Base
5
- require "mode/version"
5
+ require 'mode/version'
6
+
7
+ # Config
6
8
  require 'mode/config'
7
9
 
8
10
  # Commands
9
11
  require 'mode/commands/helpers'
10
12
  require 'mode/commands/setup'
11
13
  require 'mode/commands/import'
14
+ require 'mode/commands/package'
12
15
  require 'mode/commands/analyze_field'
13
- require 'mode/commands/analyze_schema'
16
+ require 'mode/commands/analyze_schema'
17
+
18
+ # Utilities
19
+ require 'mode/package_builder'
@@ -3,4 +3,5 @@ require 'mode/cli/helpers'
3
3
  require 'mode/cli/base'
4
4
  require 'mode/cli/setup'
5
5
  require 'mode/cli/import'
6
- require 'mode/cli/analyze'
6
+ require 'mode/cli/analyze'
7
+ require 'mode/cli/package'
@@ -2,9 +2,9 @@ module Mode
2
2
  module CLI
3
3
  class Base < Thor
4
4
  desc "analyze PATH [--field=POSITION] [--match-type=TYPE]", "Analyzes a dataset or a field "
5
- option :sample, :banner => 'RATE (Example: 0.5)'
6
- option :field, :banner => 'POSITION (Beginning at 0, Example: 10)'
7
- option :match_type, :banner => 'TYPE (One of: string, number, integer, datetime, boolean)'
5
+ option :sample, :banner => 'RATE', :desc => "Proportion of rows to inspect. Example: 0.5"
6
+ option :field, :banner => 'POSITION', :desc => "Field positions begin at 0. Example: 10"
7
+ option :match_type, :banner => 'TYPE', :desc => "One of the following: string, number, integer, datetime, boolean"
8
8
  # option :keys, :banner => 'POSITIONS (ex: 0,2)', :default => String.new
9
9
  def analyze(path = nil)
10
10
  #keys = options[:keys].split(',').collect(&:strip).collect(&:to_i)
@@ -1,12 +1,15 @@
1
1
  module Mode
2
2
  module CLI
3
3
  class Base < Thor
4
- desc "package <csv path> <package path>", "Creates a new data package from a csv file"
5
- option :sample, :banner => '<sampling rate> (ex: 0.5)'
6
- option :keys, :banner => '<positions> (ex: 0,2)', :default => String.new
7
- def package(csv, path)
8
- keys = options[:keys].split(',').collect(&:strip).collect(&:to_i)
9
- Mode::Commands::Package.new(csv, path, options.merge(keys: keys)).execute
4
+ desc "package SOURCE PACKAGEPATH [--name=NAME] [--resource-name=NAME]", "Creates a new data package from a csv file"
5
+ option :name, :desc => 'The name of the package'
6
+ option :resource_name, :default => 'data', :desc => 'The name of the resource in package'
7
+ def package(source, dest)
8
+ parts = dest.split(File::Separator)
9
+
10
+ name = options[:name] || parts.last
11
+ package_path = parts.length == 1 ? parts.first : parts[0, parts.length - 1]
12
+ Mode::Commands::Package.new(source, package_path, name, options[:resource_name]).execute
10
13
  end
11
14
  end
12
15
  end
@@ -22,10 +22,10 @@ module Mode
22
22
 
23
23
  csv = DataKit::CSV::Parser.new(path)
24
24
 
25
- say "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 * sample_rate)}%)..."
25
+ say "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 * sampling_rate)}%)..."
26
26
 
27
27
  analysis, total_time = timer_block do
28
- DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sample_rate)
28
+ DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sampling_rate)
29
29
  end
30
30
 
31
31
  puts analysis.use_type_hints
@@ -67,7 +67,7 @@ module Mode
67
67
  elsif analysis.has_only_numeric_types?(field_name)
68
68
  row << field_type
69
69
  else
70
- row << '** ' + field_type
70
+ row << '** ' + field_type.to_s
71
71
  end
72
72
 
73
73
  DataKit::Dataset::Field::Types.each do |type|
@@ -81,9 +81,9 @@ module Mode
81
81
  say table
82
82
  end
83
83
 
84
- def sample_rate
84
+ def sampling_rate
85
85
  file_size = File.size(path)
86
- options[:sample].to_f || DataKit::CSV::SchemaAnalyzer.sampling_rate(file_size)
86
+ (options[:sample] || DataKit::CSV::SchemaAnalyzer.sampling_rate(file_size)).to_f
87
87
  end
88
88
  end
89
89
  end
@@ -4,15 +4,15 @@ module Mode
4
4
  module Commands
5
5
  class Package < Thor
6
6
  attr_accessor :data
7
- attr_accessor :path
8
- attr_accessor :name
9
- attr_accessor :options
7
+ attr_accessor :base_path
8
+ attr_accessor :package_name
9
+ attr_accessor :resource_name
10
10
 
11
- def initialize(data, name, options = {})
11
+ def initialize(data, base_path, package_name, resource_name)
12
12
  @data = data
13
- @path = name
14
- @name = name
15
- @options = options
13
+ @base_path = base_path
14
+ @package_name = package_name
15
+ @resource_name = resource_name
16
16
  end
17
17
 
18
18
  no_commands do
@@ -22,20 +22,20 @@ module Mode
22
22
  return
23
23
  end
24
24
 
25
- unless valid_name?(name)
25
+ unless valid_name?(package_name)
26
26
  error "Error: Data package names can only contain letters, numbers, hyphens and underscores"
27
27
  return
28
28
  end
29
29
 
30
- if Mode::Package::Base.exist?(name)
31
- error "Error: A data package already exists at #{name}"
30
+ if DataPackage::Package.exist?(base_path)
31
+ error "Error: A data package already exists at #{base_path}"
32
32
  return
33
33
  else
34
- FileUtils.mkdir_p(name)
34
+ FileUtils.mkdir_p(File.join(base_path, package_name))
35
35
  end
36
36
 
37
- csv = Mode::CSV::Parser.new(data)
38
- package = Mode::Package::Builder.new(csv, path, name, sample_rate).execute
37
+ csv = DataKit::CSV::Parser.new(data)
38
+ package = Mode::PackageBuilder.new(csv, base_path, package_name, resource_name).execute
39
39
 
40
40
  say "Finished packaging #{name}!"
41
41
  say "Use `mode table create <account>/<tablename> #{name}` to create a table in the public data warehouse"
@@ -51,10 +51,6 @@ module Mode
51
51
  def valid_data?(data)
52
52
  !data.nil? && File.exist?(data)
53
53
  end
54
-
55
- def sample_rate
56
- Mode::CSV::Analyzer.sample_rate(File.size(data))
57
- end
58
54
  end
59
55
  end
60
56
  end
@@ -0,0 +1,57 @@
1
+ module Mode
2
+ class PackageBuilder
3
+ attr_accessor :data
4
+ attr_accessor :base_path
5
+ attr_accessor :package_name
6
+ attr_accessor :resource_name
7
+
8
+ def initialize(data, base_path, package_name, resource_name)
9
+ @data = data
10
+ @base_path = base_path
11
+ @package_name = package_name
12
+ @resource_name = resource_name
13
+ end
14
+
15
+ def execute
16
+ package = init_package
17
+ analysis = build_analysis(data)
18
+ converter = convert_data(data, analysis, package)
19
+ package.resources << build_resource(analysis, package, resource_name, "#{resource_name}.csv")
20
+ package.dump && package
21
+ end
22
+
23
+ private
24
+
25
+ def valid_name?(name)
26
+ name =~ /\A[\w\d\-\_]+\z/
27
+ end
28
+
29
+ def md5hash(path)
30
+ Digest::MD5.hexdigest(File.read(data_path))
31
+ end
32
+
33
+ def init_package
34
+ DataPackage::Package.init(base_path, package_name)
35
+ end
36
+
37
+ def build_analysis(data)
38
+ DataKit::CSV::Analyzer.analyze(data, :sampling_rate => 1)
39
+ end
40
+
41
+ def convert_data(data, analysis, package)
42
+ # say "Converting data into standard format..."
43
+ dest_path = File.join(package.base_path, "#{resource_name}.csv")
44
+ converter = DataKit::CSV::Converter.convert(data, analysis, dest_path)
45
+ end
46
+
47
+ def build_resource(analysis, package, name)
48
+ Mode::Package::Resource.build(
49
+ analysis.schema.to_package, path, {
50
+ 'name' => name, 'format' => 'csv',
51
+ 'dialect' => Mode::Package::Dialect.new,
52
+ 'hash' => md5hash(File.join(package.base_path, "#{name}.csv"))
53
+ }
54
+ )
55
+ end
56
+ end
57
+ end
@@ -1,3 +1,3 @@
1
1
  module Mode
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mode
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mode Analytics
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-17 00:00:00.000000000 Z
11
+ date: 2013-12-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -181,6 +181,7 @@ files:
181
181
  - lib/mode/commands/package.rb
182
182
  - lib/mode/commands/setup.rb
183
183
  - lib/mode/config.rb
184
+ - lib/mode/package_builder.rb
184
185
  - lib/mode/version.rb
185
186
  - mode.gemspec
186
187
  - spec/commands/analyze_schema_spec.rb