mode 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
module Mode
  module CLI
    # Root Thor class for the command-line interface.
    #
    # CLI commands are split out into separate files that just reopen this
    # class. An alternative would be to extend this class on include, which
    # would give an explicit list of the commands being included.
    class Base < Thor
      # Mixes the shared helper methods into every command file that reopens
      # this class.
      #
      # NOTE: a bare `private` placed before `include` only changes the
      # default visibility of methods defined with `def` afterwards in the
      # same class body; it does not affect methods that come from an
      # included module. The marker that used to precede this line was
      # therefore a no-op and has been removed. The helpers keep whatever
      # visibility Mode::CLI::Helpers gives them.
      include Mode::CLI::Helpers
    end
  end
end
@@ -0,0 +1,22 @@
1
module Mode
  module CLI
    # Validation and naming helpers shared by the CLI commands.
    module Helpers
      # ACCOUNT/TABLENAME: exactly two segments made of word characters or
      # hyphens, separated by one slash. Anchored with \A and \z so that
      # extra segments ("a/b/c") or stray characters around a valid-looking
      # fragment ("bad name/tab!le") are rejected instead of matching a
      # substring.
      TABLE_PATTERN = /\A[\w\-]+\/[\w\-]+\z/

      # Returns true when +file+ is non-nil and exists on disk.
      def valid_file?(file)
        !file.nil? && File.exist?(file)
      end

      # Returns a truthy value (the match index, always 0) when +table+ is a
      # well-formed ACCOUNT/TABLENAME string, nil otherwise. nil input is
      # treated as an empty string and therefore rejected.
      def valid_table?(table)
        table.to_s =~ TABLE_PATTERN
      end

      # Derives a package name from a path: the last path component with
      # everything from the first dot onwards removed
      # ("/tmp/gdp_quarterly.csv" => "gdp_quarterly").
      def pkg_name(path)
        File.basename(path).split('.').first
      end

      # Asks the CSV analyzer for a sample rate based on the size in bytes of
      # the file at +path+.
      # NOTE(review): this resolves to the top-level DataKit namespace while
      # Mode::Commands::Package uses Mode::CSV::Analyzer - confirm which one
      # is intended.
      def sample_rate(path)
        file_size = File.size(path)
        DataKit::CSV::Analyzer.sample_rate(file_size)
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
module Mode
  module CLI
    class Base < Thor
      desc "import SOURCE ACCOUNT/TABLENAME [--update | --replace]", "Import a flat file into the Mode data warehouse"
      long_desc <<-LONGDESC
        The import command allows you to create, update and
        replace tables in the Mode data warehouse with data from
        flat files and data packages. The default action is create
        with optional flags to update or replace a table.

        Data can be imported from one of three source types

        1. CSV File
        \x5> $ mode import gdp_quarterly.csv besquared/gdp_quarterly

        2. JSON File (LD-JSON)
        \x5> $ mode import gdp_quarterly.json besquared/gdp_quarterly

        3. Data Package
        \x5> $ mode import gdp_us/quarterly besquared/gdp_quarterly


        Note: If you do not specify a data package resource name we'll attempt to use the first resource in the package.
      LONGDESC
      option :update, :type => :boolean
      option :replace, :type => :boolean
      option :primary_key, :banner => 'pos1[,pos2,...] (ex: 0,2)'
      # Imports SOURCE into ACCOUNT/TABLENAME.
      #
      # source - path to an existing data package directory or to a flat file
      # table  - "ACCOUNT/TABLENAME" string, checked with valid_table?
      #
      # A directory source is opened as an existing package. Any other source
      # must be an existing file; it is parsed and built into a package
      # inside a temporary directory first. Invalid input is reported with
      # `error` and the method returns early.
      #
      # NOTE(review): the --update, --replace and --primary_key options are
      # declared but not read here, and the resource used is always the
      # first one in the package.
      def import(source, table)
        unless valid_table?(table)
          error "Error: Invalid account or table name given"
          return
        end

        if File.directory?(source)
          unless Mode::Package::Base.exist?(source)
            error "Error: Invalid package given"
            return
          end

          import_package(Mode::Package::Base.open(source), table)
        else
          unless valid_file?(source)
            error "Error: Invalid source file given"
            return
          end

          # Block form so the scratch directory is removed once the import
          # has finished instead of being left behind on every run.
          Dir.mktmpdir do |dst_path|
            src_data = Mode::CSV::Parser.new(source)
            builder = Mode::Package::Builder.new(src_data, dst_path, pkg_name(source), sample_rate(source))

            import_package(builder.execute, table) # builder.execute makes the package
          end
        end
      end

      private

      # Runs the import command for the first resource of +package+ against
      # the account and table name encoded in +table+.
      def import_package(package, table)
        account, table_name = table.split('/')
        resource_name = package.resources.first.name

        Mode::Commands::Import.new(account, table_name, package, resource_name).execute
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module Mode
  module CLI
    class Base < Thor
      desc "package <csv path> <package path>", "Creates a new data package from a csv file"
      option :sample, :banner => '<sampling rate> (ex: 0.5)'
      option :keys, :banner => '<positions> (ex: 0,2)', :default => String.new
      # Builds a data package at +path+ from the csv file at +csv+.
      #
      # The --keys option is a comma-separated list of positions. Each entry
      # is stripped and converted with to_i, and the resulting array replaces
      # the raw string under :keys in the options handed to the command.
      def package(csv, path)
        key_positions = options[:keys].split(',').map { |position| position.strip.to_i }
        command_options = options.merge(:keys => key_positions)

        Mode::Commands::Package.new(csv, path, command_options).execute
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module Mode
  module CLI
    class Base < Thor
      desc "setup", "Setup a new mode configuration in the given directory (defaults to home)"
      option :dir, :default => '~'
      option :host, :default => 'www.modeanalytics.com'
      # Runs the interactive configuration setup with the parsed options
      # (:dir and :host).
      #
      # The command class defined alongside the other commands is
      # Mode::Commands::Setup; the previous reference to
      # Mode::Commands::Init named a class that is not defined there.
      def setup
        Mode::Commands::Setup.new(options).execute
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ require 'terminal-table'
2
+
3
+ module Mode
4
+ module Commands
5
+ class AnalyzeField < Thor
6
+ attr_accessor :path
7
+ attr_accessor :field_pos
8
+ attr_accessor :options
9
+
10
+ def initialize(path, field_pos, options = {})
11
+ @path = path
12
+ @field_pos = field_pos.to_i
13
+ @options = options
14
+ end
15
+
16
+ no_commands do
17
+ include Mode::Commands::Helpers
18
+
19
+ def execute
20
+ if path.nil? || !File.exist?(path)
21
+ error "Couldn't find file at #{path}"
22
+ return
23
+ end
24
+
25
+ csv = DataKit::CSV::Parser.new(path)
26
+
27
+ field_name = csv.headers[field_pos]
28
+
29
+ say "Analyzing #{field_name} at #{path || 'input'} (Sampling #{'%.2f' % (100 * sampling_rate)}%)..."
30
+
31
+ analysis, total_time = timer_block do
32
+ DataKit::CSV::FieldAnalyzer.analyze(csv, field_pos, {
33
+ :match_type => match_type,
34
+ :sampling_rate => sampling_rate
35
+ })
36
+ end
37
+
38
+ say "Analyzed #{analysis.sample_count} of #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
39
+
40
+ display(analysis)
41
+ end
42
+ end
43
+
44
+ private
45
+ def display(analysis)
46
+ table = Terminal::Table.new(:headings => [
47
+ 'Row No.', 'Type', 'Value'
48
+ ])
49
+
50
+ analysis.types.each do |type, rows|
51
+ rows.each do |row_num|
52
+ table.add_row [row_num, type, analysis.value_at(row_num)]
53
+ end
54
+ end
55
+
56
+ say table
57
+ end
58
+
59
+ def match_type
60
+ options[:match_type] ? options[:match_type].to_sym : :any
61
+ end
62
+
63
+ def sampling_rate
64
+ 1
65
+ # file_size = File.size(path)
66
+ # options[:sample].to_f || DataKit::CSV::SchemaAnalyzer.sampling_rate(file_size)
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,88 @@
1
+ require 'terminal-table'
2
+
3
module Mode
  module Commands
    # Analyzes the schema of a CSV file and prints, per field, the chosen
    # type and the percentage of sampled rows matching each known type.
    class AnalyzeSchema < Thor
      # Public accessors are declared inside no_commands so Thor does not
      # try to register them as CLI commands (it warns about public methods
      # defined without a `desc`).
      no_commands do
        include Mode::Commands::Helpers

        attr_accessor :path
        attr_accessor :options
      end

      # path    - path to the CSV file
      # options - hash-like object; :sample is read if present
      def initialize(path, options = {})
        @path = path
        @options = options
      end

      no_commands do
        # Parses the file, runs the schema analyzer and prints the result.
        # Reports an error and returns early when the file is missing.
        def execute
          if path.nil? || !File.exist?(path)
            error "Couldn't find file at #{path}"
            return
          end

          csv = DataKit::CSV::Parser.new(path)

          say "Analyzing #{path} (Sampling #{'%.2f' % (100 * sample_rate)}%)..."

          analyzer, total_time = timer_block do
            DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sample_rate)
          end

          say "Analyzed #{analyzer.sample_count} of #{analyzer.row_count} rows in #{'%.2f' % total_time} seconds\n"

          display(analyzer)
        end
      end

      private

      # Builds a right-aligned table cell showing numerator/denominator as a
      # percentage with two decimals. A zero numerator gives an empty cell.
      def format_percentage_cell(numerator, denominator)
        cell = { :alignment => :right }

        if numerator == 0
          cell[:value] = nil
        else
          cell[:value] = '%.2f' % (100 * (numerator / denominator.to_f)) + '%'
        end

        cell
      end

      # Prints one row per field: position, name, type and one percentage
      # column for each entry in DataKit::Dataset::Field::Types.
      def display(analysis)
        table = Terminal::Table.new(:headings => [
          'Field No.', 'Field', 'Type',
          'String (%)', 'Integer (%)', 'Number (%)',
          'Date/Time (%)', 'Boolean (%)', 'Empty (%)'
        ])

        analysis.fields.each_with_index do |field_name, index|
          row = [index, field_name]

          field_type = analysis.type?(field_name)

          consistent = analysis.has_single_type?(field_name) ||
                       analysis.has_only_numeric_types?(field_name)

          # Fields with mixed, non-numeric types are flagged with '** '.
          # Interpolation is used so the flag also works when the type is a
          # Symbol (String#+ would raise a TypeError for that).
          row << (consistent ? field_type : "** #{field_type}")

          DataKit::Dataset::Field::Types.each do |type|
            type_count = analysis.type_count(field_name, type)
            row << format_percentage_cell(type_count, analysis.sample_count)
          end

          table.add_row(row)
        end

        say table
      end

      # The :sample option when given, otherwise the rate the analyzer
      # suggests for the file's size in bytes.
      # The option is converted with to_f because it may arrive from the
      # command line as a String (assumption - the declaring CLI command is
      # not visible here), and the caller multiplies the result by 100.
      def sample_rate
        return options[:sample].to_f if options[:sample]

        DataKit::CSV::SchemaAnalyzer.sampling_rate(File.size(path))
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
module Mode
  module Commands
    # Helpers shared by the command classes.
    module Helpers
      # Runs the given block and measures how long it took.
      #
      # Returns a two-element array: the block's return value and the
      # elapsed time in seconds as a Float.
      #
      # The monotonic clock is used instead of Time.now so the measurement
      # cannot be skewed (or go negative) when the system clock is adjusted
      # while the block runs.
      def timer_block
        started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        result = yield
        elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

        [result, elapsed]
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module Mode
  module Commands
    # Imports one resource of a data package into an account's table.
    # The import itself is not implemented yet; execute only announces what
    # would be imported.
    class Import < Thor
      # Public accessors are declared inside no_commands so Thor does not
      # try to register them as CLI commands (it warns about public methods
      # defined without a `desc`).
      no_commands do
        attr_accessor :account
        attr_accessor :table_name
        attr_accessor :package
        attr_accessor :resource_name
      end

      # account       - account part of the destination
      # table_name    - table part of the destination
      # package       - package object; execute reads its name
      # resource_name - name of the resource within the package
      def initialize(account, table_name, package, resource_name)
        @account = account
        @table_name = table_name
        @package = package
        @resource_name = resource_name
      end

      no_commands do
        # Prints the source and destination of the import. Uses `say` like
        # the other command classes rather than writing with `puts`.
        def execute
          say "Importing #{package.name}/#{resource_name} to #{account}/#{table_name}"
          # We need to check the datapackage.md5
          # We need to check the md5 of the source
          # If either of these have changed the package needs to be reverified

          # 1. Compress to temporary dir
          # 2. POST /imports with name and zipfile
          # 3. Poll for execution status until finished or timeout (what's the timeout?)
        end
      end

      private

      # Placeholder: currently has an empty body and returns nil.
      def valid_package?(package)
        # package.valid?
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require 'digest/md5'
2
+
3
module Mode
  module Commands
    # Builds a new data package from a CSV file.
    class Package < Thor
      # Public accessors are declared inside no_commands so Thor does not
      # try to register them as CLI commands (it warns about public methods
      # defined without a `desc`).
      no_commands do
        attr_accessor :data
        attr_accessor :path
        attr_accessor :name
        attr_accessor :options
      end

      # data    - path to the source data file
      # name    - package name; also used as the package path
      # options - hash-like object, stored but not read by this class
      def initialize(data, name, options = {})
        @data = data
        @path = name
        @name = name
        @options = options
      end

      no_commands do
        # Validates the inputs, creates the package directory and builds
        # the package. Each failed check is reported with `error` and stops
        # the command.
        def execute
          unless valid_data?(data)
            error "Error: Couldn't find valid data file at #{data}"
            return
          end

          unless valid_name?(name)
            error "Error: Data package names can only contain letters, numbers, hyphens and underscores"
            return
          end

          if Mode::Package::Base.exist?(name)
            error "Error: A data package already exists at #{name}"
            return
          end

          FileUtils.mkdir_p(name)

          csv = Mode::CSV::Parser.new(data)
          # The built package object is not needed afterwards, so it is no
          # longer kept in an unused local.
          Mode::Package::Builder.new(csv, path, name, sample_rate).execute

          say "Finished packaging #{name}!"
          say "Use `mode table create <account>/<tablename> #{name}` to create a table in the public data warehouse"
        end
      end

      private

      # Truthy when the whole name consists of word characters and hyphens.
      def valid_name?(name)
        name =~ /\A[\w\d\-\_]+\z/
      end

      # True when +data+ is non-nil and exists on disk.
      def valid_data?(data)
        !data.nil? && File.exist?(data)
      end

      # Sample rate the analyzer suggests for the data file's size in bytes.
      def sample_rate
        Mode::CSV::Analyzer.sample_rate(File.size(data))
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
module Mode
  module Commands
    # Interactive command that initializes the Mode configuration file and
    # stores the username and access token the user enters.
    class Setup < Thor
      # The public accessor is declared inside no_commands so Thor does not
      # try to register it as a CLI command (it warns about public methods
      # defined without a `desc`).
      no_commands do
        attr_accessor :options
      end

      # options - hash-like object; execute reads :dir and :host
      def initialize(options = {})
        @options = options
      end

      no_commands do
        # Asks for confirmation when a configuration already exists, then
        # re-initializes it, prompts for the credentials and saves them.
        def execute
          config_path = Mode::Config.full_path(options[:dir])

          if File.exist?(config_path)
            say "Configuration at #{config_path} already exists"
            return unless yes? "Would you like to modify the existing configuration? (y/n):"
          end

          say "Initializing configuration at #{config_path}"

          # Config.init rewrites the file with an empty hash before loading it.
          config = Mode::Config.init(options[:dir])

          config.username = ask "Mode username:"

          say "You can view your access tokens at http://#{options[:host]}/accounts/#{config.username}/access_tokens"

          config.access_token = ask "Access token for #{config.username}:"

          config.save
        end
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ require 'yaml'
2
+
3
module Mode
  # Reads and writes the YAML configuration file that holds the Mode
  # username and access token.
  class Config
    # Default file name, used when no explicit filename is given.
    FILENAME = '.mode.yml'

    attr_accessor :path

    attr_accessor :username, :access_token

    # Loads the configuration stored in the directory +path+.
    #
    # path     - directory containing the configuration file
    # filename - optional file name, defaults to FILENAME
    #
    # Raises RuntimeError when the file does not exist.
    def initialize(path, filename = nil)
      @path = self.class.full_path(path, filename)

      unless File.exist?(@path)
        raise "Could not load configuration file from #{@path}"
      end

      configure YAML.load_file(@path)
    end

    # Writes the current username and access token to the file at +path+.
    # The file is created with owner-only permissions because it holds an
    # access token; permissions of an already existing file are not changed.
    def save
      File.open(path, 'w', 0600) do |file|
        file.write(to_yaml)
      end
    end

    class << self
      # Creates (or truncates) the configuration file with an empty hash and
      # returns a Config loaded from it.
      def init(path, filename = nil)
        File.open(full_path(path, filename), 'w', 0600) do |file|
          file.write({}.to_yaml)
        end

        new(path, filename)
      end

      # Absolute path of the configuration file inside the directory +path+.
      def full_path(path, filename = nil)
        File.expand_path(File.join(path, filename || Mode::Config::FILENAME))
      end
    end

    private

    # Copies the known keys out of the parsed YAML document.
    # An empty file parses to nil/false and a hand-edited file may contain
    # something other than a mapping; both are treated as "no values set"
    # instead of raising.
    def configure(config)
      config = {} unless config.is_a?(Hash)

      @username = config['username']
      @access_token = config['access_token']
    end

    # YAML document containing the username and access token.
    def to_yaml
      {
        'username' => username,
        'access_token' => access_token
      }.to_yaml
    end
  end
end