mode 0.0.5 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/README.md +17 -22
- data/bin/mode +1 -1
- data/lib/mode.rb +34 -6
- data/lib/mode/api/form.rb +53 -0
- data/lib/mode/api/link.rb +31 -0
- data/lib/mode/api/request.rb +181 -0
- data/lib/mode/api/resource.rb +67 -0
- data/lib/mode/auth/access_token.rb +23 -0
- data/lib/mode/cli.rb +3 -3
- data/lib/mode/cli/analyze.rb +1 -1
- data/lib/mode/cli/base.rb +5 -0
- data/lib/mode/cli/connect.rb +18 -0
- data/lib/mode/cli/helpers.rb +0 -9
- data/lib/mode/cli/import.rb +9 -38
- data/lib/mode/cli/login.rb +13 -0
- data/lib/mode/cli/package.rb +2 -5
- data/lib/mode/commands/analyze_field.rb +20 -21
- data/lib/mode/commands/analyze_schema.rb +69 -48
- data/lib/mode/commands/connect.rb +78 -0
- data/lib/mode/commands/helpers.rb +54 -0
- data/lib/mode/commands/import.rb +209 -20
- data/lib/mode/commands/login.rb +111 -0
- data/lib/mode/config.rb +13 -33
- data/lib/mode/configurable.rb +46 -0
- data/lib/mode/connector/config.rb +31 -0
- data/lib/mode/connector/daemon.rb +27 -0
- data/lib/mode/connector/data_source.rb +75 -0
- data/lib/mode/connector/dataset.rb +13 -0
- data/lib/mode/connector/message.rb +31 -0
- data/lib/mode/connector/poller.rb +27 -0
- data/lib/mode/connector/processor.rb +58 -0
- data/lib/mode/connector/registrar.rb +36 -0
- data/lib/mode/connector/scheduler.rb +62 -0
- data/lib/mode/connector/selector.rb +47 -0
- data/lib/mode/connector/type_map.rb +45 -0
- data/lib/mode/connector/uploader.rb +50 -0
- data/lib/mode/logger.rb +202 -0
- data/lib/mode/version.rb +1 -1
- data/mode.gemspec +13 -2
- data/spec/api/form_spec.rb +51 -0
- data/spec/api/link_spec.rb +23 -0
- data/spec/api/request_spec.rb +111 -0
- data/spec/api/resource_spec.rb +70 -0
- data/spec/auth/access_token_spec.rb +22 -0
- data/spec/commands/analyze_field_spec.rb +26 -0
- data/spec/commands/analyze_schema_spec.rb +7 -5
- data/spec/commands/connect_spec.rb +80 -0
- data/spec/commands/helpers_spec.rb +69 -0
- data/spec/commands/import_spec.rb +155 -0
- data/spec/commands/login_spec.rb +178 -0
- data/spec/config_spec.rb +9 -7
- data/spec/connector/config_spec.rb +46 -0
- data/spec/connector/daemon_spec.rb +30 -0
- data/spec/connector/data_source_spec.rb +73 -0
- data/spec/connector/message_spec.rb +22 -0
- data/spec/connector/poller_spec.rb +26 -0
- data/spec/connector/processor_spec.rb +93 -0
- data/spec/connector/registrar_spec.rb +53 -0
- data/spec/connector/scheduler_spec.rb +93 -0
- data/spec/connector/selector_spec.rb +54 -0
- data/spec/connector/type_map_spec.rb +45 -0
- data/spec/connector/uploader_spec.rb +55 -0
- data/spec/fixtures/country-codes/README.md +71 -0
- data/spec/fixtures/country-codes/data/country-codes.csv +250 -0
- data/spec/fixtures/country-codes/datapackage.json +142 -0
- data/spec/fixtures/country-codes/scripts/get_countries_of_earth.py +370 -0
- data/spec/fixtures/country-codes/scripts/reorder_columns.py +8 -0
- data/spec/fixtures/country-codes/scripts/requirements.pip +2 -0
- data/spec/fixtures/espn_draft.csv +473 -1
- data/spec/fixtures/espn_draft/data.csv +473 -0
- data/spec/fixtures/espn_draft/datapackage.json +43 -0
- data/spec/logger_spec.rb +79 -0
- data/spec/spec_helper.rb +6 -1
- metadata +156 -19
- data/lib/mode/cli/setup.rb +0 -12
- data/lib/mode/commands/package.rb +0 -56
- data/lib/mode/commands/setup.rb +0 -36
- data/lib/mode/package_builder.rb +0 -57
- data/spec/commands/setup_spec.rb +0 -62
- data/spec/fixtures/MOCK_DATA.csv +0 -100001
- data/spec/fixtures/cb_clean_small.csv +0 -100000
- data/spec/fixtures/duplicate_keys.csv +0 -3
- data/spec/fixtures/format_examples.csv.txt +0 -6
- data/spec/fixtures/format_examples_after_excel.csv.txt +0 -1
@@ -0,0 +1,23 @@
|
|
1
|
+
module Mode
  module Auth
    # Read-only wrapper around a resource object that describes an access
    # token. Nothing is cached or transformed: every reader below forwards
    # to the wrapped resource each time it is called.
    class AccessToken
      # The wrapped resource, exactly as handed to the constructor.
      attr_reader :resource

      # @param resource [Object] object responding to +name+, +token+ and
      #   +account_name+
      def initialize(resource)
        @resource = resource
      end

      # Defines #name, #token and #account_name, each returning the value of
      # the same-named public method on the wrapped resource.
      %w(name token account_name).each do |reader|
        define_method(reader) { resource.public_send(reader) }
      end
    end
  end
end
|
data/lib/mode/cli.rb
CHANGED
data/lib/mode/cli/analyze.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Mode
|
2
2
|
module CLI
|
3
3
|
class Base < Thor
|
4
|
-
desc "analyze PATH [--field=POSITION] [--match-type=TYPE]", "
|
4
|
+
desc "analyze PATH [--field=POSITION] [--match-type=TYPE]", "Analyze a CSV file"
|
5
5
|
option :sample, :banner => 'RATE', :desc => "Proportion of rows to inspect. Example: 0.5"
|
6
6
|
option :field, :banner => 'POSITION', :desc => "Field positions begin at 0. Example: 10"
|
7
7
|
option :match_type, :banner => 'TYPE', :desc => "One of the following: string, number, integer, datetime, boolean"
|
data/lib/mode/cli/base.rb
CHANGED
@@ -7,6 +7,11 @@ module Mode
|
|
7
7
|
# And then I'd have an explicit list of the commands we were including
|
8
8
|
#
|
9
9
|
|
10
|
+
# `mode version`: prints the gem version taken from Mode::VERSION.
desc "version", "Print the version of the installed Mode CLI"
def version
  say "Mode CLI Version #{Mode::VERSION}"
end
|
14
|
+
|
10
15
|
private
|
11
16
|
|
12
17
|
include Mode::CLI::Helpers
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Mode
  module CLI
    class Base < Thor
      # `mode connect start|stop|restart`: validates the sub-command and
      # delegates the actual work to Mode::Commands::Connect.
      desc "connect start|stop|restart [-c CONCURRENCY]", "Connect external databases to Mode"
      # Declared numeric so that a value supplied on the command line has the
      # same type as the integer default; without an explicit type Thor
      # treats the option as a string.
      option :concurrency, :type => :numeric, :aliases => '-c', :default => 4
      option :host, :default => 'www.modeanalytics.com'
      def connect(command)
        if ['start', 'stop', 'restart'].include?(command)
          Mode::Commands::Connect.new(command, options).execute
        else
          say "Error: valid commands for connect are start, stop and restart"
        end
      rescue => err
        # Any StandardError raised while building or running the command is
        # reported as a single line rather than a backtrace.
        say "Error: #{err.message}"
      end
    end
  end
end
|
data/lib/mode/cli/helpers.rb
CHANGED
@@ -8,15 +8,6 @@ module Mode
|
|
8
8
|
def valid_table?(table)
|
9
9
|
table =~ /[\w\d\_\-]+\/[\w\d\_\-]+/
|
10
10
|
end
|
11
|
-
|
12
|
-
def pkg_name(path)
|
13
|
-
path.split('/').last.split('.').first
|
14
|
-
end
|
15
|
-
|
16
|
-
def sample_rate(path)
|
17
|
-
file_size = File.size(path)
|
18
|
-
DataKit::CSV::Analyzer.sample_rate(file_size)
|
19
|
-
end
|
20
11
|
end
|
21
12
|
end
|
22
13
|
end
|
data/lib/mode/cli/import.rb
CHANGED
@@ -1,28 +1,22 @@
|
|
1
1
|
module Mode
|
2
2
|
module CLI
|
3
3
|
class Base < Thor
|
4
|
-
desc "import SOURCE ACCOUNT/TABLENAME [--
|
4
|
+
desc "import SOURCE ACCOUNT/TABLENAME [--replace]", "Import a flat file into the Mode data warehouse", :hide => true
|
5
5
|
long_desc <<-LONGDESC
|
6
|
-
The import commands allows you to create
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
The import commands allows you to create or replace tables
|
7
|
+
in the Mode data warehouse with data from flat files and
|
8
|
+
data packages. The default action is create with an optional
|
9
|
+
flag to replace the table.
|
10
10
|
|
11
|
-
Data can be imported from
|
11
|
+
Data can be imported from CSV files or data packages with the following command
|
12
12
|
|
13
13
|
1. CSV File
|
14
14
|
\x5> $ mode import gdp_quarterly.csv besquared/gdp_quarterly
|
15
15
|
|
16
|
-
2.
|
17
|
-
\x5> $ mode import
|
16
|
+
2. Data Package File
|
17
|
+
\x5> $ mode import gdp/data/quarterly.json besqaured/quarterly_gdp
|
18
18
|
|
19
|
-
3. Data Package
|
20
|
-
\x5> $ mode import gdp_us/quarterly besquared/gdp_quarterly
|
21
|
-
|
22
|
-
|
23
|
-
Note: If you do not specify a data package resource name we'll attempt to use the first resource in the package.
|
24
19
|
LONGDESC
|
25
|
-
option :update, :type => :boolean
|
26
20
|
option :replace, :type => :boolean
|
27
21
|
option :primary_key, :banner => 'pos1[,pos2,...] (ex: 0,2)'
|
28
22
|
def import(source, table)
|
@@ -31,31 +25,8 @@ module Mode
|
|
31
25
|
return
|
32
26
|
end
|
33
27
|
|
34
|
-
if File.directory?(source)
|
35
|
-
unless Mode::Package::Base.exist?(source)
|
36
|
-
error "Error: Invalid package given"
|
37
|
-
return
|
38
|
-
end
|
39
|
-
|
40
|
-
package, resource_name = *source.split('/')
|
41
|
-
package = Mode::Package::Base.open(source)
|
42
|
-
else
|
43
|
-
unless valid_file?(source)
|
44
|
-
error "Error: Invalid source file given"
|
45
|
-
return
|
46
|
-
end
|
47
|
-
|
48
|
-
dst_path = Dir.mktmpdir
|
49
|
-
src_data = Mode::CSV::Parser.new(source)
|
50
|
-
builder = Mode::Package::Builder.new(src_data, dst_path, pkg_name(source), sample_rate(source))
|
51
|
-
|
52
|
-
package = builder.execute # make the package
|
53
|
-
end
|
54
|
-
|
55
28
|
account, table_name = *table.split('/')
|
56
|
-
|
57
|
-
|
58
|
-
Mode::Commands::Import.new(account, table_name, package, resource_name).execute
|
29
|
+
Mode::Commands::Import.new(source, account, table_name).execute
|
59
30
|
end
|
60
31
|
end
|
61
32
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Mode
  module CLI
    class Base < Thor
      # `mode login`: builds a Mode::Commands::Login from the parsed
      # command-line options and runs it. No error handling happens here;
      # anything raised by the command propagates to Thor.
      desc 'login', 'Setup a new mode configuration in the given directory (defaults to home)'
      option :host,        :default => 'www.modeanalytics.com'
      option :staging,     :type => :boolean
      option :development, :type => :boolean
      def login
        login_command = Mode::Commands::Login.new(options)
        login_command.execute
      end
    end
  end
end
|
data/lib/mode/cli/package.rb
CHANGED
@@ -4,11 +4,8 @@ module Mode
|
|
4
4
|
desc "package SOURCE PACKAGEPATH [--name=NAME] [--resource-name=NAME]", "Creates a new data package from a csv file"
|
5
5
|
option :name, :desc => 'The name of the package'
|
6
6
|
option :resource_name, :default => 'data', :desc => 'The name of the resource in package'
|
7
|
-
def package(source,
|
8
|
-
|
9
|
-
|
10
|
-
name = options[:name] || parts.last
|
11
|
-
package_path = parts.length == 1 ? parts.first : parts[0, parts.length - 1]
|
7
|
+
def package(source, package_path)
|
8
|
+
name = options[:name] || package_path.split(File::Separator).last
|
12
9
|
Mode::Commands::Package.new(source, package_path, name, options[:resource_name]).execute
|
13
10
|
end
|
14
11
|
end
|
@@ -2,7 +2,9 @@ require 'terminal-table'
|
|
2
2
|
|
3
3
|
module Mode
|
4
4
|
module Commands
|
5
|
-
class AnalyzeField
|
5
|
+
class AnalyzeField
|
6
|
+
include Mode::Commands::Helpers
|
7
|
+
|
6
8
|
attr_accessor :path
|
7
9
|
attr_accessor :field_pos
|
8
10
|
attr_accessor :options
|
@@ -13,34 +15,31 @@ module Mode
|
|
13
15
|
@options = options
|
14
16
|
end
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
error "Couldn't find file at #{path}"
|
22
|
-
return
|
23
|
-
end
|
18
|
+
def execute
|
19
|
+
if path.nil? || !File.exist?(path)
|
20
|
+
puts "Error: Couldn't find file at #{path}"
|
21
|
+
return
|
22
|
+
end
|
24
23
|
|
25
|
-
|
24
|
+
csv = DataKit::CSV::Parser.new(path)
|
26
25
|
|
27
|
-
|
26
|
+
field_name = csv.headers[field_pos]
|
28
27
|
|
29
|
-
|
28
|
+
puts "Analyzing #{field_name} at #{path || 'input'}"
|
30
29
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
30
|
+
analysis, total_time = timer_block do
|
31
|
+
DataKit::CSV::FieldAnalyzer.analyze(csv, field_pos, {
|
32
|
+
:match_type => match_type, :sampling_rate => 1
|
33
|
+
})
|
34
|
+
end
|
36
35
|
|
37
|
-
|
36
|
+
puts "Analyzed #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
|
38
37
|
|
39
|
-
|
40
|
-
end
|
38
|
+
display(analysis)
|
41
39
|
end
|
42
40
|
|
43
41
|
private
|
42
|
+
|
44
43
|
def display(analysis)
|
45
44
|
table = Terminal::Table.new(:headings => [
|
46
45
|
'Row No.', 'Type', 'Value'
|
@@ -52,7 +51,7 @@ module Mode
|
|
52
51
|
end
|
53
52
|
end
|
54
53
|
|
55
|
-
|
54
|
+
puts table
|
56
55
|
end
|
57
56
|
|
58
57
|
def match_type
|
@@ -2,7 +2,9 @@ require 'terminal-table'
|
|
2
2
|
|
3
3
|
module Mode
|
4
4
|
module Commands
|
5
|
-
class AnalyzeSchema
|
5
|
+
class AnalyzeSchema
|
6
|
+
include Mode::Commands::Helpers
|
7
|
+
|
6
8
|
attr_accessor :path
|
7
9
|
attr_accessor :options
|
8
10
|
|
@@ -11,79 +13,98 @@ module Mode
|
|
11
13
|
@options = options
|
12
14
|
end
|
13
15
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
error "Couldn't find file at #{path}"
|
20
|
-
return
|
21
|
-
end
|
22
|
-
|
23
|
-
csv = DataKit::CSV::Parser.new(path)
|
16
|
+
def execute
|
17
|
+
if path.nil? || !File.exist?(path)
|
18
|
+
puts "Error: Couldn't find file at #{path}"
|
19
|
+
return
|
20
|
+
end
|
24
21
|
|
25
|
-
|
22
|
+
csv = build_csv
|
23
|
+
analysis = build_analysis(csv)
|
24
|
+
display_analysis(analysis)
|
25
|
+
end
|
26
26
|
|
27
|
-
|
28
|
-
DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sampling_rate)
|
29
|
-
end
|
27
|
+
private
|
30
28
|
|
31
|
-
|
29
|
+
def build_csv
|
30
|
+
DataKit::CSV::Parser.new(path)
|
31
|
+
end
|
32
32
|
|
33
|
-
|
33
|
+
def build_analysis(csv)
|
34
|
+
puts "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 * sampling_rate)}%)..."
|
34
35
|
|
35
|
-
|
36
|
+
analysis, total_time = timer_block do
|
37
|
+
DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sampling_rate)
|
36
38
|
end
|
37
|
-
end
|
38
39
|
|
39
|
-
|
40
|
+
puts "Analyzed #{analysis.sample_count} of #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
|
40
41
|
|
41
|
-
|
42
|
-
|
42
|
+
analysis
|
43
|
+
end
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
end
|
45
|
+
# Proportion of rows to sample, as a Float.
#
# Uses options[:sample] when it is set; otherwise asks
# DataKit::CSV::SchemaAnalyzer for a rate based on the size of the file at
# +path+. The file size is read in both cases.
def sampling_rate
  bytes = File.size(path)
  rate = options[:sample]
  rate = DataKit::CSV::SchemaAnalyzer.sampling_rate(bytes) unless rate
  rate.to_f
end
|
49
49
|
|
50
|
-
|
50
|
+
def display_analysis(analysis)
|
51
|
+
table = build_table
|
52
|
+
populate_table(analysis, table)
|
53
|
+
puts table
|
51
54
|
end
|
52
55
|
|
53
|
-
def
|
54
|
-
|
56
|
+
def build_table
|
57
|
+
Terminal::Table.new(:headings => [
|
55
58
|
'Field No.', 'Field', 'Type',
|
56
59
|
'String (%)', 'Integer (%)', 'Number (%)',
|
57
60
|
'Date/Time (%)', 'Boolean (%)', 'Empty (%)'
|
58
61
|
])
|
62
|
+
end
|
59
63
|
|
64
|
+
def populate_table(analysis, table)
|
60
65
|
analysis.fields.each_with_index do |field_name, index|
|
61
|
-
|
66
|
+
build_table_row(analysis, table, field_name, index)
|
67
|
+
end
|
68
|
+
end
|
62
69
|
|
63
|
-
|
70
|
+
def build_table_row(analysis, table, field_name, index)
|
71
|
+
row = [index, field_name]
|
64
72
|
|
65
|
-
|
66
|
-
|
67
|
-
elsif analysis.has_only_numeric_types?(field_name)
|
68
|
-
row << field_type
|
69
|
-
else
|
70
|
-
row << '** ' + field_type.to_s
|
71
|
-
end
|
73
|
+
append_row_type(analysis, field_name, row)
|
74
|
+
append_row_type_counts(analysis, field_name, row)
|
72
75
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
76
|
+
table.add_row(row)
|
77
|
+
end
|
78
|
+
|
79
|
+
def append_row_type(analysis, field_name, row)
|
80
|
+
field_type = analysis.type?(field_name)
|
77
81
|
|
78
|
-
|
82
|
+
if analysis.has_single_type?(field_name)
|
83
|
+
row << field_type
|
84
|
+
elsif analysis.has_only_numeric_types?(field_name)
|
85
|
+
row << field_type
|
86
|
+
else
|
87
|
+
row << '** ' + field_type.to_s
|
79
88
|
end
|
89
|
+
end
|
80
90
|
|
81
|
-
|
91
|
+
def append_row_type_counts(analysis, field_name, row)
|
92
|
+
DataKit::Dataset::Field::Types.each do |type|
|
93
|
+
type_count = analysis.type_count(field_name, type)
|
94
|
+
row << format_percentage_cell(type_count, analysis.sample_count)
|
95
|
+
end
|
82
96
|
end
|
83
97
|
|
84
|
-
def
|
85
|
-
|
86
|
-
|
98
|
+
# Builds a right-aligned table cell showing numerator/denominator as a
# percentage with two decimals (e.g. "25.00%").
#
# A numerator equal to zero yields a cell whose :value is nil, so the
# denominator is never used in that case.
#
# @param numerator [Numeric]
# @param denominator [Numeric] converted with +to_f+ before dividing
# @return [Hash] with keys :alignment and :value
def format_percentage_cell(numerator, denominator)
  return { :alignment => :right, :value => nil } if numerator == 0

  share = 100 * (numerator / denominator.to_f)
  { :alignment => :right, :value => '%.2f%%' % share }
end
|
88
109
|
end
|
89
110
|
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module Mode
  module Commands
    # Implements the connect command: for 'start' and 'restart' it first runs
    # the connector registrar, then hands the command to
    # Mode::Connector::Daemon.
    #
    # The require_*!, configure_api_requests! and config_dir calls are not
    # defined in this class (presumably provided by Mode::Commands::Helpers).
    class Connect
      include Mode::Commands::Helpers

      attr_reader :command, :concurrency, :configuration

      # @param command [String] 'start', 'stop' or 'restart'
      # @param options [Hash] only :concurrency is read here (defaults to 4)
      def initialize(command, options = {})
        @command     = command
        @concurrency = options[:concurrency] || 4

        # Validation runs before the connector configuration is loaded.
        validate!

        @configuration = Mode::Connector::Config.new(config_dir)
      end

      # Registers (start/restart only) and then dispatches the command.
      def execute
        register! if %w(start restart).include?(command)
        spawn!
      end

      private

      # Runs the precondition checks in a fixed order.
      def validate!
        require_config!
        require_credentials!
        require_connect_config!
        configure_api_requests!
      end

      def register!
        registrar = Mode::Connector::Registrar.new(configuration)
        registrar.perform!
      end

      # Dispatches on the command; anything other than start/stop/restart
      # raises.
      def spawn!
        case command
        when 'restart'       then spawn_restart!
        when 'stop', 'start' then spawn_command!(command)
        else raise "Unknown command #{command}"
        end
      end

      # Issues 'stop' followed by 'start'. The 'start' is in an ensure clause,
      # so it is attempted even when 'stop' raises; the error from 'stop'
      # still propagates afterwards.
      def spawn_restart!
        spawn_command!('stop')
      ensure
        spawn_command!('start')
      end

      def spawn_command!(command)
        Mode::Connector::Daemon.spawn!(spawn_opts, spawn_args(command))
      end

      # Options passed to the daemon: log and pid files live in working_dir.
      def spawn_opts
        dir = working_dir

        {
          :sync_log    => true,
          :working_dir => dir,
          :log_file    => File.join(dir, 'connect.log'),
          :pid_file    => File.join(dir, 'connect.pid')
        }
      end

      def spawn_args(command)
        [command, concurrency, configuration.data_sources]
      end

      # Expanded path of ~/.mode, computed once per instance.
      def working_dir
        @working_dir ||= File.expand_path('~/.mode')
      end
    end
  end
end
|