mode 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/README.md +17 -22
- data/bin/mode +1 -1
- data/lib/mode.rb +34 -6
- data/lib/mode/api/form.rb +53 -0
- data/lib/mode/api/link.rb +31 -0
- data/lib/mode/api/request.rb +181 -0
- data/lib/mode/api/resource.rb +67 -0
- data/lib/mode/auth/access_token.rb +23 -0
- data/lib/mode/cli.rb +3 -3
- data/lib/mode/cli/analyze.rb +1 -1
- data/lib/mode/cli/base.rb +5 -0
- data/lib/mode/cli/connect.rb +18 -0
- data/lib/mode/cli/helpers.rb +0 -9
- data/lib/mode/cli/import.rb +9 -38
- data/lib/mode/cli/login.rb +13 -0
- data/lib/mode/cli/package.rb +2 -5
- data/lib/mode/commands/analyze_field.rb +20 -21
- data/lib/mode/commands/analyze_schema.rb +69 -48
- data/lib/mode/commands/connect.rb +78 -0
- data/lib/mode/commands/helpers.rb +54 -0
- data/lib/mode/commands/import.rb +209 -20
- data/lib/mode/commands/login.rb +111 -0
- data/lib/mode/config.rb +13 -33
- data/lib/mode/configurable.rb +46 -0
- data/lib/mode/connector/config.rb +31 -0
- data/lib/mode/connector/daemon.rb +27 -0
- data/lib/mode/connector/data_source.rb +75 -0
- data/lib/mode/connector/dataset.rb +13 -0
- data/lib/mode/connector/message.rb +31 -0
- data/lib/mode/connector/poller.rb +27 -0
- data/lib/mode/connector/processor.rb +58 -0
- data/lib/mode/connector/registrar.rb +36 -0
- data/lib/mode/connector/scheduler.rb +62 -0
- data/lib/mode/connector/selector.rb +47 -0
- data/lib/mode/connector/type_map.rb +45 -0
- data/lib/mode/connector/uploader.rb +50 -0
- data/lib/mode/logger.rb +202 -0
- data/lib/mode/version.rb +1 -1
- data/mode.gemspec +13 -2
- data/spec/api/form_spec.rb +51 -0
- data/spec/api/link_spec.rb +23 -0
- data/spec/api/request_spec.rb +111 -0
- data/spec/api/resource_spec.rb +70 -0
- data/spec/auth/access_token_spec.rb +22 -0
- data/spec/commands/analyze_field_spec.rb +26 -0
- data/spec/commands/analyze_schema_spec.rb +7 -5
- data/spec/commands/connect_spec.rb +80 -0
- data/spec/commands/helpers_spec.rb +69 -0
- data/spec/commands/import_spec.rb +155 -0
- data/spec/commands/login_spec.rb +178 -0
- data/spec/config_spec.rb +9 -7
- data/spec/connector/config_spec.rb +46 -0
- data/spec/connector/daemon_spec.rb +30 -0
- data/spec/connector/data_source_spec.rb +73 -0
- data/spec/connector/message_spec.rb +22 -0
- data/spec/connector/poller_spec.rb +26 -0
- data/spec/connector/processor_spec.rb +93 -0
- data/spec/connector/registrar_spec.rb +53 -0
- data/spec/connector/scheduler_spec.rb +93 -0
- data/spec/connector/selector_spec.rb +54 -0
- data/spec/connector/type_map_spec.rb +45 -0
- data/spec/connector/uploader_spec.rb +55 -0
- data/spec/fixtures/country-codes/README.md +71 -0
- data/spec/fixtures/country-codes/data/country-codes.csv +250 -0
- data/spec/fixtures/country-codes/datapackage.json +142 -0
- data/spec/fixtures/country-codes/scripts/get_countries_of_earth.py +370 -0
- data/spec/fixtures/country-codes/scripts/reorder_columns.py +8 -0
- data/spec/fixtures/country-codes/scripts/requirements.pip +2 -0
- data/spec/fixtures/espn_draft.csv +473 -1
- data/spec/fixtures/espn_draft/data.csv +473 -0
- data/spec/fixtures/espn_draft/datapackage.json +43 -0
- data/spec/logger_spec.rb +79 -0
- data/spec/spec_helper.rb +6 -1
- metadata +156 -19
- data/lib/mode/cli/setup.rb +0 -12
- data/lib/mode/commands/package.rb +0 -56
- data/lib/mode/commands/setup.rb +0 -36
- data/lib/mode/package_builder.rb +0 -57
- data/spec/commands/setup_spec.rb +0 -62
- data/spec/fixtures/MOCK_DATA.csv +0 -100001
- data/spec/fixtures/cb_clean_small.csv +0 -100000
- data/spec/fixtures/duplicate_keys.csv +0 -3
- data/spec/fixtures/format_examples.csv.txt +0 -6
- data/spec/fixtures/format_examples_after_excel.csv.txt +0 -1
@@ -0,0 +1,23 @@
|
|
1
|
+
module Mode
|
2
|
+
module Auth
|
3
|
+
class AccessToken
|
4
|
+
attr_reader :resource
|
5
|
+
|
6
|
+
def initialize(resource)
|
7
|
+
@resource = resource
|
8
|
+
end
|
9
|
+
|
10
|
+
def name
|
11
|
+
resource.name
|
12
|
+
end
|
13
|
+
|
14
|
+
def token
|
15
|
+
resource.token
|
16
|
+
end
|
17
|
+
|
18
|
+
def account_name
|
19
|
+
resource.account_name
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/lib/mode/cli.rb
CHANGED
data/lib/mode/cli/analyze.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Mode
|
2
2
|
module CLI
|
3
3
|
class Base < Thor
|
4
|
-
desc "analyze PATH [--field=POSITION] [--match-type=TYPE]", "
|
4
|
+
desc "analyze PATH [--field=POSITION] [--match-type=TYPE]", "Analyze a CSV file"
|
5
5
|
option :sample, :banner => 'RATE', :desc => "Proportion of rows to inspect. Example: 0.5"
|
6
6
|
option :field, :banner => 'POSITION', :desc => "Field positions begin at 0. Example: 10"
|
7
7
|
option :match_type, :banner => 'TYPE', :desc => "One of the following: string, number, integer, datetime, boolean"
|
data/lib/mode/cli/base.rb
CHANGED
@@ -7,6 +7,11 @@ module Mode
|
|
7
7
|
# And then I'd have an explicit list of the commands we were including
|
8
8
|
#
|
9
9
|
|
10
|
+
desc "version", "Print the version of the mode installed mode CLI"
|
11
|
+
def version
|
12
|
+
say "Mode CLI Version #{Mode::VERSION}"
|
13
|
+
end
|
14
|
+
|
10
15
|
private
|
11
16
|
|
12
17
|
include Mode::CLI::Helpers
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Mode
|
2
|
+
module CLI
|
3
|
+
class Base < Thor
|
4
|
+
desc "connect start|stop|restart [-c CONCURRENCY]", "Connect external databases to Mode"
|
5
|
+
option :concurrency, :aliases => :c, :default => 4
|
6
|
+
option :host, :default => 'www.modeanalytics.com'
|
7
|
+
def connect(command)
|
8
|
+
if ['start', 'stop', 'restart'].include?(command)
|
9
|
+
Mode::Commands::Connect.new(command, options).execute
|
10
|
+
else
|
11
|
+
say "Error: valid commands for connect are start, stop and restart"
|
12
|
+
end
|
13
|
+
rescue => err
|
14
|
+
say "Error: #{err.message}"
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/mode/cli/helpers.rb
CHANGED
@@ -8,15 +8,6 @@ module Mode
|
|
8
8
|
def valid_table?(table)
|
9
9
|
table =~ /[\w\d\_\-]+\/[\w\d\_\-]+/
|
10
10
|
end
|
11
|
-
|
12
|
-
def pkg_name(path)
|
13
|
-
path.split('/').last.split('.').first
|
14
|
-
end
|
15
|
-
|
16
|
-
def sample_rate(path)
|
17
|
-
file_size = File.size(path)
|
18
|
-
DataKit::CSV::Analyzer.sample_rate(file_size)
|
19
|
-
end
|
20
11
|
end
|
21
12
|
end
|
22
13
|
end
|
data/lib/mode/cli/import.rb
CHANGED
@@ -1,28 +1,22 @@
|
|
1
1
|
module Mode
|
2
2
|
module CLI
|
3
3
|
class Base < Thor
|
4
|
-
desc "import SOURCE ACCOUNT/TABLENAME [--
|
4
|
+
desc "import SOURCE ACCOUNT/TABLENAME [--replace]", "Import a flat file into the Mode data warehouse", :hide => true
|
5
5
|
long_desc <<-LONGDESC
|
6
|
-
The import commands allows you to create
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
The import commands allows you to create or replace tables
|
7
|
+
in the Mode data warehouse with data from flat files and
|
8
|
+
data packages. The default action is create with an optional
|
9
|
+
flag to replace the table.
|
10
10
|
|
11
|
-
Data can be imported from
|
11
|
+
Data can be imported from CSV files or data packages with the following command
|
12
12
|
|
13
13
|
1. CSV File
|
14
14
|
\x5> $ mode import gdp_quarterly.csv besquared/gdp_quarterly
|
15
15
|
|
16
|
-
2.
|
17
|
-
\x5> $ mode import
|
16
|
+
2. Data Package File
|
17
|
+
\x5> $ mode import gdp/data/quarterly.json besqaured/quarterly_gdp
|
18
18
|
|
19
|
-
3. Data Package
|
20
|
-
\x5> $ mode import gdp_us/quarterly besquared/gdp_quarterly
|
21
|
-
|
22
|
-
|
23
|
-
Note: If you do not specify a data package resource name we'll attempt to use the first resource in the package.
|
24
19
|
LONGDESC
|
25
|
-
option :update, :type => :boolean
|
26
20
|
option :replace, :type => :boolean
|
27
21
|
option :primary_key, :banner => 'pos1[,pos2,...] (ex: 0,2)'
|
28
22
|
def import(source, table)
|
@@ -31,31 +25,8 @@ module Mode
|
|
31
25
|
return
|
32
26
|
end
|
33
27
|
|
34
|
-
if File.directory?(source)
|
35
|
-
unless Mode::Package::Base.exist?(source)
|
36
|
-
error "Error: Invalid package given"
|
37
|
-
return
|
38
|
-
end
|
39
|
-
|
40
|
-
package, resource_name = *source.split('/')
|
41
|
-
package = Mode::Package::Base.open(source)
|
42
|
-
else
|
43
|
-
unless valid_file?(source)
|
44
|
-
error "Error: Invalid source file given"
|
45
|
-
return
|
46
|
-
end
|
47
|
-
|
48
|
-
dst_path = Dir.mktmpdir
|
49
|
-
src_data = Mode::CSV::Parser.new(source)
|
50
|
-
builder = Mode::Package::Builder.new(src_data, dst_path, pkg_name(source), sample_rate(source))
|
51
|
-
|
52
|
-
package = builder.execute # make the package
|
53
|
-
end
|
54
|
-
|
55
28
|
account, table_name = *table.split('/')
|
56
|
-
|
57
|
-
|
58
|
-
Mode::Commands::Import.new(account, table_name, package, resource_name).execute
|
29
|
+
Mode::Commands::Import.new(source, account, table_name).execute
|
59
30
|
end
|
60
31
|
end
|
61
32
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Mode
|
2
|
+
module CLI
|
3
|
+
class Base < Thor
|
4
|
+
desc "login", "Setup a new mode configuration in the given directory (defaults to home)"
|
5
|
+
option :host, :default => 'www.modeanalytics.com'
|
6
|
+
option :staging, :type => :boolean
|
7
|
+
option :development, :type => :boolean
|
8
|
+
def login
|
9
|
+
Mode::Commands::Login.new(options).execute
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
data/lib/mode/cli/package.rb
CHANGED
@@ -4,11 +4,8 @@ module Mode
|
|
4
4
|
desc "package SOURCE PACKAGEPATH [--name=NAME] [--resource-name=NAME]", "Creates a new data package from a csv file"
|
5
5
|
option :name, :desc => 'The name of the package'
|
6
6
|
option :resource_name, :default => 'data', :desc => 'The name of the resource in package'
|
7
|
-
def package(source,
|
8
|
-
|
9
|
-
|
10
|
-
name = options[:name] || parts.last
|
11
|
-
package_path = parts.length == 1 ? parts.first : parts[0, parts.length - 1]
|
7
|
+
def package(source, package_path)
|
8
|
+
name = options[:name] || package_path.split(File::Separator).last
|
12
9
|
Mode::Commands::Package.new(source, package_path, name, options[:resource_name]).execute
|
13
10
|
end
|
14
11
|
end
|
@@ -2,7 +2,9 @@ require 'terminal-table'
|
|
2
2
|
|
3
3
|
module Mode
|
4
4
|
module Commands
|
5
|
-
class AnalyzeField
|
5
|
+
class AnalyzeField
|
6
|
+
include Mode::Commands::Helpers
|
7
|
+
|
6
8
|
attr_accessor :path
|
7
9
|
attr_accessor :field_pos
|
8
10
|
attr_accessor :options
|
@@ -13,34 +15,31 @@ module Mode
|
|
13
15
|
@options = options
|
14
16
|
end
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
error "Couldn't find file at #{path}"
|
22
|
-
return
|
23
|
-
end
|
18
|
+
def execute
|
19
|
+
if path.nil? || !File.exist?(path)
|
20
|
+
puts "Error: Couldn't find file at #{path}"
|
21
|
+
return
|
22
|
+
end
|
24
23
|
|
25
|
-
|
24
|
+
csv = DataKit::CSV::Parser.new(path)
|
26
25
|
|
27
|
-
|
26
|
+
field_name = csv.headers[field_pos]
|
28
27
|
|
29
|
-
|
28
|
+
puts "Analyzing #{field_name} at #{path || 'input'}"
|
30
29
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
30
|
+
analysis, total_time = timer_block do
|
31
|
+
DataKit::CSV::FieldAnalyzer.analyze(csv, field_pos, {
|
32
|
+
:match_type => match_type, :sampling_rate => 1
|
33
|
+
})
|
34
|
+
end
|
36
35
|
|
37
|
-
|
36
|
+
puts "Analyzed #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
|
38
37
|
|
39
|
-
|
40
|
-
end
|
38
|
+
display(analysis)
|
41
39
|
end
|
42
40
|
|
43
41
|
private
|
42
|
+
|
44
43
|
def display(analysis)
|
45
44
|
table = Terminal::Table.new(:headings => [
|
46
45
|
'Row No.', 'Type', 'Value'
|
@@ -52,7 +51,7 @@ module Mode
|
|
52
51
|
end
|
53
52
|
end
|
54
53
|
|
55
|
-
|
54
|
+
puts table
|
56
55
|
end
|
57
56
|
|
58
57
|
def match_type
|
@@ -2,7 +2,9 @@ require 'terminal-table'
|
|
2
2
|
|
3
3
|
module Mode
|
4
4
|
module Commands
|
5
|
-
class AnalyzeSchema
|
5
|
+
class AnalyzeSchema
|
6
|
+
include Mode::Commands::Helpers
|
7
|
+
|
6
8
|
attr_accessor :path
|
7
9
|
attr_accessor :options
|
8
10
|
|
@@ -11,79 +13,98 @@ module Mode
|
|
11
13
|
@options = options
|
12
14
|
end
|
13
15
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
error "Couldn't find file at #{path}"
|
20
|
-
return
|
21
|
-
end
|
22
|
-
|
23
|
-
csv = DataKit::CSV::Parser.new(path)
|
16
|
+
def execute
|
17
|
+
if path.nil? || !File.exist?(path)
|
18
|
+
puts "Error: Couldn't find file at #{path}"
|
19
|
+
return
|
20
|
+
end
|
24
21
|
|
25
|
-
|
22
|
+
csv = build_csv
|
23
|
+
analysis = build_analysis(csv)
|
24
|
+
display_analysis(analysis)
|
25
|
+
end
|
26
26
|
|
27
|
-
|
28
|
-
DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sampling_rate)
|
29
|
-
end
|
27
|
+
private
|
30
28
|
|
31
|
-
|
29
|
+
def build_csv
|
30
|
+
DataKit::CSV::Parser.new(path)
|
31
|
+
end
|
32
32
|
|
33
|
-
|
33
|
+
def build_analysis(csv)
|
34
|
+
puts "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 * sampling_rate)}%)..."
|
34
35
|
|
35
|
-
|
36
|
+
analysis, total_time = timer_block do
|
37
|
+
DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sampling_rate)
|
36
38
|
end
|
37
|
-
end
|
38
39
|
|
39
|
-
|
40
|
+
puts "Analyzed #{analysis.sample_count} of #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
|
40
41
|
|
41
|
-
|
42
|
-
|
42
|
+
analysis
|
43
|
+
end
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
end
|
45
|
+
def sampling_rate
|
46
|
+
file_size = File.size(path)
|
47
|
+
(options[:sample] || DataKit::CSV::SchemaAnalyzer.sampling_rate(file_size)).to_f
|
48
|
+
end
|
49
49
|
|
50
|
-
|
50
|
+
def display_analysis(analysis)
|
51
|
+
table = build_table
|
52
|
+
populate_table(analysis, table)
|
53
|
+
puts table
|
51
54
|
end
|
52
55
|
|
53
|
-
def
|
54
|
-
|
56
|
+
def build_table
|
57
|
+
Terminal::Table.new(:headings => [
|
55
58
|
'Field No.', 'Field', 'Type',
|
56
59
|
'String (%)', 'Integer (%)', 'Number (%)',
|
57
60
|
'Date/Time (%)', 'Boolean (%)', 'Empty (%)'
|
58
61
|
])
|
62
|
+
end
|
59
63
|
|
64
|
+
def populate_table(analysis, table)
|
60
65
|
analysis.fields.each_with_index do |field_name, index|
|
61
|
-
|
66
|
+
build_table_row(analysis, table, field_name, index)
|
67
|
+
end
|
68
|
+
end
|
62
69
|
|
63
|
-
|
70
|
+
def build_table_row(analysis, table, field_name, index)
|
71
|
+
row = [index, field_name]
|
64
72
|
|
65
|
-
|
66
|
-
|
67
|
-
elsif analysis.has_only_numeric_types?(field_name)
|
68
|
-
row << field_type
|
69
|
-
else
|
70
|
-
row << '** ' + field_type.to_s
|
71
|
-
end
|
73
|
+
append_row_type(analysis, field_name, row)
|
74
|
+
append_row_type_counts(analysis, field_name, row)
|
72
75
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
76
|
+
table.add_row(row)
|
77
|
+
end
|
78
|
+
|
79
|
+
def append_row_type(analysis, field_name, row)
|
80
|
+
field_type = analysis.type?(field_name)
|
77
81
|
|
78
|
-
|
82
|
+
if analysis.has_single_type?(field_name)
|
83
|
+
row << field_type
|
84
|
+
elsif analysis.has_only_numeric_types?(field_name)
|
85
|
+
row << field_type
|
86
|
+
else
|
87
|
+
row << '** ' + field_type.to_s
|
79
88
|
end
|
89
|
+
end
|
80
90
|
|
81
|
-
|
91
|
+
def append_row_type_counts(analysis, field_name, row)
|
92
|
+
DataKit::Dataset::Field::Types.each do |type|
|
93
|
+
type_count = analysis.type_count(field_name, type)
|
94
|
+
row << format_percentage_cell(type_count, analysis.sample_count)
|
95
|
+
end
|
82
96
|
end
|
83
97
|
|
84
|
-
def
|
85
|
-
|
86
|
-
|
98
|
+
def format_percentage_cell(numerator, denominator)
|
99
|
+
cell = { :alignment => :right }
|
100
|
+
|
101
|
+
if numerator == 0
|
102
|
+
cell[:value] = nil
|
103
|
+
else
|
104
|
+
cell[:value] = '%.2f' % (100 * (numerator / denominator.to_f)) + '%'
|
105
|
+
end
|
106
|
+
|
107
|
+
cell
|
87
108
|
end
|
88
109
|
end
|
89
110
|
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module Mode
|
2
|
+
module Commands
|
3
|
+
class Connect
|
4
|
+
|
5
|
+
include Mode::Commands::Helpers
|
6
|
+
|
7
|
+
attr_reader :command
|
8
|
+
attr_reader :concurrency
|
9
|
+
attr_reader :configuration
|
10
|
+
|
11
|
+
def initialize(command, options = {})
|
12
|
+
@command = command
|
13
|
+
@concurrency = options[:concurrency] || 4
|
14
|
+
|
15
|
+
validate!
|
16
|
+
|
17
|
+
@configuration = Mode::Connector::Config.new(config_dir)
|
18
|
+
end
|
19
|
+
|
20
|
+
def execute
|
21
|
+
register! if ['start', 'restart'].include?(command)
|
22
|
+
spawn!
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
def validate!
|
28
|
+
require_config!
|
29
|
+
require_credentials!
|
30
|
+
require_connect_config!
|
31
|
+
configure_api_requests!
|
32
|
+
end
|
33
|
+
|
34
|
+
def register!
|
35
|
+
Mode::Connector::Registrar.new(configuration).perform!
|
36
|
+
end
|
37
|
+
|
38
|
+
def spawn!
|
39
|
+
if command == 'restart'
|
40
|
+
spawn_restart!
|
41
|
+
elsif ['stop', 'start'].include?(command)
|
42
|
+
spawn_command!(command)
|
43
|
+
else
|
44
|
+
raise "Unknown command #{command}"
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def spawn_restart!
|
49
|
+
begin
|
50
|
+
spawn_command!('stop')
|
51
|
+
ensure
|
52
|
+
spawn_command!('start')
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def spawn_command!(command)
|
57
|
+
Mode::Connector::Daemon.spawn!(spawn_opts, spawn_args(command))
|
58
|
+
end
|
59
|
+
|
60
|
+
def spawn_opts
|
61
|
+
{
|
62
|
+
:sync_log => true,
|
63
|
+
:working_dir => working_dir,
|
64
|
+
:log_file => File.join(working_dir, 'connect.log'),
|
65
|
+
:pid_file => File.join(working_dir, 'connect.pid')
|
66
|
+
}
|
67
|
+
end
|
68
|
+
|
69
|
+
def spawn_args(command)
|
70
|
+
[command, concurrency, configuration.data_sources]
|
71
|
+
end
|
72
|
+
|
73
|
+
def working_dir
|
74
|
+
@working_dir ||= File.expand_path('~/.mode')
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|