mode 0.0.5 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -1
  3. data/README.md +17 -22
  4. data/bin/mode +1 -1
  5. data/lib/mode.rb +34 -6
  6. data/lib/mode/api/form.rb +53 -0
  7. data/lib/mode/api/link.rb +31 -0
  8. data/lib/mode/api/request.rb +181 -0
  9. data/lib/mode/api/resource.rb +67 -0
  10. data/lib/mode/auth/access_token.rb +23 -0
  11. data/lib/mode/cli.rb +3 -3
  12. data/lib/mode/cli/analyze.rb +1 -1
  13. data/lib/mode/cli/base.rb +5 -0
  14. data/lib/mode/cli/connect.rb +18 -0
  15. data/lib/mode/cli/helpers.rb +0 -9
  16. data/lib/mode/cli/import.rb +9 -38
  17. data/lib/mode/cli/login.rb +13 -0
  18. data/lib/mode/cli/package.rb +2 -5
  19. data/lib/mode/commands/analyze_field.rb +20 -21
  20. data/lib/mode/commands/analyze_schema.rb +69 -48
  21. data/lib/mode/commands/connect.rb +78 -0
  22. data/lib/mode/commands/helpers.rb +54 -0
  23. data/lib/mode/commands/import.rb +209 -20
  24. data/lib/mode/commands/login.rb +111 -0
  25. data/lib/mode/config.rb +13 -33
  26. data/lib/mode/configurable.rb +46 -0
  27. data/lib/mode/connector/config.rb +31 -0
  28. data/lib/mode/connector/daemon.rb +27 -0
  29. data/lib/mode/connector/data_source.rb +75 -0
  30. data/lib/mode/connector/dataset.rb +13 -0
  31. data/lib/mode/connector/message.rb +31 -0
  32. data/lib/mode/connector/poller.rb +27 -0
  33. data/lib/mode/connector/processor.rb +58 -0
  34. data/lib/mode/connector/registrar.rb +36 -0
  35. data/lib/mode/connector/scheduler.rb +62 -0
  36. data/lib/mode/connector/selector.rb +47 -0
  37. data/lib/mode/connector/type_map.rb +45 -0
  38. data/lib/mode/connector/uploader.rb +50 -0
  39. data/lib/mode/logger.rb +202 -0
  40. data/lib/mode/version.rb +1 -1
  41. data/mode.gemspec +13 -2
  42. data/spec/api/form_spec.rb +51 -0
  43. data/spec/api/link_spec.rb +23 -0
  44. data/spec/api/request_spec.rb +111 -0
  45. data/spec/api/resource_spec.rb +70 -0
  46. data/spec/auth/access_token_spec.rb +22 -0
  47. data/spec/commands/analyze_field_spec.rb +26 -0
  48. data/spec/commands/analyze_schema_spec.rb +7 -5
  49. data/spec/commands/connect_spec.rb +80 -0
  50. data/spec/commands/helpers_spec.rb +69 -0
  51. data/spec/commands/import_spec.rb +155 -0
  52. data/spec/commands/login_spec.rb +178 -0
  53. data/spec/config_spec.rb +9 -7
  54. data/spec/connector/config_spec.rb +46 -0
  55. data/spec/connector/daemon_spec.rb +30 -0
  56. data/spec/connector/data_source_spec.rb +73 -0
  57. data/spec/connector/message_spec.rb +22 -0
  58. data/spec/connector/poller_spec.rb +26 -0
  59. data/spec/connector/processor_spec.rb +93 -0
  60. data/spec/connector/registrar_spec.rb +53 -0
  61. data/spec/connector/scheduler_spec.rb +93 -0
  62. data/spec/connector/selector_spec.rb +54 -0
  63. data/spec/connector/type_map_spec.rb +45 -0
  64. data/spec/connector/uploader_spec.rb +55 -0
  65. data/spec/fixtures/country-codes/README.md +71 -0
  66. data/spec/fixtures/country-codes/data/country-codes.csv +250 -0
  67. data/spec/fixtures/country-codes/datapackage.json +142 -0
  68. data/spec/fixtures/country-codes/scripts/get_countries_of_earth.py +370 -0
  69. data/spec/fixtures/country-codes/scripts/reorder_columns.py +8 -0
  70. data/spec/fixtures/country-codes/scripts/requirements.pip +2 -0
  71. data/spec/fixtures/espn_draft.csv +473 -1
  72. data/spec/fixtures/espn_draft/data.csv +473 -0
  73. data/spec/fixtures/espn_draft/datapackage.json +43 -0
  74. data/spec/logger_spec.rb +79 -0
  75. data/spec/spec_helper.rb +6 -1
  76. metadata +156 -19
  77. data/lib/mode/cli/setup.rb +0 -12
  78. data/lib/mode/commands/package.rb +0 -56
  79. data/lib/mode/commands/setup.rb +0 -36
  80. data/lib/mode/package_builder.rb +0 -57
  81. data/spec/commands/setup_spec.rb +0 -62
  82. data/spec/fixtures/MOCK_DATA.csv +0 -100001
  83. data/spec/fixtures/cb_clean_small.csv +0 -100000
  84. data/spec/fixtures/duplicate_keys.csv +0 -3
  85. data/spec/fixtures/format_examples.csv.txt +0 -6
  86. data/spec/fixtures/format_examples_after_excel.csv.txt +0 -1
@@ -0,0 +1,23 @@
1
+ module Mode
2
+ module Auth
3
+ class AccessToken
4
+ attr_reader :resource
5
+
6
+ def initialize(resource)
7
+ @resource = resource
8
+ end
9
+
10
+ def name
11
+ resource.name
12
+ end
13
+
14
+ def token
15
+ resource.token
16
+ end
17
+
18
+ def account_name
19
+ resource.account_name
20
+ end
21
+ end
22
+ end
23
+ end
@@ -1,7 +1,7 @@
1
1
  require 'mode/cli/helpers'
2
2
 
3
3
  require 'mode/cli/base'
4
- require 'mode/cli/setup'
5
- require 'mode/cli/import'
4
+ require 'mode/cli/login'
6
5
  require 'mode/cli/analyze'
7
- require 'mode/cli/package'
6
+ require 'mode/cli/import'
7
+ require 'mode/cli/connect'
@@ -1,7 +1,7 @@
1
1
  module Mode
2
2
  module CLI
3
3
  class Base < Thor
4
- desc "analyze PATH [--field=POSITION] [--match-type=TYPE]", "Analyzes a dataset or a field "
4
+ desc "analyze PATH [--field=POSITION] [--match-type=TYPE]", "Analyze a CSV file"
5
5
  option :sample, :banner => 'RATE', :desc => "Proportion of rows to inspect. Example: 0.5"
6
6
  option :field, :banner => 'POSITION', :desc => "Field positions begin at 0. Example: 10"
7
7
  option :match_type, :banner => 'TYPE', :desc => "One of the following: string, number, integer, datetime, boolean"
@@ -7,6 +7,11 @@ module Mode
7
7
  # And then I'd have an explicit list of the commands we were including
8
8
  #
9
9
 
10
+ desc "version", "Print the version of the mode installed mode CLI"
11
+ def version
12
+ say "Mode CLI Version #{Mode::VERSION}"
13
+ end
14
+
10
15
  private
11
16
 
12
17
  include Mode::CLI::Helpers
@@ -0,0 +1,18 @@
1
+ module Mode
2
+ module CLI
3
+ class Base < Thor
4
+ desc "connect start|stop|restart [-c CONCURRENCY]", "Connect external databases to Mode"
5
+ option :concurrency, :aliases => :c, :default => 4
6
+ option :host, :default => 'www.modeanalytics.com'
7
+ def connect(command)
8
+ if ['start', 'stop', 'restart'].include?(command)
9
+ Mode::Commands::Connect.new(command, options).execute
10
+ else
11
+ say "Error: valid commands for connect are start, stop and restart"
12
+ end
13
+ rescue => err
14
+ say "Error: #{err.message}"
15
+ end
16
+ end
17
+ end
18
+ end
@@ -8,15 +8,6 @@ module Mode
8
8
  def valid_table?(table)
9
9
  table =~ /[\w\d\_\-]+\/[\w\d\_\-]+/
10
10
  end
11
-
12
- def pkg_name(path)
13
- path.split('/').last.split('.').first
14
- end
15
-
16
- def sample_rate(path)
17
- file_size = File.size(path)
18
- DataKit::CSV::Analyzer.sample_rate(file_size)
19
- end
20
11
  end
21
12
  end
22
13
  end
@@ -1,28 +1,22 @@
1
1
  module Mode
2
2
  module CLI
3
3
  class Base < Thor
4
- desc "import SOURCE ACCOUNT/TABLENAME [--update | --replace]", "Import a flat file into the Mode data warehouse"
4
+ desc "import SOURCE ACCOUNT/TABLENAME [--replace]", "Import a flat file into the Mode data warehouse", :hide => true
5
5
  long_desc <<-LONGDESC
6
- The import commands allows you to create, update and
7
- replace tables in the Mode data warehouse with data from
8
- flat files and data packages. The default action is create
9
- with optional flags to update or replace a table.
6
+ The import commands allows you to create or replace tables
7
+ in the Mode data warehouse with data from flat files and
8
+ data packages. The default action is create with an optional
9
+ flag to replace the table.
10
10
 
11
- Data can be imported from one of three source types
11
+ Data can be imported from CSV files or data packages with the following command
12
12
 
13
13
  1. CSV File
14
14
  \x5> $ mode import gdp_quarterly.csv besquared/gdp_quarterly
15
15
 
16
- 2. JSON File (LD-JSON)
17
- \x5> $ mode import gdp_quarterly.json besquared/gdp_quarterly
16
+ 2. Data Package File
17
+ \x5> $ mode import gdp/data/quarterly.json besqaured/quarterly_gdp
18
18
 
19
- 3. Data Package
20
- \x5> $ mode import gdp_us/quarterly besquared/gdp_quarterly
21
-
22
-
23
- Note: If you do not specify a data package resource name we'll attempt to use the first resource in the package.
24
19
  LONGDESC
25
- option :update, :type => :boolean
26
20
  option :replace, :type => :boolean
27
21
  option :primary_key, :banner => 'pos1[,pos2,...] (ex: 0,2)'
28
22
  def import(source, table)
@@ -31,31 +25,8 @@ module Mode
31
25
  return
32
26
  end
33
27
 
34
- if File.directory?(source)
35
- unless Mode::Package::Base.exist?(source)
36
- error "Error: Invalid package given"
37
- return
38
- end
39
-
40
- package, resource_name = *source.split('/')
41
- package = Mode::Package::Base.open(source)
42
- else
43
- unless valid_file?(source)
44
- error "Error: Invalid source file given"
45
- return
46
- end
47
-
48
- dst_path = Dir.mktmpdir
49
- src_data = Mode::CSV::Parser.new(source)
50
- builder = Mode::Package::Builder.new(src_data, dst_path, pkg_name(source), sample_rate(source))
51
-
52
- package = builder.execute # make the package
53
- end
54
-
55
28
  account, table_name = *table.split('/')
56
- resource_name = package.resources.first.name
57
-
58
- Mode::Commands::Import.new(account, table_name, package, resource_name).execute
29
+ Mode::Commands::Import.new(source, account, table_name).execute
59
30
  end
60
31
  end
61
32
  end
@@ -0,0 +1,13 @@
1
+ module Mode
2
+ module CLI
3
+ class Base < Thor
4
+ desc "login", "Setup a new mode configuration in the given directory (defaults to home)"
5
+ option :host, :default => 'www.modeanalytics.com'
6
+ option :staging, :type => :boolean
7
+ option :development, :type => :boolean
8
+ def login
9
+ Mode::Commands::Login.new(options).execute
10
+ end
11
+ end
12
+ end
13
+ end
@@ -4,11 +4,8 @@ module Mode
4
4
  desc "package SOURCE PACKAGEPATH [--name=NAME] [--resource-name=NAME]", "Creates a new data package from a csv file"
5
5
  option :name, :desc => 'The name of the package'
6
6
  option :resource_name, :default => 'data', :desc => 'The name of the resource in package'
7
- def package(source, dest)
8
- parts = dest.split(File::Separator)
9
-
10
- name = options[:name] || parts.last
11
- package_path = parts.length == 1 ? parts.first : parts[0, parts.length - 1]
7
+ def package(source, package_path)
8
+ name = options[:name] || package_path.split(File::Separator).last
12
9
  Mode::Commands::Package.new(source, package_path, name, options[:resource_name]).execute
13
10
  end
14
11
  end
@@ -2,7 +2,9 @@ require 'terminal-table'
2
2
 
3
3
  module Mode
4
4
  module Commands
5
- class AnalyzeField < Thor
5
+ class AnalyzeField
6
+ include Mode::Commands::Helpers
7
+
6
8
  attr_accessor :path
7
9
  attr_accessor :field_pos
8
10
  attr_accessor :options
@@ -13,34 +15,31 @@ module Mode
13
15
  @options = options
14
16
  end
15
17
 
16
- no_commands do
17
- include Mode::Commands::Helpers
18
-
19
- def execute
20
- if path.nil? || !File.exist?(path)
21
- error "Couldn't find file at #{path}"
22
- return
23
- end
18
+ def execute
19
+ if path.nil? || !File.exist?(path)
20
+ puts "Error: Couldn't find file at #{path}"
21
+ return
22
+ end
24
23
 
25
- csv = DataKit::CSV::Parser.new(path)
24
+ csv = DataKit::CSV::Parser.new(path)
26
25
 
27
- field_name = csv.headers[field_pos]
26
+ field_name = csv.headers[field_pos]
28
27
 
29
- say "Analyzing #{field_name} at #{path || 'input'}"
28
+ puts "Analyzing #{field_name} at #{path || 'input'}"
30
29
 
31
- analysis, total_time = timer_block do
32
- DataKit::CSV::FieldAnalyzer.analyze(csv, field_pos, {
33
- :match_type => match_type, :sampling_rate => 1
34
- })
35
- end
30
+ analysis, total_time = timer_block do
31
+ DataKit::CSV::FieldAnalyzer.analyze(csv, field_pos, {
32
+ :match_type => match_type, :sampling_rate => 1
33
+ })
34
+ end
36
35
 
37
- say "Analyzed #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
36
+ puts "Analyzed #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
38
37
 
39
- display(analysis)
40
- end
38
+ display(analysis)
41
39
  end
42
40
 
43
41
  private
42
+
44
43
  def display(analysis)
45
44
  table = Terminal::Table.new(:headings => [
46
45
  'Row No.', 'Type', 'Value'
@@ -52,7 +51,7 @@ module Mode
52
51
  end
53
52
  end
54
53
 
55
- say table
54
+ puts table
56
55
  end
57
56
 
58
57
  def match_type
@@ -2,7 +2,9 @@ require 'terminal-table'
2
2
 
3
3
  module Mode
4
4
  module Commands
5
- class AnalyzeSchema < Thor
5
+ class AnalyzeSchema
6
+ include Mode::Commands::Helpers
7
+
6
8
  attr_accessor :path
7
9
  attr_accessor :options
8
10
 
@@ -11,79 +13,98 @@ module Mode
11
13
  @options = options
12
14
  end
13
15
 
14
- no_commands do
15
- include Mode::Commands::Helpers
16
-
17
- def execute
18
- if path.nil? || !File.exist?(path)
19
- error "Couldn't find file at #{path}"
20
- return
21
- end
22
-
23
- csv = DataKit::CSV::Parser.new(path)
16
+ def execute
17
+ if path.nil? || !File.exist?(path)
18
+ puts "Error: Couldn't find file at #{path}"
19
+ return
20
+ end
24
21
 
25
- say "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 * sampling_rate)}%)..."
22
+ csv = build_csv
23
+ analysis = build_analysis(csv)
24
+ display_analysis(analysis)
25
+ end
26
26
 
27
- analysis, total_time = timer_block do
28
- DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sampling_rate)
29
- end
27
+ private
30
28
 
31
- puts analysis.use_type_hints
29
+ def build_csv
30
+ DataKit::CSV::Parser.new(path)
31
+ end
32
32
 
33
- say "Analyzed #{analysis.sample_count} of #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
33
+ def build_analysis(csv)
34
+ puts "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 * sampling_rate)}%)..."
34
35
 
35
- display(analysis)
36
+ analysis, total_time = timer_block do
37
+ DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sampling_rate)
36
38
  end
37
- end
38
39
 
39
- private
40
+ puts "Analyzed #{analysis.sample_count} of #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
40
41
 
41
- def format_percentage_cell(numerator, denominator)
42
- cell = { :alignment => :right }
42
+ analysis
43
+ end
43
44
 
44
- if numerator == 0
45
- cell[:value] = nil
46
- else
47
- cell[:value] = '%.2f' % (100 * (numerator / denominator.to_f)) + '%'
48
- end
45
+ def sampling_rate
46
+ file_size = File.size(path)
47
+ (options[:sample] || DataKit::CSV::SchemaAnalyzer.sampling_rate(file_size)).to_f
48
+ end
49
49
 
50
- cell
50
+ def display_analysis(analysis)
51
+ table = build_table
52
+ populate_table(analysis, table)
53
+ puts table
51
54
  end
52
55
 
53
- def display(analysis)
54
- table = Terminal::Table.new(:headings => [
56
+ def build_table
57
+ Terminal::Table.new(:headings => [
55
58
  'Field No.', 'Field', 'Type',
56
59
  'String (%)', 'Integer (%)', 'Number (%)',
57
60
  'Date/Time (%)', 'Boolean (%)', 'Empty (%)'
58
61
  ])
62
+ end
59
63
 
64
+ def populate_table(analysis, table)
60
65
  analysis.fields.each_with_index do |field_name, index|
61
- row = [index, field_name]
66
+ build_table_row(analysis, table, field_name, index)
67
+ end
68
+ end
62
69
 
63
- field_type = analysis.type?(field_name)
70
+ def build_table_row(analysis, table, field_name, index)
71
+ row = [index, field_name]
64
72
 
65
- if analysis.has_single_type?(field_name)
66
- row << field_type
67
- elsif analysis.has_only_numeric_types?(field_name)
68
- row << field_type
69
- else
70
- row << '** ' + field_type.to_s
71
- end
73
+ append_row_type(analysis, field_name, row)
74
+ append_row_type_counts(analysis, field_name, row)
72
75
 
73
- DataKit::Dataset::Field::Types.each do |type|
74
- type_count = analysis.type_count(field_name, type)
75
- row << format_percentage_cell(type_count, analysis.sample_count)
76
- end
76
+ table.add_row(row)
77
+ end
78
+
79
+ def append_row_type(analysis, field_name, row)
80
+ field_type = analysis.type?(field_name)
77
81
 
78
- table.add_row(row)
82
+ if analysis.has_single_type?(field_name)
83
+ row << field_type
84
+ elsif analysis.has_only_numeric_types?(field_name)
85
+ row << field_type
86
+ else
87
+ row << '** ' + field_type.to_s
79
88
  end
89
+ end
80
90
 
81
- say table
91
+ def append_row_type_counts(analysis, field_name, row)
92
+ DataKit::Dataset::Field::Types.each do |type|
93
+ type_count = analysis.type_count(field_name, type)
94
+ row << format_percentage_cell(type_count, analysis.sample_count)
95
+ end
82
96
  end
83
97
 
84
- def sampling_rate
85
- file_size = File.size(path)
86
- (options[:sample] || DataKit::CSV::SchemaAnalyzer.sampling_rate(file_size)).to_f
98
+ def format_percentage_cell(numerator, denominator)
99
+ cell = { :alignment => :right }
100
+
101
+ if numerator == 0
102
+ cell[:value] = nil
103
+ else
104
+ cell[:value] = '%.2f' % (100 * (numerator / denominator.to_f)) + '%'
105
+ end
106
+
107
+ cell
87
108
  end
88
109
  end
89
110
  end
@@ -0,0 +1,78 @@
1
+ module Mode
2
+ module Commands
3
+ class Connect
4
+
5
+ include Mode::Commands::Helpers
6
+
7
+ attr_reader :command
8
+ attr_reader :concurrency
9
+ attr_reader :configuration
10
+
11
+ def initialize(command, options = {})
12
+ @command = command
13
+ @concurrency = options[:concurrency] || 4
14
+
15
+ validate!
16
+
17
+ @configuration = Mode::Connector::Config.new(config_dir)
18
+ end
19
+
20
+ def execute
21
+ register! if ['start', 'restart'].include?(command)
22
+ spawn!
23
+ end
24
+
25
+ private
26
+
27
+ def validate!
28
+ require_config!
29
+ require_credentials!
30
+ require_connect_config!
31
+ configure_api_requests!
32
+ end
33
+
34
+ def register!
35
+ Mode::Connector::Registrar.new(configuration).perform!
36
+ end
37
+
38
+ def spawn!
39
+ if command == 'restart'
40
+ spawn_restart!
41
+ elsif ['stop', 'start'].include?(command)
42
+ spawn_command!(command)
43
+ else
44
+ raise "Unknown command #{command}"
45
+ end
46
+ end
47
+
48
+ def spawn_restart!
49
+ begin
50
+ spawn_command!('stop')
51
+ ensure
52
+ spawn_command!('start')
53
+ end
54
+ end
55
+
56
+ def spawn_command!(command)
57
+ Mode::Connector::Daemon.spawn!(spawn_opts, spawn_args(command))
58
+ end
59
+
60
+ def spawn_opts
61
+ {
62
+ :sync_log => true,
63
+ :working_dir => working_dir,
64
+ :log_file => File.join(working_dir, 'connect.log'),
65
+ :pid_file => File.join(working_dir, 'connect.pid')
66
+ }
67
+ end
68
+
69
+ def spawn_args(command)
70
+ [command, concurrency, configuration.data_sources]
71
+ end
72
+
73
+ def working_dir
74
+ @working_dir ||= File.expand_path('~/.mode')
75
+ end
76
+ end
77
+ end
78
+ end