mode 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86):
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -1
  3. data/README.md +17 -22
  4. data/bin/mode +1 -1
  5. data/lib/mode.rb +34 -6
  6. data/lib/mode/api/form.rb +53 -0
  7. data/lib/mode/api/link.rb +31 -0
  8. data/lib/mode/api/request.rb +181 -0
  9. data/lib/mode/api/resource.rb +67 -0
  10. data/lib/mode/auth/access_token.rb +23 -0
  11. data/lib/mode/cli.rb +3 -3
  12. data/lib/mode/cli/analyze.rb +1 -1
  13. data/lib/mode/cli/base.rb +5 -0
  14. data/lib/mode/cli/connect.rb +18 -0
  15. data/lib/mode/cli/helpers.rb +0 -9
  16. data/lib/mode/cli/import.rb +9 -38
  17. data/lib/mode/cli/login.rb +13 -0
  18. data/lib/mode/cli/package.rb +2 -5
  19. data/lib/mode/commands/analyze_field.rb +20 -21
  20. data/lib/mode/commands/analyze_schema.rb +69 -48
  21. data/lib/mode/commands/connect.rb +78 -0
  22. data/lib/mode/commands/helpers.rb +54 -0
  23. data/lib/mode/commands/import.rb +209 -20
  24. data/lib/mode/commands/login.rb +111 -0
  25. data/lib/mode/config.rb +13 -33
  26. data/lib/mode/configurable.rb +46 -0
  27. data/lib/mode/connector/config.rb +31 -0
  28. data/lib/mode/connector/daemon.rb +27 -0
  29. data/lib/mode/connector/data_source.rb +75 -0
  30. data/lib/mode/connector/dataset.rb +13 -0
  31. data/lib/mode/connector/message.rb +31 -0
  32. data/lib/mode/connector/poller.rb +27 -0
  33. data/lib/mode/connector/processor.rb +58 -0
  34. data/lib/mode/connector/registrar.rb +36 -0
  35. data/lib/mode/connector/scheduler.rb +62 -0
  36. data/lib/mode/connector/selector.rb +47 -0
  37. data/lib/mode/connector/type_map.rb +45 -0
  38. data/lib/mode/connector/uploader.rb +50 -0
  39. data/lib/mode/logger.rb +202 -0
  40. data/lib/mode/version.rb +1 -1
  41. data/mode.gemspec +13 -2
  42. data/spec/api/form_spec.rb +51 -0
  43. data/spec/api/link_spec.rb +23 -0
  44. data/spec/api/request_spec.rb +111 -0
  45. data/spec/api/resource_spec.rb +70 -0
  46. data/spec/auth/access_token_spec.rb +22 -0
  47. data/spec/commands/analyze_field_spec.rb +26 -0
  48. data/spec/commands/analyze_schema_spec.rb +7 -5
  49. data/spec/commands/connect_spec.rb +80 -0
  50. data/spec/commands/helpers_spec.rb +69 -0
  51. data/spec/commands/import_spec.rb +155 -0
  52. data/spec/commands/login_spec.rb +178 -0
  53. data/spec/config_spec.rb +9 -7
  54. data/spec/connector/config_spec.rb +46 -0
  55. data/spec/connector/daemon_spec.rb +30 -0
  56. data/spec/connector/data_source_spec.rb +73 -0
  57. data/spec/connector/message_spec.rb +22 -0
  58. data/spec/connector/poller_spec.rb +26 -0
  59. data/spec/connector/processor_spec.rb +93 -0
  60. data/spec/connector/registrar_spec.rb +53 -0
  61. data/spec/connector/scheduler_spec.rb +93 -0
  62. data/spec/connector/selector_spec.rb +54 -0
  63. data/spec/connector/type_map_spec.rb +45 -0
  64. data/spec/connector/uploader_spec.rb +55 -0
  65. data/spec/fixtures/country-codes/README.md +71 -0
  66. data/spec/fixtures/country-codes/data/country-codes.csv +250 -0
  67. data/spec/fixtures/country-codes/datapackage.json +142 -0
  68. data/spec/fixtures/country-codes/scripts/get_countries_of_earth.py +370 -0
  69. data/spec/fixtures/country-codes/scripts/reorder_columns.py +8 -0
  70. data/spec/fixtures/country-codes/scripts/requirements.pip +2 -0
  71. data/spec/fixtures/espn_draft.csv +473 -1
  72. data/spec/fixtures/espn_draft/data.csv +473 -0
  73. data/spec/fixtures/espn_draft/datapackage.json +43 -0
  74. data/spec/logger_spec.rb +79 -0
  75. data/spec/spec_helper.rb +6 -1
  76. metadata +156 -19
  77. data/lib/mode/cli/setup.rb +0 -12
  78. data/lib/mode/commands/package.rb +0 -56
  79. data/lib/mode/commands/setup.rb +0 -36
  80. data/lib/mode/package_builder.rb +0 -57
  81. data/spec/commands/setup_spec.rb +0 -62
  82. data/spec/fixtures/MOCK_DATA.csv +0 -100001
  83. data/spec/fixtures/cb_clean_small.csv +0 -100000
  84. data/spec/fixtures/duplicate_keys.csv +0 -3
  85. data/spec/fixtures/format_examples.csv.txt +0 -6
  86. data/spec/fixtures/format_examples_after_excel.csv.txt +0 -1
@@ -0,0 +1,23 @@
1
+ module Mode
2
+ module Auth
3
+ class AccessToken
4
+ attr_reader :resource
5
+
6
+ def initialize(resource)
7
+ @resource = resource
8
+ end
9
+
10
+ def name
11
+ resource.name
12
+ end
13
+
14
+ def token
15
+ resource.token
16
+ end
17
+
18
+ def account_name
19
+ resource.account_name
20
+ end
21
+ end
22
+ end
23
+ end
@@ -1,7 +1,7 @@
1
1
  require 'mode/cli/helpers'
2
2
 
3
3
  require 'mode/cli/base'
4
- require 'mode/cli/setup'
5
- require 'mode/cli/import'
4
+ require 'mode/cli/login'
6
5
  require 'mode/cli/analyze'
7
- require 'mode/cli/package'
6
+ require 'mode/cli/import'
7
+ require 'mode/cli/connect'
@@ -1,7 +1,7 @@
1
1
  module Mode
2
2
  module CLI
3
3
  class Base < Thor
4
- desc "analyze PATH [--field=POSITION] [--match-type=TYPE]", "Analyzes a dataset or a field "
4
+ desc "analyze PATH [--field=POSITION] [--match-type=TYPE]", "Analyze a CSV file"
5
5
  option :sample, :banner => 'RATE', :desc => "Proportion of rows to inspect. Example: 0.5"
6
6
  option :field, :banner => 'POSITION', :desc => "Field positions begin at 0. Example: 10"
7
7
  option :match_type, :banner => 'TYPE', :desc => "One of the following: string, number, integer, datetime, boolean"
@@ -7,6 +7,11 @@ module Mode
7
7
  # And then I'd have an explicit list of the commands we were including
8
8
  #
9
9
 
10
+ desc "version", "Print the version of the mode installed mode CLI"
11
+ def version
12
+ say "Mode CLI Version #{Mode::VERSION}"
13
+ end
14
+
10
15
  private
11
16
 
12
17
  include Mode::CLI::Helpers
@@ -0,0 +1,18 @@
1
+ module Mode
2
+ module CLI
3
+ class Base < Thor
4
+ desc "connect start|stop|restart [-c CONCURRENCY]", "Connect external databases to Mode"
5
+ option :concurrency, :aliases => :c, :default => 4
6
+ option :host, :default => 'www.modeanalytics.com'
7
+ def connect(command)
8
+ if ['start', 'stop', 'restart'].include?(command)
9
+ Mode::Commands::Connect.new(command, options).execute
10
+ else
11
+ say "Error: valid commands for connect are start, stop and restart"
12
+ end
13
+ rescue => err
14
+ say "Error: #{err.message}"
15
+ end
16
+ end
17
+ end
18
+ end
@@ -8,15 +8,6 @@ module Mode
8
8
  def valid_table?(table)
9
9
  table =~ /[\w\d\_\-]+\/[\w\d\_\-]+/
10
10
  end
11
-
12
- def pkg_name(path)
13
- path.split('/').last.split('.').first
14
- end
15
-
16
- def sample_rate(path)
17
- file_size = File.size(path)
18
- DataKit::CSV::Analyzer.sample_rate(file_size)
19
- end
20
11
  end
21
12
  end
22
13
  end
@@ -1,28 +1,22 @@
1
1
  module Mode
2
2
  module CLI
3
3
  class Base < Thor
4
- desc "import SOURCE ACCOUNT/TABLENAME [--update | --replace]", "Import a flat file into the Mode data warehouse"
4
+ desc "import SOURCE ACCOUNT/TABLENAME [--replace]", "Import a flat file into the Mode data warehouse", :hide => true
5
5
  long_desc <<-LONGDESC
6
- The import commands allows you to create, update and
7
- replace tables in the Mode data warehouse with data from
8
- flat files and data packages. The default action is create
9
- with optional flags to update or replace a table.
6
+ The import commands allows you to create or replace tables
7
+ in the Mode data warehouse with data from flat files and
8
+ data packages. The default action is create with an optional
9
+ flag to replace the table.
10
10
 
11
- Data can be imported from one of three source types
11
+ Data can be imported from CSV files or data packages with the following command
12
12
 
13
13
  1. CSV File
14
14
  \x5> $ mode import gdp_quarterly.csv besquared/gdp_quarterly
15
15
 
16
- 2. JSON File (LD-JSON)
17
- \x5> $ mode import gdp_quarterly.json besquared/gdp_quarterly
16
+ 2. Data Package File
17
+ \x5> $ mode import gdp/data/quarterly.json besqaured/quarterly_gdp
18
18
 
19
- 3. Data Package
20
- \x5> $ mode import gdp_us/quarterly besquared/gdp_quarterly
21
-
22
-
23
- Note: If you do not specify a data package resource name we'll attempt to use the first resource in the package.
24
19
  LONGDESC
25
- option :update, :type => :boolean
26
20
  option :replace, :type => :boolean
27
21
  option :primary_key, :banner => 'pos1[,pos2,...] (ex: 0,2)'
28
22
  def import(source, table)
@@ -31,31 +25,8 @@ module Mode
31
25
  return
32
26
  end
33
27
 
34
- if File.directory?(source)
35
- unless Mode::Package::Base.exist?(source)
36
- error "Error: Invalid package given"
37
- return
38
- end
39
-
40
- package, resource_name = *source.split('/')
41
- package = Mode::Package::Base.open(source)
42
- else
43
- unless valid_file?(source)
44
- error "Error: Invalid source file given"
45
- return
46
- end
47
-
48
- dst_path = Dir.mktmpdir
49
- src_data = Mode::CSV::Parser.new(source)
50
- builder = Mode::Package::Builder.new(src_data, dst_path, pkg_name(source), sample_rate(source))
51
-
52
- package = builder.execute # make the package
53
- end
54
-
55
28
  account, table_name = *table.split('/')
56
- resource_name = package.resources.first.name
57
-
58
- Mode::Commands::Import.new(account, table_name, package, resource_name).execute
29
+ Mode::Commands::Import.new(source, account, table_name).execute
59
30
  end
60
31
  end
61
32
  end
@@ -0,0 +1,13 @@
1
+ module Mode
2
+ module CLI
3
+ class Base < Thor
4
+ desc "login", "Setup a new mode configuration in the given directory (defaults to home)"
5
+ option :host, :default => 'www.modeanalytics.com'
6
+ option :staging, :type => :boolean
7
+ option :development, :type => :boolean
8
+ def login
9
+ Mode::Commands::Login.new(options).execute
10
+ end
11
+ end
12
+ end
13
+ end
@@ -4,11 +4,8 @@ module Mode
4
4
  desc "package SOURCE PACKAGEPATH [--name=NAME] [--resource-name=NAME]", "Creates a new data package from a csv file"
5
5
  option :name, :desc => 'The name of the package'
6
6
  option :resource_name, :default => 'data', :desc => 'The name of the resource in package'
7
- def package(source, dest)
8
- parts = dest.split(File::Separator)
9
-
10
- name = options[:name] || parts.last
11
- package_path = parts.length == 1 ? parts.first : parts[0, parts.length - 1]
7
+ def package(source, package_path)
8
+ name = options[:name] || package_path.split(File::Separator).last
12
9
  Mode::Commands::Package.new(source, package_path, name, options[:resource_name]).execute
13
10
  end
14
11
  end
@@ -2,7 +2,9 @@ require 'terminal-table'
2
2
 
3
3
  module Mode
4
4
  module Commands
5
- class AnalyzeField < Thor
5
+ class AnalyzeField
6
+ include Mode::Commands::Helpers
7
+
6
8
  attr_accessor :path
7
9
  attr_accessor :field_pos
8
10
  attr_accessor :options
@@ -13,34 +15,31 @@ module Mode
13
15
  @options = options
14
16
  end
15
17
 
16
- no_commands do
17
- include Mode::Commands::Helpers
18
-
19
- def execute
20
- if path.nil? || !File.exist?(path)
21
- error "Couldn't find file at #{path}"
22
- return
23
- end
18
+ def execute
19
+ if path.nil? || !File.exist?(path)
20
+ puts "Error: Couldn't find file at #{path}"
21
+ return
22
+ end
24
23
 
25
- csv = DataKit::CSV::Parser.new(path)
24
+ csv = DataKit::CSV::Parser.new(path)
26
25
 
27
- field_name = csv.headers[field_pos]
26
+ field_name = csv.headers[field_pos]
28
27
 
29
- say "Analyzing #{field_name} at #{path || 'input'}"
28
+ puts "Analyzing #{field_name} at #{path || 'input'}"
30
29
 
31
- analysis, total_time = timer_block do
32
- DataKit::CSV::FieldAnalyzer.analyze(csv, field_pos, {
33
- :match_type => match_type, :sampling_rate => 1
34
- })
35
- end
30
+ analysis, total_time = timer_block do
31
+ DataKit::CSV::FieldAnalyzer.analyze(csv, field_pos, {
32
+ :match_type => match_type, :sampling_rate => 1
33
+ })
34
+ end
36
35
 
37
- say "Analyzed #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
36
+ puts "Analyzed #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
38
37
 
39
- display(analysis)
40
- end
38
+ display(analysis)
41
39
  end
42
40
 
43
41
  private
42
+
44
43
  def display(analysis)
45
44
  table = Terminal::Table.new(:headings => [
46
45
  'Row No.', 'Type', 'Value'
@@ -52,7 +51,7 @@ module Mode
52
51
  end
53
52
  end
54
53
 
55
- say table
54
+ puts table
56
55
  end
57
56
 
58
57
  def match_type
@@ -2,7 +2,9 @@ require 'terminal-table'
2
2
 
3
3
  module Mode
4
4
  module Commands
5
- class AnalyzeSchema < Thor
5
+ class AnalyzeSchema
6
+ include Mode::Commands::Helpers
7
+
6
8
  attr_accessor :path
7
9
  attr_accessor :options
8
10
 
@@ -11,79 +13,98 @@ module Mode
11
13
  @options = options
12
14
  end
13
15
 
14
- no_commands do
15
- include Mode::Commands::Helpers
16
-
17
- def execute
18
- if path.nil? || !File.exist?(path)
19
- error "Couldn't find file at #{path}"
20
- return
21
- end
22
-
23
- csv = DataKit::CSV::Parser.new(path)
16
+ def execute
17
+ if path.nil? || !File.exist?(path)
18
+ puts "Error: Couldn't find file at #{path}"
19
+ return
20
+ end
24
21
 
25
- say "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 * sampling_rate)}%)..."
22
+ csv = build_csv
23
+ analysis = build_analysis(csv)
24
+ display_analysis(analysis)
25
+ end
26
26
 
27
- analysis, total_time = timer_block do
28
- DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sampling_rate)
29
- end
27
+ private
30
28
 
31
- puts analysis.use_type_hints
29
+ def build_csv
30
+ DataKit::CSV::Parser.new(path)
31
+ end
32
32
 
33
- say "Analyzed #{analysis.sample_count} of #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
33
+ def build_analysis(csv)
34
+ puts "Analyzing #{path || 'input'} (Sampling #{'%.2f' % (100 * sampling_rate)}%)..."
34
35
 
35
- display(analysis)
36
+ analysis, total_time = timer_block do
37
+ DataKit::CSV::SchemaAnalyzer.analyze(csv, :sampling_rate => sampling_rate)
36
38
  end
37
- end
38
39
 
39
- private
40
+ puts "Analyzed #{analysis.sample_count} of #{analysis.row_count} rows in #{'%.2f' % total_time} seconds\n"
40
41
 
41
- def format_percentage_cell(numerator, denominator)
42
- cell = { :alignment => :right }
42
+ analysis
43
+ end
43
44
 
44
- if numerator == 0
45
- cell[:value] = nil
46
- else
47
- cell[:value] = '%.2f' % (100 * (numerator / denominator.to_f)) + '%'
48
- end
45
+ def sampling_rate
46
+ file_size = File.size(path)
47
+ (options[:sample] || DataKit::CSV::SchemaAnalyzer.sampling_rate(file_size)).to_f
48
+ end
49
49
 
50
- cell
50
+ def display_analysis(analysis)
51
+ table = build_table
52
+ populate_table(analysis, table)
53
+ puts table
51
54
  end
52
55
 
53
- def display(analysis)
54
- table = Terminal::Table.new(:headings => [
56
+ def build_table
57
+ Terminal::Table.new(:headings => [
55
58
  'Field No.', 'Field', 'Type',
56
59
  'String (%)', 'Integer (%)', 'Number (%)',
57
60
  'Date/Time (%)', 'Boolean (%)', 'Empty (%)'
58
61
  ])
62
+ end
59
63
 
64
+ def populate_table(analysis, table)
60
65
  analysis.fields.each_with_index do |field_name, index|
61
- row = [index, field_name]
66
+ build_table_row(analysis, table, field_name, index)
67
+ end
68
+ end
62
69
 
63
- field_type = analysis.type?(field_name)
70
+ def build_table_row(analysis, table, field_name, index)
71
+ row = [index, field_name]
64
72
 
65
- if analysis.has_single_type?(field_name)
66
- row << field_type
67
- elsif analysis.has_only_numeric_types?(field_name)
68
- row << field_type
69
- else
70
- row << '** ' + field_type.to_s
71
- end
73
+ append_row_type(analysis, field_name, row)
74
+ append_row_type_counts(analysis, field_name, row)
72
75
 
73
- DataKit::Dataset::Field::Types.each do |type|
74
- type_count = analysis.type_count(field_name, type)
75
- row << format_percentage_cell(type_count, analysis.sample_count)
76
- end
76
+ table.add_row(row)
77
+ end
78
+
79
+ def append_row_type(analysis, field_name, row)
80
+ field_type = analysis.type?(field_name)
77
81
 
78
- table.add_row(row)
82
+ if analysis.has_single_type?(field_name)
83
+ row << field_type
84
+ elsif analysis.has_only_numeric_types?(field_name)
85
+ row << field_type
86
+ else
87
+ row << '** ' + field_type.to_s
79
88
  end
89
+ end
80
90
 
81
- say table
91
+ def append_row_type_counts(analysis, field_name, row)
92
+ DataKit::Dataset::Field::Types.each do |type|
93
+ type_count = analysis.type_count(field_name, type)
94
+ row << format_percentage_cell(type_count, analysis.sample_count)
95
+ end
82
96
  end
83
97
 
84
- def sampling_rate
85
- file_size = File.size(path)
86
- (options[:sample] || DataKit::CSV::SchemaAnalyzer.sampling_rate(file_size)).to_f
98
+ def format_percentage_cell(numerator, denominator)
99
+ cell = { :alignment => :right }
100
+
101
+ if numerator == 0
102
+ cell[:value] = nil
103
+ else
104
+ cell[:value] = '%.2f' % (100 * (numerator / denominator.to_f)) + '%'
105
+ end
106
+
107
+ cell
87
108
  end
88
109
  end
89
110
  end
@@ -0,0 +1,78 @@
1
+ module Mode
2
+ module Commands
3
+ class Connect
4
+
5
+ include Mode::Commands::Helpers
6
+
7
+ attr_reader :command
8
+ attr_reader :concurrency
9
+ attr_reader :configuration
10
+
11
+ def initialize(command, options = {})
12
+ @command = command
13
+ @concurrency = options[:concurrency] || 4
14
+
15
+ validate!
16
+
17
+ @configuration = Mode::Connector::Config.new(config_dir)
18
+ end
19
+
20
+ def execute
21
+ register! if ['start', 'restart'].include?(command)
22
+ spawn!
23
+ end
24
+
25
+ private
26
+
27
+ def validate!
28
+ require_config!
29
+ require_credentials!
30
+ require_connect_config!
31
+ configure_api_requests!
32
+ end
33
+
34
+ def register!
35
+ Mode::Connector::Registrar.new(configuration).perform!
36
+ end
37
+
38
+ def spawn!
39
+ if command == 'restart'
40
+ spawn_restart!
41
+ elsif ['stop', 'start'].include?(command)
42
+ spawn_command!(command)
43
+ else
44
+ raise "Unknown command #{command}"
45
+ end
46
+ end
47
+
48
+ def spawn_restart!
49
+ begin
50
+ spawn_command!('stop')
51
+ ensure
52
+ spawn_command!('start')
53
+ end
54
+ end
55
+
56
+ def spawn_command!(command)
57
+ Mode::Connector::Daemon.spawn!(spawn_opts, spawn_args(command))
58
+ end
59
+
60
+ def spawn_opts
61
+ {
62
+ :sync_log => true,
63
+ :working_dir => working_dir,
64
+ :log_file => File.join(working_dir, 'connect.log'),
65
+ :pid_file => File.join(working_dir, 'connect.pid')
66
+ }
67
+ end
68
+
69
+ def spawn_args(command)
70
+ [command, concurrency, configuration.data_sources]
71
+ end
72
+
73
+ def working_dir
74
+ @working_dir ||= File.expand_path('~/.mode')
75
+ end
76
+ end
77
+ end
78
+ end