mode 0.0.5 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -1
  3. data/README.md +17 -22
  4. data/bin/mode +1 -1
  5. data/lib/mode.rb +34 -6
  6. data/lib/mode/api/form.rb +53 -0
  7. data/lib/mode/api/link.rb +31 -0
  8. data/lib/mode/api/request.rb +181 -0
  9. data/lib/mode/api/resource.rb +67 -0
  10. data/lib/mode/auth/access_token.rb +23 -0
  11. data/lib/mode/cli.rb +3 -3
  12. data/lib/mode/cli/analyze.rb +1 -1
  13. data/lib/mode/cli/base.rb +5 -0
  14. data/lib/mode/cli/connect.rb +18 -0
  15. data/lib/mode/cli/helpers.rb +0 -9
  16. data/lib/mode/cli/import.rb +9 -38
  17. data/lib/mode/cli/login.rb +13 -0
  18. data/lib/mode/cli/package.rb +2 -5
  19. data/lib/mode/commands/analyze_field.rb +20 -21
  20. data/lib/mode/commands/analyze_schema.rb +69 -48
  21. data/lib/mode/commands/connect.rb +78 -0
  22. data/lib/mode/commands/helpers.rb +54 -0
  23. data/lib/mode/commands/import.rb +209 -20
  24. data/lib/mode/commands/login.rb +111 -0
  25. data/lib/mode/config.rb +13 -33
  26. data/lib/mode/configurable.rb +46 -0
  27. data/lib/mode/connector/config.rb +31 -0
  28. data/lib/mode/connector/daemon.rb +27 -0
  29. data/lib/mode/connector/data_source.rb +75 -0
  30. data/lib/mode/connector/dataset.rb +13 -0
  31. data/lib/mode/connector/message.rb +31 -0
  32. data/lib/mode/connector/poller.rb +27 -0
  33. data/lib/mode/connector/processor.rb +58 -0
  34. data/lib/mode/connector/registrar.rb +36 -0
  35. data/lib/mode/connector/scheduler.rb +62 -0
  36. data/lib/mode/connector/selector.rb +47 -0
  37. data/lib/mode/connector/type_map.rb +45 -0
  38. data/lib/mode/connector/uploader.rb +50 -0
  39. data/lib/mode/logger.rb +202 -0
  40. data/lib/mode/version.rb +1 -1
  41. data/mode.gemspec +13 -2
  42. data/spec/api/form_spec.rb +51 -0
  43. data/spec/api/link_spec.rb +23 -0
  44. data/spec/api/request_spec.rb +111 -0
  45. data/spec/api/resource_spec.rb +70 -0
  46. data/spec/auth/access_token_spec.rb +22 -0
  47. data/spec/commands/analyze_field_spec.rb +26 -0
  48. data/spec/commands/analyze_schema_spec.rb +7 -5
  49. data/spec/commands/connect_spec.rb +80 -0
  50. data/spec/commands/helpers_spec.rb +69 -0
  51. data/spec/commands/import_spec.rb +155 -0
  52. data/spec/commands/login_spec.rb +178 -0
  53. data/spec/config_spec.rb +9 -7
  54. data/spec/connector/config_spec.rb +46 -0
  55. data/spec/connector/daemon_spec.rb +30 -0
  56. data/spec/connector/data_source_spec.rb +73 -0
  57. data/spec/connector/message_spec.rb +22 -0
  58. data/spec/connector/poller_spec.rb +26 -0
  59. data/spec/connector/processor_spec.rb +93 -0
  60. data/spec/connector/registrar_spec.rb +53 -0
  61. data/spec/connector/scheduler_spec.rb +93 -0
  62. data/spec/connector/selector_spec.rb +54 -0
  63. data/spec/connector/type_map_spec.rb +45 -0
  64. data/spec/connector/uploader_spec.rb +55 -0
  65. data/spec/fixtures/country-codes/README.md +71 -0
  66. data/spec/fixtures/country-codes/data/country-codes.csv +250 -0
  67. data/spec/fixtures/country-codes/datapackage.json +142 -0
  68. data/spec/fixtures/country-codes/scripts/get_countries_of_earth.py +370 -0
  69. data/spec/fixtures/country-codes/scripts/reorder_columns.py +8 -0
  70. data/spec/fixtures/country-codes/scripts/requirements.pip +2 -0
  71. data/spec/fixtures/espn_draft.csv +473 -1
  72. data/spec/fixtures/espn_draft/data.csv +473 -0
  73. data/spec/fixtures/espn_draft/datapackage.json +43 -0
  74. data/spec/logger_spec.rb +79 -0
  75. data/spec/spec_helper.rb +6 -1
  76. metadata +156 -19
  77. data/lib/mode/cli/setup.rb +0 -12
  78. data/lib/mode/commands/package.rb +0 -56
  79. data/lib/mode/commands/setup.rb +0 -36
  80. data/lib/mode/package_builder.rb +0 -57
  81. data/spec/commands/setup_spec.rb +0 -62
  82. data/spec/fixtures/MOCK_DATA.csv +0 -100001
  83. data/spec/fixtures/cb_clean_small.csv +0 -100000
  84. data/spec/fixtures/duplicate_keys.csv +0 -3
  85. data/spec/fixtures/format_examples.csv.txt +0 -6
  86. data/spec/fixtures/format_examples_after_excel.csv.txt +0 -1
@@ -0,0 +1,46 @@
1
require 'yaml'

module Mode
  # Mixin that backs a class with a YAML configuration file.
  #
  # The including class is expected to provide:
  # * a class method +default_filename+ (used when no filename is passed), and
  # * instance methods +configure(hash)+ and +to_yaml+, which are called by
  #   #initialize and #save respectively.
  module Configurable
    # Adds a +path+ reader and the ClassMethods helpers to the including class.
    def self.included(klass)
      klass.send :attr_reader, :path
      klass.send :extend, ClassMethods
    end

    module ClassMethods
      # @param path [String] directory that holds the configuration file
      # @param filename [String, nil] file name; +default_filename+ when nil
      # @return [Boolean] whether the configuration file exists
      def exists?(path, filename = nil)
        File.exist?(full_path(path, filename))
      end

      # Writes an empty YAML hash to the configuration file (truncating any
      # existing content) and returns a new instance loaded from it.
      #
      # @param path [String] directory that holds the configuration file
      # @param filename [String, nil] file name; +default_filename+ when nil
      def init(path, filename = nil)
        File.open(full_path(path, filename), 'w+') do |file|
          file.write({}.to_yaml)
        end

        new(path, filename)
      end

      # @return [String] the expanded "~/.mode" directory
      def default_dir
        File.expand_path("~/.mode")
      end

      # @return [String] absolute path of +filename+ (or +default_filename+)
      #   inside +path+
      def full_path(path, filename = nil)
        File.expand_path(File.join(path, filename || default_filename))
      end
    end

    # Loads the configuration file and hands its content to +configure+.
    #
    # @param path [String] directory that holds the configuration file
    # @param filename [String, nil] file name; +default_filename+ when nil
    # @raise [RuntimeError] when the file does not exist
    def initialize(path, filename = nil)
      @path = self.class.full_path(path, filename)

      unless File.exist?(@path)
        raise "Could not load configuration file from #{@path}"
      end

      # An empty file parses to a falsy value rather than a Hash; treat it
      # as an empty configuration so +configure+ always receives a Hash-like.
      configure(YAML.load_file(@path) || {})
    end

    # Overwrites the configuration file with the result of +to_yaml+.
    def save
      File.open(path, 'w+') do |file|
        file.write(to_yaml)
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'yaml'
2
+
3
module Mode
  module Connector
    # Connector configuration, stored by default in +connect.yml+.
    #
    # The file content is treated as a mapping of data source name to a hash
    # of connection properties; each entry becomes a
    # Mode::Connector::DataSource.
    class Config < Mode::Config
      include Mode::Configurable

      # Config Variables
      attr_accessor :data_sources

      class << self
        # @return [String] file name used when none is passed explicitly
        def default_filename
          'connect.yml'
        end
      end

      private

      # Builds a DataSource for every +name => props+ pair in +config+ and
      # appends it to +data_sources+.
      #
      # @param config [Hash, nil] parsed YAML content; nil is treated as empty
      def configure(config = {})
        @data_sources ||= []
        (config || {}).each do |name, props|
          # A bare "name:" entry parses to nil props; normalise to a Hash so
          # DataSource property lookups do not fail on nil.
          data_sources << Mode::Connector::DataSource.new(name, props || {})
        end
      end

      # Serialises the data sources in the same +name => props+ shape that
      # #configure reads, so that a saved file can be loaded again. (Dumping
      # the DataSource objects themselves would produce an Array of tagged
      # Ruby objects that #configure cannot interpret.)
      #
      # @return [String] YAML document
      def to_yaml
        serialized = data_sources.each_with_object({}) do |source, config|
          config[source.name] = source.props
        end

        serialized.to_yaml
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'daemon_spawn'
2
+
3
module Mode
  module Connector
    # DaemonSpawn process that runs a Mode::Connector::Scheduler.
    class Daemon < DaemonSpawn::Base
      attr_reader :scheduler

      # Builds the scheduler and starts it. Any StandardError raised while
      # doing so is logged through Mode::Logger instead of propagating.
      #
      # @param args [Array] +[max_jobs, data_sources]+, in that order
      def start(args)
        # Destructure rather than shift so the caller's array is not mutated.
        max_jobs, data_sources = args

        @scheduler = Mode::Connector::Scheduler.new(
          data_sources, :max_jobs => max_jobs
        )

        @scheduler.start!
      rescue => err
        Mode::Logger.instance.error(
          "Connector::Daemon", err.message, err.backtrace)
      end

      # Stops the scheduler. Does nothing when #start never got as far as
      # creating one (previously this raised NoMethodError on nil).
      def stop
        scheduler.stop! if scheduler
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ require 'sequel'
2
+
3
module Mode
  module Connector
    # A named database described by a hash of string-keyed properties
    # ('adapter', 'username', 'password', 'host', 'database'), with a lazily
    # opened Sequel connection.
    class DataSource

      attr_reader :name
      attr_reader :props

      # @param name [String] data source name
      # @param props [Hash] connection properties, keyed by String
      def initialize(name, props = {})
        @name = name
        @props = props
      end

      def adapter
        props['adapter']
      end

      def username
        props['username']
      end

      def password
        props['password']
      end

      def host
        props['host']
      end

      def database
        props['database']
      end

      # Runs +query+ and yields every row of the resulting dataset to the
      # given block.
      def select(query, &block)
        connection_dataset(query).each(&block)
      end

      # Opens the Sequel connection on first use and returns the memoized
      # connection afterwards. Redshift sources are connected with
      # +client_min_messages+ and +force_standard_strings+ overridden.
      def connection
        @connection ||=
          if redshift?
            Sequel.connect(connection_url,
              :client_min_messages => '',
              :force_standard_strings => false
            )
          else
            Sequel.connect(connection_url)
          end
      end

      # @return [Boolean] whether the adapter property is 'redshift'
      def redshift?
        adapter == 'redshift'
      end

      private

      # Builds the connection URL from the properties. Username and password
      # are percent-encoded so that characters such as "@", "/" or ":" in a
      # credential cannot be mistaken for URL delimiters.
      # NOTE(review): this assumes the properties hold raw (not already
      # percent-encoded) credentials — confirm against existing connect.yml
      # files.
      def connection_url
        password_segment = password.nil? ? nil : ":#{escape_userinfo(password)}"
        "#{adapter}://#{escape_userinfo(username)}#{password_segment}@#{host}/#{database}"
      end

      # Percent-encodes every byte outside the URI "unreserved" set.
      # nil becomes an empty string, matching plain interpolation of nil.
      def escape_userinfo(value)
        value.to_s.gsub(/[^A-Za-z0-9\-._~]/) do |char|
          char.unpack('C*').map { |byte| '%%%02X' % byte }.join
        end
      end

      # Logs the query and returns a dataset for it on the connection.
      def connection_dataset(query)
        log_connection_query(query)
        # TODO: figure out how to make cursors work here
        connection.dataset.with_sql(query)
      end

      def log_connection_query(query)
        Mode::Logger.instance.debug(
          "Connect::DataSource", "QUERY", query.split("\n"))
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module Mode
  module Connector
    # Simple value holder pairing a file path with its column types.
    class Dataset
      attr_reader :path, :column_types

      # @param path [String] location of the data
      # @param column_types [Object] column type information for that data
      def initialize(path, column_types)
        @path, @column_types = path, column_types
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module Mode
  module Connector
    # Read-only view over an API resource that represents one message.
    class Message
      attr_reader :resource

      # @param resource [#content, #embedded, #links] the wrapped API resource
      def initialize(resource)
        @resource = resource
      end

      # @return the 'type' entry of the resource content
      def type
        content['type']
      end

      # @return the 'name' entry of the resource content
      def name
        content['name']
      end

      # @return the 'query' entry of the resource content
      def query
        content['query']
      end

      # @return the resource embedded under 'data_source'
      def data_source
        resource.embedded('data_source')
      end

      # @return the href of the resource's 'execution' link
      def execution_path
        execution_link = resource.links('execution')
        execution_link.href
      end

      private

      # Content of the wrapped resource, looked up on every call.
      def content
        resource.content
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module Mode
  module Connector
    # Fetches pending messages from the API and yields them one at a time.
    class Poller
      # Requests the messages resource and, for every resource embedded under
      # 'messages', logs it and yields it wrapped in a
      # Mode::Connector::Message. Nothing is yielded when the response is not
      # a Mode::API::Resource or has no embedded 'messages'.
      def perform!(&block)
        response = Mode::API::Request.get(path)

        return unless response.is_a?(Mode::API::Resource)
        return unless response.has_embedded?('messages')

        response.embedded('messages').each do |resource|
          log_message(resource)
          yield Mode::Connector::Message.new(resource)
        end
      end

      private

      def path
        Mode::API::Request.data_source_connection_messages_path
      end

      def log_message(message)
        Mode::Logger.instance.info("Connector::Poller", message)
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ require 'date'
2
+ require 'time'
3
+ require 'tmpdir'
4
+
5
module Mode
  module Connector
    # Handles one message: runs its query against the matching data source
    # and uploads the result to the message's execution path.
    class Processor
      attr_reader :message
      attr_reader :data_sources

      # @param message [#query, #execution_path, #data_source] the message
      # @param data_sources [Enumerable] known data sources, each with a +name+
      def initialize(message, data_sources)
        @message = message
        @data_sources = data_sources
      end

      # Selects and uploads the data for the message.
      #
      # Any StandardError (including an unknown data source) is first logged
      # and then reported to the execution path through Uploader.error!.
      # Logging happens first so the original failure is recorded even when
      # the error report itself cannot be delivered; in that case the
      # reporting exception propagates to the caller.
      # NOTE(review): nothing visible here removes the file produced by the
      # selector after upload — confirm whether Uploader cleans it up.
      def perform!
        unless data_source
          raise "Data source not found #{message.data_source.name}"
        end

        send!(select!)
      rescue => err
        # backtrace is nil for exceptions that were never raised
        backtrace = err.backtrace || []

        Mode::Logger.instance.error(
          "Connector::Processor", err.message, backtrace)

        error!(err.message, backtrace.join("\n"))
      end

      private

      def select!
        Mode::Connector::Selector.new(query, data_source).perform!
      end

      def send!(dataset)
        Mode::Connector::Uploader.new(upload_path, dataset).perform!
      end

      def error!(message, detail = nil)
        Mode::Connector::Uploader.error!(upload_path, message, detail)
      end

      def query
        message.query
      end

      def upload_path
        message.execution_path
      end

      # @return the data source whose name matches the one the message
      #   refers to, or nil when there is none
      def data_source
        data_sources.find do |ds|
          ds.name == message.data_source.name
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
module Mode
  module Connector
    # Registers the configured data sources with the API.
    class Registrar
      attr_reader :config

      # @param config [#data_sources] configuration whose data sources each
      #   respond to +name+ and +adapter+
      def initialize(config)
        @config = config
      end

      # Sends a PUT to the data source connection path with the name and
      # adapter of every configured data source.
      #
      # @return whatever Mode::API::Request.put returns
      def perform!
        Mode::API::Request.put(path,
          :data_source_connection => {
            :data_sources => data_sources
          }
        )
      end

      private

      # @return [Array<Hash>] one +{:name, :adapter}+ hash per data source
      def data_sources
        config.data_sources.map do |source|
          { :name => source.name, :adapter => source.adapter }
        end
      end

      def path
        Mode::API::Request.data_source_connection_path
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'rufus-scheduler'
2
+
3
module Mode
  module Connector
    # Drives polling: every five seconds it schedules a 'processor' job that
    # polls for messages and processes each one, as long as fewer than
    # +max_jobs+ such jobs currently exist.
    class Scheduler
      attr_reader :max_jobs, :scheduler, :data_sources

      # @param data_sources [Enumerable] data sources, each with +connection+
      # @param options [Hash] +:max_jobs+ caps concurrent processor jobs
      #   (4 when absent or nil)
      def initialize(data_sources, options = {})
        @max_jobs = options[:max_jobs] || 4
        @data_sources = data_sources
        @scheduler = Rufus::Scheduler.new
      end

      # Opens every data source connection, registers the recurring tick and
      # then joins the underlying scheduler.
      def start!
        # Connections are opened up front, outside of any scheduled job
        # (the original author asks that this stay outside the scheduler).
        data_sources.each { |source| source.connection }

        scheduler.every('5s') { tick }
        scheduler.join
      end

      # Stops the underlying scheduler from a separate thread and waits for
      # that thread to finish.
      # NOTE(review): presumably threaded so it can be invoked from a context
      # where stopping directly is not allowed — confirm before simplifying.
      def stop!
        Thread.new { scheduler.stop }.join
      end

      # @return the scheduler jobs tagged 'processor'
      def processors
        scheduler.jobs(:tag => 'processor')
      end

      private

      # Schedules one immediate processor job unless the cap is reached.
      def tick
        return unless processors.length < max_jobs

        scheduler.in(0, :tag => 'processor') { tock }
      end

      # Polls once and processes every message; errors are logged, not raised.
      def tock
        poll_messages { |message| process_message(message) }
      rescue => err
        Mode::Logger.instance.error(
          "Connector::Scheduler", err.message, err.backtrace)
      end

      def poll_messages(&block)
        poller = Mode::Connector::Poller.new
        poller.perform!(&block)
      end

      def process_message(message)
        processor = Mode::Connector::Processor.new(message, data_sources)
        processor.perform!
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require 'sequel'
2
+
3
require 'json'
require 'tmpdir'

module Mode
  module Connector
    # Runs a query against a data source and streams the rows into a JSON
    # array on disk, recording every row in a Mode::Connector::TypeMap.
    class Selector
      attr_reader :query
      attr_reader :data_source
      attr_reader :column_types

      # @param query [String] SQL to run
      # @param data_source [#select] object that yields one row per result
      def initialize(query, data_source)
        @query, @data_source = query, data_source
        @column_types = Mode::Connector::TypeMap.new
      end

      # Writes the query result to #path and returns a dataset for it.
      #
      # @return [Mode::Connector::Dataset] the file path plus column types
      def perform!
        File.open(path, 'w+') { |file| write(file) }
        Mode::Connector::Dataset.new(path, column_types)
      end

      # @return [String] path of 'data.json' inside this selector's own
      #   temporary directory (created on first use)
      def path
        @path ||= File.join(tmpdir, 'data.json')
      end

      private

      # Writes "[", each row as JSON separated by commas, then "]". Rows are
      # written as they are yielded rather than collected first.
      def write(file)
        index = 0
        file.write '['
        data_source.select(query) do |row|
          index += 1
          write_row(file, row, index)
        end
        file.write ']'
      end

      # Records the row's types and appends it to the file; +index+ is the
      # 1-based row number, used only to skip the comma before the first row.
      def write_row(file, row, index)
        column_types.insert(row)

        file.write ',' unless index == 1
        file.write JSON.generate(row)
      end

      # NOTE(review): this directory is never removed by anything visible
      # here — confirm that whoever consumes the dataset deletes it, since a
      # new one is created for every query.
      def tmpdir
        @tmpdir ||= Dir.mktmpdir
      end
    end
  end
end