mode 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -1
  3. data/README.md +17 -22
  4. data/bin/mode +1 -1
  5. data/lib/mode.rb +34 -6
  6. data/lib/mode/api/form.rb +53 -0
  7. data/lib/mode/api/link.rb +31 -0
  8. data/lib/mode/api/request.rb +181 -0
  9. data/lib/mode/api/resource.rb +67 -0
  10. data/lib/mode/auth/access_token.rb +23 -0
  11. data/lib/mode/cli.rb +3 -3
  12. data/lib/mode/cli/analyze.rb +1 -1
  13. data/lib/mode/cli/base.rb +5 -0
  14. data/lib/mode/cli/connect.rb +18 -0
  15. data/lib/mode/cli/helpers.rb +0 -9
  16. data/lib/mode/cli/import.rb +9 -38
  17. data/lib/mode/cli/login.rb +13 -0
  18. data/lib/mode/cli/package.rb +2 -5
  19. data/lib/mode/commands/analyze_field.rb +20 -21
  20. data/lib/mode/commands/analyze_schema.rb +69 -48
  21. data/lib/mode/commands/connect.rb +78 -0
  22. data/lib/mode/commands/helpers.rb +54 -0
  23. data/lib/mode/commands/import.rb +209 -20
  24. data/lib/mode/commands/login.rb +111 -0
  25. data/lib/mode/config.rb +13 -33
  26. data/lib/mode/configurable.rb +46 -0
  27. data/lib/mode/connector/config.rb +31 -0
  28. data/lib/mode/connector/daemon.rb +27 -0
  29. data/lib/mode/connector/data_source.rb +75 -0
  30. data/lib/mode/connector/dataset.rb +13 -0
  31. data/lib/mode/connector/message.rb +31 -0
  32. data/lib/mode/connector/poller.rb +27 -0
  33. data/lib/mode/connector/processor.rb +58 -0
  34. data/lib/mode/connector/registrar.rb +36 -0
  35. data/lib/mode/connector/scheduler.rb +62 -0
  36. data/lib/mode/connector/selector.rb +47 -0
  37. data/lib/mode/connector/type_map.rb +45 -0
  38. data/lib/mode/connector/uploader.rb +50 -0
  39. data/lib/mode/logger.rb +202 -0
  40. data/lib/mode/version.rb +1 -1
  41. data/mode.gemspec +13 -2
  42. data/spec/api/form_spec.rb +51 -0
  43. data/spec/api/link_spec.rb +23 -0
  44. data/spec/api/request_spec.rb +111 -0
  45. data/spec/api/resource_spec.rb +70 -0
  46. data/spec/auth/access_token_spec.rb +22 -0
  47. data/spec/commands/analyze_field_spec.rb +26 -0
  48. data/spec/commands/analyze_schema_spec.rb +7 -5
  49. data/spec/commands/connect_spec.rb +80 -0
  50. data/spec/commands/helpers_spec.rb +69 -0
  51. data/spec/commands/import_spec.rb +155 -0
  52. data/spec/commands/login_spec.rb +178 -0
  53. data/spec/config_spec.rb +9 -7
  54. data/spec/connector/config_spec.rb +46 -0
  55. data/spec/connector/daemon_spec.rb +30 -0
  56. data/spec/connector/data_source_spec.rb +73 -0
  57. data/spec/connector/message_spec.rb +22 -0
  58. data/spec/connector/poller_spec.rb +26 -0
  59. data/spec/connector/processor_spec.rb +93 -0
  60. data/spec/connector/registrar_spec.rb +53 -0
  61. data/spec/connector/scheduler_spec.rb +93 -0
  62. data/spec/connector/selector_spec.rb +54 -0
  63. data/spec/connector/type_map_spec.rb +45 -0
  64. data/spec/connector/uploader_spec.rb +55 -0
  65. data/spec/fixtures/country-codes/README.md +71 -0
  66. data/spec/fixtures/country-codes/data/country-codes.csv +250 -0
  67. data/spec/fixtures/country-codes/datapackage.json +142 -0
  68. data/spec/fixtures/country-codes/scripts/get_countries_of_earth.py +370 -0
  69. data/spec/fixtures/country-codes/scripts/reorder_columns.py +8 -0
  70. data/spec/fixtures/country-codes/scripts/requirements.pip +2 -0
  71. data/spec/fixtures/espn_draft.csv +473 -1
  72. data/spec/fixtures/espn_draft/data.csv +473 -0
  73. data/spec/fixtures/espn_draft/datapackage.json +43 -0
  74. data/spec/logger_spec.rb +79 -0
  75. data/spec/spec_helper.rb +6 -1
  76. metadata +156 -19
  77. data/lib/mode/cli/setup.rb +0 -12
  78. data/lib/mode/commands/package.rb +0 -56
  79. data/lib/mode/commands/setup.rb +0 -36
  80. data/lib/mode/package_builder.rb +0 -57
  81. data/spec/commands/setup_spec.rb +0 -62
  82. data/spec/fixtures/MOCK_DATA.csv +0 -100001
  83. data/spec/fixtures/cb_clean_small.csv +0 -100000
  84. data/spec/fixtures/duplicate_keys.csv +0 -3
  85. data/spec/fixtures/format_examples.csv.txt +0 -6
  86. data/spec/fixtures/format_examples_after_excel.csv.txt +0 -1
@@ -0,0 +1,46 @@
1
module Mode
  # Mixin that backs a class with a YAML configuration file.
  #
  # The including class is expected to supply:
  #   * a class-level +default_filename+ (used when no filename is passed)
  #   * an instance-level +configure(hash)+ that consumes the parsed YAML
  #   * an instance-level +to_yaml+ that produces the text written by #save
  module Configurable
    # Include hook: gives instances a +path+ reader and extends the
    # including class with ClassMethods.
    def self.included(klass)
      klass.send :attr_reader, :path
      klass.send :extend, ClassMethods
    end

    module ClassMethods
      # Returns true when the configuration file exists.
      #
      # path     - directory containing the file
      # filename - optional file name; falls back to +default_filename+
      def exists?(path, filename = nil)
        File.exist?(full_path(path, filename))
      end

      # Writes an empty YAML hash to the configuration file (creating or
      # truncating it) and returns a new instance loaded from that file.
      def init(path, filename = nil)
        File.open(full_path(path, filename), 'w+') do |file|
          file.write({}.to_yaml)
        end

        new(path, filename)
      end

      # Expanded form of "~/.mode".
      def default_dir
        File.expand_path("~/.mode")
      end

      # Absolute path of +filename+ (or +default_filename+) inside +path+.
      def full_path(path, filename = nil)
        File.expand_path(File.join(path, filename || default_filename))
      end
    end

    # Loads the configuration file and hands its contents to +configure+.
    #
    # Raises RuntimeError when the file does not exist.
    def initialize(path, filename = nil)
      @path = self.class.full_path(path, filename)

      unless File.exist?(@path)
        raise "Could not load configuration file from #{@path}"
      end

      # An empty file parses to a falsy value instead of a Hash; fall back to
      # an empty Hash so +configure+ always receives something enumerable.
      configure(YAML.load_file(@path) || {})
    end

    # Overwrites the configuration file with the output of +to_yaml+.
    def save
      File.open(path, 'w+') do |file|
        file.write(to_yaml)
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'yaml'
2
+
3
module Mode
  module Connector
    # Connector configuration: a list of data sources kept in a YAML file
    # ('connect.yml' unless another filename is given).
    #
    # The file is read as a mapping of data source name to its properties.
    class Config < Mode::Config
      include Mode::Configurable

      # Config Variables
      attr_accessor :data_sources

      class << self
        # File name used when none is passed explicitly.
        def default_filename
          'connect.yml'
        end
      end

      private

      # Builds one DataSource per name/properties pair found in +config+.
      # A nil/false +config+ (e.g. an empty file) yields no data sources.
      def configure(config = {})
        @data_sources ||= []
        (config || {}).each do |name, props|
          data_sources << Mode::Connector::DataSource.new(name, props)
        end
      end

      # Serializes the data sources in the same name => properties shape that
      # +configure+ reads, so a saved file can be loaded again.
      def to_yaml
        serialized = {}
        data_sources.each do |source|
          serialized[source.name] = source.props
        end
        serialized.to_yaml
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'daemon_spawn'
2
+
3
module Mode
  module Connector
    # Daemon wrapper around Mode::Connector::Scheduler.
    class Daemon < DaemonSpawn::Base
      attr_reader :scheduler

      # Builds the scheduler and starts it.
      #
      # args - Array whose first element is used as the maximum number of
      #        jobs and whose second element is the list of data sources.
      #
      # Any StandardError is logged rather than raised.
      def start(args)
        max_jobs = args.shift
        data_sources = args.shift

        @scheduler = Mode::Connector::Scheduler.new(
          data_sources, :max_jobs => max_jobs
        )

        @scheduler.start!
      rescue => err
        Mode::Logger.instance.error(
          "Connector::Daemon", err.message, err.backtrace)
      end

      # Stops the scheduler. Does nothing when +start+ never got as far as
      # building one, so stopping a failed daemon does not raise.
      def stop
        scheduler.stop! if scheduler
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ require 'sequel'
2
+
3
module Mode
  module Connector
    # A named database connection description backed by a properties hash
    # with the string keys 'adapter', 'username', 'password', 'host' and
    # 'database'.
    class DataSource

      attr_reader :name
      attr_reader :props

      # name  - identifier of the data source
      # props - Hash of connection properties; nil is treated as empty
      def initialize(name, props = {})
        @name = name
        @props = props || {}
      end

      def adapter
        props['adapter']
      end

      def username
        props['username']
      end

      def password
        props['password']
      end

      def host
        props['host']
      end

      def database
        props['database']
      end

      # Runs +query+ and yields each resulting row to the block.
      def select(query, &block)
        connection_dataset(query).each(&block)
      end

      # Memoized Sequel connection. The redshift adapter is connected with
      # :client_min_messages => '' and :force_standard_strings => false.
      def connection
        @connection ||=
          if redshift?
            Sequel.connect(connection_url,
              :client_min_messages => '',
              :force_standard_strings => false
            )
          else
            Sequel.connect(connection_url)
          end
      end

      def redshift?
        adapter == 'redshift'
      end

      private

      # Connection URL of the form adapter://user[:password]@host/database.
      # User name and password are percent-encoded so characters such as
      # '@', ':' or '/' cannot be mistaken for URL delimiters.
      def connection_url
        password_segment = password.nil? ? nil : ":#{escape_credential(password)}"
        "#{adapter}://#{escape_credential(username)}#{password_segment}@#{host}/#{database}"
      end

      # Percent-encodes every byte outside the URL "unreserved" set.
      def escape_credential(value)
        value.to_s.gsub(/[^A-Za-z0-9\-._~]/) do |char|
          char.bytes.map { |byte| '%%%02X' % byte }.join
        end
      end

      # Logs the query and returns a dataset for it on the connection.
      def connection_dataset(query)
        log_connection_query(query)
        connection.dataset.with_sql(query)
        # figure out how to make cursors work here
      end

      def log_connection_query(query)
        Mode::Logger.instance.debug(
          "Connect::DataSource", "QUERY", query.split("\n"))
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module Mode
  module Connector
    # Simple value holder pairing a file path with the column types
    # collected for the data stored at that path.
    class Dataset
      attr_reader :path, :column_types

      # path         - location of the dataset file
      # column_types - column type information for the dataset
      def initialize(path, column_types)
        @path, @column_types = path, column_types
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module Mode
  module Connector
    # Read-only view over an API resource that describes a connector message.
    class Message
      attr_reader :resource

      def initialize(resource)
        @resource = resource
      end

      # #type, #name and #query each return the entry of the same name from
      # the resource's content.
      %w(type name query).each do |field|
        define_method(field) { resource.content[field] }
      end

      # The resource embedded under 'data_source'.
      def data_source
        resource.embedded('data_source')
      end

      # The href of the resource's 'execution' link.
      def execution_path
        execution_link = resource.links('execution')
        execution_link.href
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module Mode
  module Connector
    # Fetches pending connector messages from the API and yields them.
    class Poller
      # Requests the messages path and, when the reply is an API resource
      # with embedded 'messages', logs each one and yields it to the block
      # wrapped in a Mode::Connector::Message.
      #
      # Returns nil when the reply is not a resource or has no messages.
      def perform!(&block)
        response = Mode::API::Request.get(path)

        return unless response.is_a?(Mode::API::Resource)
        return unless response.has_embedded?('messages')

        response.embedded('messages').each do |raw_message|
          log_message(raw_message)
          yield Mode::Connector::Message.new(raw_message)
        end
      end

      private

      def path
        Mode::API::Request.data_source_connection_messages_path
      end

      def log_message(message)
        Mode::Logger.instance.info("Connector::Poller", message)
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ require 'date'
2
+ require 'time'
3
+ require 'tmpdir'
4
+
5
module Mode
  module Connector
    # Handles one connector message: runs its query against the matching
    # data source and uploads the result, or uploads an error report.
    class Processor
      attr_reader :message
      attr_reader :data_sources

      # message      - the message to process
      # data_sources - the data sources to search by name
      def initialize(message, data_sources)
        @message = message
        @data_sources = data_sources
      end

      # Runs the query and uploads the resulting dataset.
      #
      # Any StandardError is logged first and then reported to the message's
      # execution path. A failure while reporting is logged as well rather
      # than raised, so this method never hides the original error and never
      # propagates a StandardError to its caller.
      def perform!
        unless data_source
          raise "Data source not found #{message.data_source.name}"
        end

        send!(select!)
      rescue => err
        log_error(err)
        report_error(err)
      end

      private

      def select!
        Mode::Connector::Selector.new(query, data_source).perform!
      end

      def send!(dataset)
        Mode::Connector::Uploader.new(upload_path, dataset).perform!
      end

      def error!(message, detail = nil)
        Mode::Connector::Uploader.error!(upload_path, message, detail)
      end

      # Writes the error message and backtrace to the Mode logger.
      def log_error(err)
        Mode::Logger.instance.error(
          "Connector::Processor", err.message, err.backtrace)
      end

      # Uploads the error; a failure of the upload itself is only logged.
      def report_error(err)
        error!(err.message, Array(err.backtrace).join("\n"))
      rescue => upload_err
        log_error(upload_err)
      end

      def query
        message.query
      end

      def upload_path
        message.execution_path
      end

      # The configured data source whose name matches the message's, or nil.
      def data_source
        data_sources.find do |ds|
          ds.name == message.data_source.name
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
module Mode
  module Connector
    # Sends the configured data sources (name and adapter only) to the API.
    class Registrar
      attr_reader :config

      def initialize(config)
        @config = config
      end

      # PUTs the data source list to the data source connection path and
      # returns whatever the request returns.
      def perform!
        Mode::API::Request.put(path,
          :data_source_connection => { :data_sources => data_sources })
      end

      private

      # One { :name, :adapter } hash per configured data source.
      def data_sources
        config.data_sources.map do |source|
          { :name => source.name, :adapter => source.adapter }
        end
      end

      def path
        Mode::API::Request.data_source_connection_path
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'rufus-scheduler'
2
+
3
module Mode
  module Connector
    # Polls for connector messages on a timer and processes them, keeping
    # the number of 'processor'-tagged jobs below +max_jobs+.
    class Scheduler
      attr_reader :max_jobs, :scheduler, :data_sources

      # data_sources - the data sources handed to every Processor
      # options      - Hash; :max_jobs sets the job limit (4 when absent/nil)
      def initialize(data_sources, options = {})
        @max_jobs = options[:max_jobs] || 4
        @scheduler = Rufus::Scheduler.new
        @data_sources = data_sources
      end

      # Touches every data source's connection, then schedules a tick every
      # five seconds and joins the scheduler.
      def start!
        # Connections are requested here, before anything is scheduled;
        # this must stay outside the scheduler.
        data_sources.each { |source| source.connection }

        scheduler.every('5s') { tick }
        scheduler.join
      end

      # Stops polling. The stop call runs on its own thread, which is joined
      # before returning.
      def stop!
        Thread.new { scheduler.stop }.join
      end

      # Scheduler jobs tagged 'processor'.
      def processors
        scheduler.jobs(:tag => 'processor')
      end

      private

      # Schedules an immediate processor job unless the limit is reached.
      def tick
        return unless processors.length < max_jobs

        scheduler.in(0, :tag => 'processor') { tock }
      end

      # Polls for messages and processes each; errors are logged, not raised.
      def tock
        poll_messages { |message| process_message(message) }
      rescue => err
        Mode::Logger.instance.error(
          "Connector::Scheduler", err.message, err.backtrace)
      end

      def poll_messages(&block)
        poller = Mode::Connector::Poller.new
        poller.perform!(&block)
      end

      def process_message(message)
        processor = Mode::Connector::Processor.new(message, data_sources)
        processor.perform!
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require 'sequel'
2
+
3
module Mode
  module Connector
    # Runs a query against a data source and writes the rows to a JSON
    # array in a temporary file, recording each row in a TypeMap.
    class Selector
      attr_reader :query, :data_source, :column_types

      def initialize(query, data_source)
        @query = query
        @data_source = data_source
        @column_types = Mode::Connector::TypeMap.new
      end

      # Writes the query result to #path and returns a Dataset describing
      # that file and the collected column types.
      def perform!
        File.open(path, 'w+') do |file|
          write(file)
        end

        Mode::Connector::Dataset.new(path, column_types)
      end

      # Location of the output file: 'data.json' inside a temp directory
      # created on first use.
      def path
        @path ||= File.join(tmpdir, 'data.json')
      end

      private

      # Streams every row into +file+ as one JSON array, comma-separated.
      def write(file)
        first_row = true

        file.write '['
        data_source.select(query) do |row|
          column_types.insert(row)

          # Every row except the first is preceded by a separator.
          file.write ',' unless first_row
          file.write JSON.generate(row)
          first_row = false
        end
        file.write ']'
      end

      def tmpdir
        @tmpdir ||= Dir.mktmpdir
      end
    end
  end
end