mode 0.0.5 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/README.md +17 -22
- data/bin/mode +1 -1
- data/lib/mode.rb +34 -6
- data/lib/mode/api/form.rb +53 -0
- data/lib/mode/api/link.rb +31 -0
- data/lib/mode/api/request.rb +181 -0
- data/lib/mode/api/resource.rb +67 -0
- data/lib/mode/auth/access_token.rb +23 -0
- data/lib/mode/cli.rb +3 -3
- data/lib/mode/cli/analyze.rb +1 -1
- data/lib/mode/cli/base.rb +5 -0
- data/lib/mode/cli/connect.rb +18 -0
- data/lib/mode/cli/helpers.rb +0 -9
- data/lib/mode/cli/import.rb +9 -38
- data/lib/mode/cli/login.rb +13 -0
- data/lib/mode/cli/package.rb +2 -5
- data/lib/mode/commands/analyze_field.rb +20 -21
- data/lib/mode/commands/analyze_schema.rb +69 -48
- data/lib/mode/commands/connect.rb +78 -0
- data/lib/mode/commands/helpers.rb +54 -0
- data/lib/mode/commands/import.rb +209 -20
- data/lib/mode/commands/login.rb +111 -0
- data/lib/mode/config.rb +13 -33
- data/lib/mode/configurable.rb +46 -0
- data/lib/mode/connector/config.rb +31 -0
- data/lib/mode/connector/daemon.rb +27 -0
- data/lib/mode/connector/data_source.rb +75 -0
- data/lib/mode/connector/dataset.rb +13 -0
- data/lib/mode/connector/message.rb +31 -0
- data/lib/mode/connector/poller.rb +27 -0
- data/lib/mode/connector/processor.rb +58 -0
- data/lib/mode/connector/registrar.rb +36 -0
- data/lib/mode/connector/scheduler.rb +62 -0
- data/lib/mode/connector/selector.rb +47 -0
- data/lib/mode/connector/type_map.rb +45 -0
- data/lib/mode/connector/uploader.rb +50 -0
- data/lib/mode/logger.rb +202 -0
- data/lib/mode/version.rb +1 -1
- data/mode.gemspec +13 -2
- data/spec/api/form_spec.rb +51 -0
- data/spec/api/link_spec.rb +23 -0
- data/spec/api/request_spec.rb +111 -0
- data/spec/api/resource_spec.rb +70 -0
- data/spec/auth/access_token_spec.rb +22 -0
- data/spec/commands/analyze_field_spec.rb +26 -0
- data/spec/commands/analyze_schema_spec.rb +7 -5
- data/spec/commands/connect_spec.rb +80 -0
- data/spec/commands/helpers_spec.rb +69 -0
- data/spec/commands/import_spec.rb +155 -0
- data/spec/commands/login_spec.rb +178 -0
- data/spec/config_spec.rb +9 -7
- data/spec/connector/config_spec.rb +46 -0
- data/spec/connector/daemon_spec.rb +30 -0
- data/spec/connector/data_source_spec.rb +73 -0
- data/spec/connector/message_spec.rb +22 -0
- data/spec/connector/poller_spec.rb +26 -0
- data/spec/connector/processor_spec.rb +93 -0
- data/spec/connector/registrar_spec.rb +53 -0
- data/spec/connector/scheduler_spec.rb +93 -0
- data/spec/connector/selector_spec.rb +54 -0
- data/spec/connector/type_map_spec.rb +45 -0
- data/spec/connector/uploader_spec.rb +55 -0
- data/spec/fixtures/country-codes/README.md +71 -0
- data/spec/fixtures/country-codes/data/country-codes.csv +250 -0
- data/spec/fixtures/country-codes/datapackage.json +142 -0
- data/spec/fixtures/country-codes/scripts/get_countries_of_earth.py +370 -0
- data/spec/fixtures/country-codes/scripts/reorder_columns.py +8 -0
- data/spec/fixtures/country-codes/scripts/requirements.pip +2 -0
- data/spec/fixtures/espn_draft.csv +473 -1
- data/spec/fixtures/espn_draft/data.csv +473 -0
- data/spec/fixtures/espn_draft/datapackage.json +43 -0
- data/spec/logger_spec.rb +79 -0
- data/spec/spec_helper.rb +6 -1
- metadata +156 -19
- data/lib/mode/cli/setup.rb +0 -12
- data/lib/mode/commands/package.rb +0 -56
- data/lib/mode/commands/setup.rb +0 -36
- data/lib/mode/package_builder.rb +0 -57
- data/spec/commands/setup_spec.rb +0 -62
- data/spec/fixtures/MOCK_DATA.csv +0 -100001
- data/spec/fixtures/cb_clean_small.csv +0 -100000
- data/spec/fixtures/duplicate_keys.csv +0 -3
- data/spec/fixtures/format_examples.csv.txt +0 -6
- data/spec/fixtures/format_examples_after_excel.csv.txt +0 -1
@@ -0,0 +1,46 @@
|
|
1
|
+
module Mode
  # Mixin that gives a configuration class YAML-file persistence.
  #
  # The including class is expected to supply:
  #   * a class-level `default_filename` (used when no filename is given),
  #   * an instance-level `configure(hash)` that consumes the loaded YAML,
  #   * an instance-level `to_yaml` that produces what `save` writes.
  #
  # Including the module adds a `path` reader and the ClassMethods below.
  module Configurable
    def self.included(klass)
      klass.send :attr_reader, :path
      klass.send :extend, ClassMethods
    end

    module ClassMethods
      # True when the configuration file exists under +path+.
      #
      # path     - directory containing the configuration file
      # filename - optional file name; defaults to `default_filename`
      def exists?(path, filename = nil)
        File.exist?(full_path(path, filename))
      end

      # Writes an empty YAML mapping to the configuration file (truncating
      # any existing content) and returns an instance loaded from it.
      # The directory itself is not created here.
      def init(path, filename = nil)
        File.open(full_path(path, filename), 'w+') do |file|
          file.write({}.to_yaml)
        end

        new(path, filename)
      end

      # Default configuration directory, expanded from "~/.mode".
      def default_dir
        File.expand_path("~/.mode")
      end

      # Absolute path of the configuration file inside +path+.
      def full_path(path, filename = nil)
        File.expand_path(File.join(path, filename || default_filename))
      end
    end

    # Loads the configuration file and hands its content to `configure`.
    #
    # Raises RuntimeError when the file does not exist.
    def initialize(path, filename = nil)
      @path = self.class.full_path(path, filename)

      unless File.exist?(@path)
        raise "Could not load configuration file from #{@path}"
      end

      # An empty file parses to false/nil rather than a Hash; treat it as an
      # empty configuration so `configure` always receives a mapping.
      configure(YAML.load_file(@path) || {})
    end

    # Overwrites the configuration file with the result of `to_yaml`.
    def save
      File.open(path, 'w+') do |file|
        file.write(to_yaml)
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Mode
  module Connector
    # Connector configuration: the list of data sources read from
    # (and written back to) 'connect.yml' via Mode::Configurable.
    class Config < Mode::Config
      include Mode::Configurable

      # Config Variables
      attr_accessor :data_sources

      class << self
        # File name used when callers do not pass one explicitly.
        def default_filename
          'connect.yml'
        end
      end

      private

      # Builds one DataSource per entry of the loaded YAML mapping.
      #
      # config - Hash of data source name => properties hash; nil is treated
      #          as an empty mapping.
      def configure(config = {})
        @data_sources ||= []
        (config || {}).each do |name, props|
          data_sources << Mode::Connector::DataSource.new(name, props)
        end
      end

      # Serialises the data sources as a name => props mapping, i.e. the same
      # shape `configure` reads, so a saved file can be loaded again.
      # (Dumping the DataSource objects themselves would write an array of
      # tagged Ruby objects that `configure` cannot iterate as name/props.)
      def to_yaml
        mapping = {}
        (data_sources || []).each do |source|
          mapping[source.name] = source.props
        end
        mapping.to_yaml
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'daemon_spawn'
|
2
|
+
|
3
|
+
module Mode
  module Connector
    # DaemonSpawn::Base subclass that builds and runs a
    # Mode::Connector::Scheduler.
    class Daemon < DaemonSpawn::Base
      attr_reader :scheduler

      # Builds the scheduler from the daemon arguments and starts it.
      #
      # args - Array, consumed destructively: the first element is the
      #        maximum number of jobs, the second the data sources.
      #
      # Any StandardError raised while building or running the scheduler is
      # logged instead of being propagated.
      def start(args)
        max_jobs = args.shift
        data_sources = args.shift

        @scheduler = Mode::Connector::Scheduler.new(
          data_sources, :max_jobs => max_jobs
        )

        @scheduler.start!
      rescue => err
        Mode::Logger.instance.error(
          "Connector::Daemon", err.message, err.backtrace)
      end

      # Stops the scheduler. Does nothing when no scheduler was created
      # (e.g. `start` failed before assigning it), instead of raising
      # NoMethodError on nil.
      def stop
        scheduler.stop! if scheduler
      end
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
|
3
|
+
module Mode
  module Connector
    # A named database connection description plus the lazily opened
    # Sequel connection built from it.
    class DataSource
      attr_reader :name
      attr_reader :props

      # name  - identifier of the data source
      # props - Hash with string keys 'adapter', 'username', 'password',
      #         'host' and 'database'; nil is treated as an empty Hash so the
      #         accessors below return nil instead of raising.
      def initialize(name, props = {})
        @name = name
        @props = props || {}
      end

      def adapter
        props['adapter']
      end

      def username
        props['username']
      end

      def password
        props['password']
      end

      def host
        props['host']
      end

      def database
        props['database']
      end

      # Runs +query+ and yields each resulting row to the block.
      def select(query, &block)
        connection_dataset(query).each(&block)
      end

      # Memoized Sequel connection. Redshift sources are opened with
      # :client_min_messages and :force_standard_strings overridden.
      def connection
        @connection ||=
          if redshift?
            Sequel.connect(connection_url,
              :client_min_messages => '',
              :force_standard_strings => false
            )
          else
            Sequel.connect(connection_url)
          end
      end

      def redshift?
        adapter == 'redshift'
      end

      private

      # Connection URL of the form adapter://user[:password]@host/database.
      # NOTE(review): user and password are interpolated verbatim; values
      # containing URL-reserved characters would presumably need escaping.
      def connection_url
        password_segment = password.nil? ? nil : ":#{password}"
        "#{adapter}://#{username}#{password_segment}@#{host}/#{database}"
      end

      # Logs the query, then returns a dataset for it on the connection.
      def connection_dataset(query)
        log_connection_query(query)
        connection.dataset.with_sql(query)
        # figure out how to make cursors work here
      end

      # Debug-logs the query, one array element per line of SQL.
      def log_connection_query(query)
        Mode::Logger.instance.debug(
          "Connect::DataSource", "QUERY", query.split("\n"))
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Mode
  module Connector
    # Read-only view over the API resource that represents one message.
    class Message
      attr_reader :resource

      # resource - object responding to `content`, `embedded` and `links`
      def initialize(resource)
        @resource = resource
      end

      # Value stored under 'type' in the resource content.
      def type
        content_value('type')
      end

      # Value stored under 'name' in the resource content.
      def name
        content_value('name')
      end

      # Value stored under 'query' in the resource content.
      def query
        content_value('query')
      end

      # The embedded 'data_source' of the resource.
      def data_source
        resource.embedded('data_source')
      end

      # href of the resource's 'execution' link.
      def execution_path
        execution_link = resource.links('execution')
        execution_link.href
      end

      private

      # Looks +key+ up in the resource content.
      def content_value(key)
        resource.content[key]
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Mode
  module Connector
    # Fetches pending connector messages from the API and yields them.
    class Poller
      # Requests the messages path and, when the response is a
      # Mode::API::Resource with embedded 'messages', logs each one and
      # yields it wrapped in a Mode::Connector::Message.
      #
      # Returns nil when the response is not a resource or carries no
      # embedded messages.
      def perform!(&block)
        response = Mode::API::Request.get(path)

        return unless response.is_a?(Mode::API::Resource)
        return unless response.has_embedded?('messages')

        response.embedded('messages').each do |message|
          log_message(message)
          yield Mode::Connector::Message.new(message)
        end
      end

      private

      # API path polled for messages.
      def path
        Mode::API::Request.data_source_connection_messages_path
      end

      # Info-logs the raw message under the poller's tag.
      def log_message(message)
        logger = Mode::Logger.instance
        logger.info("Connector::Poller", message)
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'time'
|
3
|
+
require 'tmpdir'
|
4
|
+
|
5
|
+
module Mode
  module Connector
    # Handles one message: runs its query against the matching data source
    # and uploads the result, or reports the failure.
    class Processor
      attr_reader :message
      attr_reader :data_sources

      # message      - object exposing `query`, `execution_path` and
      #                `data_source` (whose `name` selects the data source)
      # data_sources - collection of data sources responding to `name`
      def initialize(message, data_sources)
        @message = message
        @data_sources = data_sources
      end

      # Selects and uploads the data for the message.
      #
      # On any StandardError (including an unknown data source) the error is
      # logged locally first and then reported to the upload path. Logging
      # comes first so the original error is not lost if the report itself
      # fails; in that case the reporting error propagates to the caller.
      def perform!
        unless data_source
          raise "Data source not found #{message.data_source.name}"
        end

        send!(select!)
      rescue => err
        Mode::Logger.instance.error(
          "Connector::Processor", err.message, err.backtrace)

        # backtrace can be nil for exceptions that were never raised
        error!(err.message, Array(err.backtrace).join("\n"))
      end

      private

      # Runs the message's query against the data source.
      def select!
        Mode::Connector::Selector.new(query, data_source).perform!
      end

      # Uploads +dataset+ to the message's execution path.
      def send!(dataset)
        Mode::Connector::Uploader.new(upload_path, dataset).perform!
      end

      # Reports a failure to the message's execution path.
      def error!(message, detail = nil)
        Mode::Connector::Uploader.error!(upload_path, message, detail)
      end

      def query
        message.query
      end

      def upload_path
        message.execution_path
      end

      # The configured data source whose name matches the message's, or nil.
      def data_source
        data_sources.find do |ds|
          ds.name == message.data_source.name
        end
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Mode
  module Connector
    # Registers the configured data sources with the API.
    class Registrar
      attr_reader :config

      # config - object whose `data_sources` respond to `name` and `adapter`
      def initialize(config)
        @config = config
      end

      # PUTs the name and adapter of every configured data source to the
      # data source connection path and returns the request's result.
      def perform!
        Mode::API::Request.put(path,
          :data_source_connection => {
            :data_sources => data_sources
          }
        )
      end

      private

      # Name/adapter pairs for each configured data source. Only these two
      # fields are sent; credentials and host details are not included.
      def data_sources
        config.data_sources.map do |source|
          {
            :name => source.name,
            :adapter => source.adapter
          }
        end
      end

      def path
        Mode::API::Request.data_source_connection_path
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'rufus-scheduler'
|
2
|
+
|
3
|
+
module Mode
  module Connector
    # Polls for messages on a fixed interval and processes them in
    # scheduler jobs tagged 'processor', capped at `max_jobs` at a time.
    class Scheduler
      attr_reader :max_jobs
      attr_reader :scheduler
      attr_reader :data_sources

      # data_sources - collection of data sources (each responds to
      #                `connection`)
      # options      - Hash; :max_jobs caps concurrent processor jobs
      #                (default 4). The value is coerced with Integer() so a
      #                numeric String (e.g. handed through daemon arguments)
      #                does not break the comparison in `tick`.
      #
      # Raises ArgumentError/TypeError when :max_jobs is not integer-like.
      def initialize(data_sources, options = {})
        @data_sources = data_sources
        @scheduler = Rufus::Scheduler.new
        @max_jobs = Integer(options[:max_jobs] || 4)
      end

      # Opens every data source connection, schedules a tick every five
      # seconds and then joins the scheduler.
      def start!
        data_sources.each(&:connection)
        # Make sure this stays outside the scheduler

        scheduler.every('5s') { tick }
        scheduler.join
      end

      # Stops the scheduler from a separate thread and waits for that call
      # to return.
      # NOTE(review): presumably run in its own thread because stop! is
      # reached from a signal handler — confirm.
      def stop!
        stopper = Thread.new {
          scheduler.stop # Stop polling
        }

        stopper.join
      end

      # Scheduler jobs currently tagged 'processor'.
      def processors
        scheduler.jobs(:tag => 'processor')
      end

      private

      # Queues one processor job unless `max_jobs` are already present.
      def tick
        if processors.length < max_jobs
          scheduler.in(0, :tag => 'processor') { tock }
        end
      end

      # Polls once and processes every message; errors are logged, not
      # raised.
      def tock
        poll_messages do |message|
          process_message(message)
        end
      rescue => err
        Mode::Logger.instance.error(
          "Connector::Scheduler", err.message, err.backtrace)
      end

      def poll_messages(&block)
        Mode::Connector::Poller.new.perform!(&block)
      end

      def process_message(message)
        Mode::Connector::Processor.new(message, data_sources).perform!
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'json'
require 'tmpdir'

require 'sequel'
|
2
|
+
|
3
|
+
module Mode
  module Connector
    # Runs a query against a data source and streams the rows into a JSON
    # array on disk, recording every row in a TypeMap along the way.
    class Selector
      attr_reader :query
      attr_reader :data_source
      attr_reader :column_types

      # query       - SQL text handed to the data source
      # data_source - object whose `select(query)` yields one row per call
      def initialize(query, data_source)
        @query, @data_source = query, data_source
        @column_types = Mode::Connector::TypeMap.new
      end

      # Writes the query result to `path` and returns a
      # Mode::Connector::Dataset built from that path and the column types.
      def perform!
        File.open(path, 'w+') { |file| write(file) }
        Mode::Connector::Dataset.new(path, column_types)
      end

      # Location of the output file: 'data.json' inside a temporary
      # directory created on first use (and not removed by this class).
      def path
        @path ||= File.join(tmpdir, 'data.json')
      end

      private

      # Streams rows into +file+ as a single JSON array; an empty result
      # produces "[]".
      def write(file)
        first_row = true

        file.write '['
        data_source.select(query) do |row|
          column_types.insert(row)

          # separator goes before every row except the first
          file.write ',' unless first_row
          file.write JSON.generate(row)

          first_row = false
        end
        file.write ']'
      end

      def tmpdir
        @tmpdir ||= Dir.mktmpdir
      end
    end
  end
end
|