mode 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/README.md +17 -22
- data/bin/mode +1 -1
- data/lib/mode.rb +34 -6
- data/lib/mode/api/form.rb +53 -0
- data/lib/mode/api/link.rb +31 -0
- data/lib/mode/api/request.rb +181 -0
- data/lib/mode/api/resource.rb +67 -0
- data/lib/mode/auth/access_token.rb +23 -0
- data/lib/mode/cli.rb +3 -3
- data/lib/mode/cli/analyze.rb +1 -1
- data/lib/mode/cli/base.rb +5 -0
- data/lib/mode/cli/connect.rb +18 -0
- data/lib/mode/cli/helpers.rb +0 -9
- data/lib/mode/cli/import.rb +9 -38
- data/lib/mode/cli/login.rb +13 -0
- data/lib/mode/cli/package.rb +2 -5
- data/lib/mode/commands/analyze_field.rb +20 -21
- data/lib/mode/commands/analyze_schema.rb +69 -48
- data/lib/mode/commands/connect.rb +78 -0
- data/lib/mode/commands/helpers.rb +54 -0
- data/lib/mode/commands/import.rb +209 -20
- data/lib/mode/commands/login.rb +111 -0
- data/lib/mode/config.rb +13 -33
- data/lib/mode/configurable.rb +46 -0
- data/lib/mode/connector/config.rb +31 -0
- data/lib/mode/connector/daemon.rb +27 -0
- data/lib/mode/connector/data_source.rb +75 -0
- data/lib/mode/connector/dataset.rb +13 -0
- data/lib/mode/connector/message.rb +31 -0
- data/lib/mode/connector/poller.rb +27 -0
- data/lib/mode/connector/processor.rb +58 -0
- data/lib/mode/connector/registrar.rb +36 -0
- data/lib/mode/connector/scheduler.rb +62 -0
- data/lib/mode/connector/selector.rb +47 -0
- data/lib/mode/connector/type_map.rb +45 -0
- data/lib/mode/connector/uploader.rb +50 -0
- data/lib/mode/logger.rb +202 -0
- data/lib/mode/version.rb +1 -1
- data/mode.gemspec +13 -2
- data/spec/api/form_spec.rb +51 -0
- data/spec/api/link_spec.rb +23 -0
- data/spec/api/request_spec.rb +111 -0
- data/spec/api/resource_spec.rb +70 -0
- data/spec/auth/access_token_spec.rb +22 -0
- data/spec/commands/analyze_field_spec.rb +26 -0
- data/spec/commands/analyze_schema_spec.rb +7 -5
- data/spec/commands/connect_spec.rb +80 -0
- data/spec/commands/helpers_spec.rb +69 -0
- data/spec/commands/import_spec.rb +155 -0
- data/spec/commands/login_spec.rb +178 -0
- data/spec/config_spec.rb +9 -7
- data/spec/connector/config_spec.rb +46 -0
- data/spec/connector/daemon_spec.rb +30 -0
- data/spec/connector/data_source_spec.rb +73 -0
- data/spec/connector/message_spec.rb +22 -0
- data/spec/connector/poller_spec.rb +26 -0
- data/spec/connector/processor_spec.rb +93 -0
- data/spec/connector/registrar_spec.rb +53 -0
- data/spec/connector/scheduler_spec.rb +93 -0
- data/spec/connector/selector_spec.rb +54 -0
- data/spec/connector/type_map_spec.rb +45 -0
- data/spec/connector/uploader_spec.rb +55 -0
- data/spec/fixtures/country-codes/README.md +71 -0
- data/spec/fixtures/country-codes/data/country-codes.csv +250 -0
- data/spec/fixtures/country-codes/datapackage.json +142 -0
- data/spec/fixtures/country-codes/scripts/get_countries_of_earth.py +370 -0
- data/spec/fixtures/country-codes/scripts/reorder_columns.py +8 -0
- data/spec/fixtures/country-codes/scripts/requirements.pip +2 -0
- data/spec/fixtures/espn_draft.csv +473 -1
- data/spec/fixtures/espn_draft/data.csv +473 -0
- data/spec/fixtures/espn_draft/datapackage.json +43 -0
- data/spec/logger_spec.rb +79 -0
- data/spec/spec_helper.rb +6 -1
- metadata +156 -19
- data/lib/mode/cli/setup.rb +0 -12
- data/lib/mode/commands/package.rb +0 -56
- data/lib/mode/commands/setup.rb +0 -36
- data/lib/mode/package_builder.rb +0 -57
- data/spec/commands/setup_spec.rb +0 -62
- data/spec/fixtures/MOCK_DATA.csv +0 -100001
- data/spec/fixtures/cb_clean_small.csv +0 -100000
- data/spec/fixtures/duplicate_keys.csv +0 -3
- data/spec/fixtures/format_examples.csv.txt +0 -6
- data/spec/fixtures/format_examples_after_excel.csv.txt +0 -1
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'fileutils'
require 'yaml'

module Mode
  # Mixin for configuration objects that are backed by a YAML file on disk.
  #
  # The including class is expected to provide:
  #   * a class method +default_filename+ (used by +full_path+ when no
  #     filename is given),
  #   * an instance method +configure(hash)+ (called with the parsed file),
  #   * an instance method +to_yaml+ (its result is written by +save+).
  #
  # Including the module adds a +path+ reader and the ClassMethods below.
  module Configurable
    def self.included(klass)
      # attr_reader is private on older Rubies, hence the send.
      klass.send :attr_reader, :path
      klass.extend ClassMethods
    end

    module ClassMethods
      # Returns true when the configuration file exists.
      #
      # path     - directory holding the file
      # filename - optional file name; falls back to +default_filename+
      def exists?(path, filename = nil)
        File.exist?(full_path(path, filename))
      end

      # Writes an empty YAML hash to the configuration file (creating the
      # containing directory when needed) and returns a new instance
      # loaded from it.
      def init(path, filename = nil)
        target = full_path(path, filename)
        FileUtils.mkdir_p(File.dirname(target))

        File.open(target, 'w+') do |file|
          file.write({}.to_yaml)
        end

        new(path, filename)
      end

      # Expanded path of the ~/.mode directory.
      def default_dir
        File.expand_path("~/.mode")
      end

      # Absolute path of the file +filename+ (or +default_filename+)
      # inside +path+.
      def full_path(path, filename = nil)
        File.expand_path(File.join(path, filename || default_filename))
      end
    end

    # Loads the configuration file and hands its parsed content to
    # +configure+.
    #
    # Raises RuntimeError when the file does not exist.
    def initialize(path, filename = nil)
      @path = self.class.full_path(path, filename)

      unless File.exist?(@path)
        raise "Could not load configuration file from #{@path}"
      end

      # An empty file parses to false/nil; treat it as an empty config.
      configure(YAML.load_file(@path) || {})
    end

    # Overwrites the configuration file with the result of +to_yaml+.
    def save
      File.open(path, 'w+') do |file|
        file.write(to_yaml)
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Mode
  module Connector
    # Connector configuration stored in a YAML file named 'connect.yml'.
    #
    # The file content is treated as a mapping of data source name to its
    # properties; each entry becomes a Mode::Connector::DataSource.
    class Config < Mode::Config
      include Mode::Configurable

      # Config Variables
      attr_accessor :data_sources

      class << self
        # File name used when callers do not pass one explicitly.
        def default_filename
          'connect.yml'
        end
      end

      private

      # Builds a DataSource for every name/props pair in +config+ and
      # appends it to +data_sources+. A nil/false config (e.g. an empty
      # file) adds nothing.
      def configure(config = {})
        @data_sources ||= []
        (config || {}).each do |name, props|
          data_sources << Mode::Connector::DataSource.new(name, props)
        end
      end

      # Serializes the data sources as a name => props mapping, i.e. the
      # same shape +configure+ reads, so a saved file can be loaded again.
      def to_yaml
        mapping = {}
        data_sources.each { |source| mapping[source.name] = source.props }
        mapping.to_yaml
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'daemon_spawn'
|
2
|
+
|
3
|
+
module Mode
  module Connector
    # DaemonSpawn process that runs a Mode::Connector::Scheduler.
    class Daemon < DaemonSpawn::Base
      attr_reader :scheduler

      # Builds and starts the scheduler.
      #
      # args - Array; the first element is taken as max_jobs and the
      #        second as the list of data sources (both are shifted off).
      #
      # Any StandardError raised while building or running the scheduler
      # is logged through Mode::Logger instead of propagating.
      def start(args)
        max_jobs = args.shift
        data_sources = args.shift

        @scheduler = Mode::Connector::Scheduler.new(
          data_sources, :max_jobs => max_jobs
        )

        @scheduler.start!
      rescue => err
        Mode::Logger.instance.error(
          "Connector::Daemon", err.message, err.backtrace)
      end

      # Stops the scheduler. Does nothing when the scheduler was never
      # created (start rescues errors, so @scheduler may still be nil).
      def stop
        scheduler.stop! if scheduler
      end
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
|
3
|
+
module Mode
  module Connector
    # A named database connection described by a properties hash with the
    # string keys 'adapter', 'username', 'password', 'host' and 'database'.
    class DataSource
      attr_reader :name
      attr_reader :props

      # name  - identifier of the data source
      # props - Hash of connection properties; nil is treated as empty
      def initialize(name, props = {})
        @name = name
        @props = props || {}
      end

      def adapter
        props['adapter']
      end

      def username
        props['username']
      end

      def password
        props['password']
      end

      def host
        props['host']
      end

      def database
        props['database']
      end

      # Runs +query+ and yields each result row to the block.
      def select(query, &block)
        connection_dataset(query).each(&block)
      end

      # Memoized Sequel connection. Redshift connections are opened with
      # :client_min_messages and :force_standard_strings overridden.
      def connection
        if redshift?
          @connection ||= Sequel.connect(connection_url,
            :client_min_messages => '',
            :force_standard_strings => false
          )
        else
          @connection ||= Sequel.connect(connection_url)
        end
      end

      def redshift?
        adapter == 'redshift'
      end

      private

      # Builds "adapter://user[:password]@host/database". The user and
      # password are percent-encoded so characters such as '@', ':' or '/'
      # inside credentials cannot corrupt the URL structure.
      def connection_url
        password_segment = password.nil? ? nil : ":#{escape_userinfo(password)}"
        "#{adapter}://#{escape_userinfo(username)}#{password_segment}@#{host}/#{database}"
      end

      # Percent-encodes every byte of +value+ that is not an RFC 3986
      # unreserved character. nil becomes the empty string.
      def escape_userinfo(value)
        value.to_s.gsub(/[^A-Za-z0-9\-._~]/) do |char|
          char.bytes.map { |byte| format('%%%02X', byte) }.join
        end
      end

      # Logs the query and returns a dataset for it on the connection.
      def connection_dataset(query)
        log_connection_query(query)
        connection.dataset.with_sql(query)
        # figure out how to make cursors work here
      end

      def log_connection_query(query)
        Mode::Logger.instance.debug(
          "Connect::DataSource", "QUERY", query.split("\n"))
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Mode
  module Connector
    # Thin read-only view over an API resource that describes a message.
    # Field values come from the resource's +content+ hash; the data
    # source and execution path come from its embedded/linked resources.
    class Message
      attr_reader :resource

      def initialize(resource)
        @resource = resource
      end

      # Value stored under 'type' in the resource content.
      def type
        field('type')
      end

      # Value stored under 'name' in the resource content.
      def name
        field('name')
      end

      # Value stored under 'query' in the resource content.
      def query
        field('query')
      end

      # The embedded 'data_source' resource.
      def data_source
        resource.embedded('data_source')
      end

      # The href of the resource's 'execution' link.
      def execution_path
        execution_link = resource.links('execution')
        execution_link.href
      end

      private

      # Looks up +key+ in the resource content.
      def field(key)
        resource.content[key]
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Mode
  module Connector
    # Fetches pending messages from the API and yields each one wrapped in
    # a Mode::Connector::Message.
    class Poller
      # Requests the messages path; when the response is an API resource
      # that embeds 'messages', logs and yields every embedded message.
      # Any other response is ignored.
      def perform!(&block)
        response = Mode::API::Request.get(path)

        return unless response.is_a?(Mode::API::Resource)
        return unless response.has_embedded?('messages')

        response.embedded('messages').each do |raw_message|
          log_message(raw_message)
          yield Mode::Connector::Message.new(raw_message)
        end
      end

      private

      # API path the messages are fetched from.
      def path
        Mode::API::Request.data_source_connection_messages_path
      end

      # Records the raw message at info level.
      def log_message(message)
        logger = Mode::Logger.instance
        logger.info("Connector::Poller", message)
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'time'
|
3
|
+
require 'tmpdir'
|
4
|
+
|
5
|
+
module Mode
  module Connector
    # Handles one message: runs its query against the matching data source
    # and uploads the result (or the error) to the message's execution path.
    class Processor
      attr_reader :message
      attr_reader :data_sources

      # message      - object responding to query, execution_path and
      #                data_source (whose name selects the data source)
      # data_sources - collection of objects responding to name
      def initialize(message, data_sources)
        @message = message
        @data_sources = data_sources
      end

      # Selects and uploads the data for the message.
      #
      # On any StandardError the failure is written to the local log first
      # and then reported to the upload path. Logging first guarantees the
      # original error is recorded even if reporting it remotely fails; an
      # error raised while reporting propagates to the caller.
      def perform!
        unless data_source
          raise "Data source not found #{message.data_source.name}"
        end

        send!(select!)
      rescue => err
        Mode::Logger.instance.error(
          "Connector::Processor", err.message, err.backtrace)

        error!(err.message, Array(err.backtrace).join("\n"))
      end

      private

      # Runs the query and returns the resulting dataset.
      def select!
        Mode::Connector::Selector.new(query, data_source).perform!
      end

      # Uploads +dataset+ to the message's execution path.
      def send!(dataset)
        Mode::Connector::Uploader.new(upload_path, dataset).perform!
      end

      # Reports a failure to the message's execution path.
      def error!(message, detail = nil)
        Mode::Connector::Uploader.error!(upload_path, message, detail)
      end

      def query
        message.query
      end

      def upload_path
        message.execution_path
      end

      # The configured data source whose name matches the message's data
      # source, or nil when there is none.
      def data_source
        data_sources.find do |ds|
          ds.name == message.data_source.name
        end
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Mode
  module Connector
    # Registers the configured data sources with the API by PUTting their
    # names and adapters to the data source connection path.
    class Registrar
      attr_reader :config

      def initialize(config)
        @config = config
      end

      # Sends the data source list and returns the API response.
      def perform!
        payload = { :data_source_connection => { :data_sources => data_sources } }
        Mode::API::Request.put(path, payload)
      end

      private

      # One { :name, :adapter } hash per configured data source.
      def data_sources
        config.data_sources.map do |source|
          { :name => source.name, :adapter => source.adapter }
        end
      end

      # API path the registration is sent to.
      def path
        Mode::API::Request.data_source_connection_path
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'rufus-scheduler'
|
2
|
+
|
3
|
+
module Mode
  module Connector
    # Polls for messages on a fixed interval and processes them in
    # rufus-scheduler jobs tagged 'processor', with at most +max_jobs+
    # such jobs scheduled at a time.
    class Scheduler
      DEFAULT_MAX_JOBS = 4
      DEFAULT_POLL_INTERVAL = '5s'

      attr_reader :max_jobs
      attr_reader :poll_interval
      attr_reader :scheduler
      attr_reader :data_sources

      # data_sources - collection of data sources handed to each Processor
      # options      - Hash:
      #                :max_jobs      - maximum concurrent processor jobs
      #                                 (default 4). Coerced with Integer()
      #                                 so a numeric String is accepted and
      #                                 a non-numeric value fails here
      #                                 rather than later inside a tick.
      #                :poll_interval - interval passed to the scheduler's
      #                                 +every+ (default '5s')
      def initialize(data_sources, options = {})
        @data_sources = data_sources
        @scheduler = Rufus::Scheduler.new
        @max_jobs = Integer(options[:max_jobs] || DEFAULT_MAX_JOBS)
        @poll_interval = options[:poll_interval] || DEFAULT_POLL_INTERVAL
      end

      # Opens every data source connection, schedules the recurring tick
      # and joins the scheduler.
      def start!
        data_sources.each(&:connection)
        # Make sure this stays outside the scheduler

        scheduler.every(poll_interval) { tick }
        scheduler.join
      end

      # Stops the scheduler from a separate thread and waits for that
      # thread to finish.
      # NOTE(review): presumably a thread is used because stop! may be
      # invoked from a signal handler - confirm before simplifying.
      def stop!
        stopper = Thread.new {
          scheduler.stop # Stop polling
        }

        stopper.join # wait for threads to finish
      end

      # Scheduler jobs tagged 'processor'.
      def processors
        scheduler.jobs(:tag => 'processor')
      end

      private

      # Schedules one immediate processor job unless max_jobs is reached.
      def tick
        if processors.length < max_jobs
          scheduler.in(0, :tag => 'processor') { tock }
        end
      end

      # Polls for messages and processes each one; errors are logged so a
      # failing poll does not escape the scheduler job.
      def tock
        poll_messages do |message|
          process_message(message)
        end
      rescue => err
        Mode::Logger.instance.error(
          "Connector::Scheduler", err.message, err.backtrace)
      end

      def poll_messages(&block)
        Mode::Connector::Poller.new.perform!(&block)
      end

      def process_message(message)
        Mode::Connector::Processor.new(message, data_sources).perform!
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
|
3
|
+
# JSON.generate and Dir.mktmpdir are used below; require them here so the
# file does not depend on another file having loaded them first.
require 'json'
require 'tmpdir'

module Mode
  module Connector
    # Runs a query against a data source and streams the rows into a JSON
    # array file inside a fresh temporary directory, recording every row
    # in a TypeMap along the way.
    class Selector
      attr_reader :query
      attr_reader :data_source
      attr_reader :column_types

      # query       - query passed to the data source's +select+
      # data_source - object whose +select(query)+ yields rows
      def initialize(query, data_source)
        @query, @data_source = query, data_source
        @column_types = Mode::Connector::TypeMap.new
      end

      # Writes the query result to +path+ and returns a Dataset built from
      # that path and the collected column types.
      def perform!
        File.open(path, 'w+') { |file| write(file) }
        Mode::Connector::Dataset.new(path, column_types)
      end

      # Location of the output file ('data.json' in the temp directory).
      def path
        @path ||= File.join(tmpdir, 'data.json')
      end

      private

      # Writes '[' row,row,... ']' to +file+, one row at a time so the
      # full result never has to be held in memory.
      def write(file)
        index = 0
        file.write '['
        data_source.select(query) do |row|
          index += 1
          write_row(file, row, index)
        end
        file.write ']'
      end

      # Records +row+ in the type map and appends it as JSON, preceded by
      # a comma for every row after the first (index is 1-based).
      def write_row(file, row, index)
        column_types.insert(row)

        file.write ',' unless index == 1
        file.write JSON.generate(row)
      end

      # Temporary directory created on first use. It is not removed here.
      def tmpdir
        @tmpdir ||= Dir.mktmpdir
      end
    end
  end
end
|