infobright-loader 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +19 -0
- data/LICENSE-2.0.txt +202 -0
- data/README.md +232 -0
- data/Rakefile +2 -0
- data/bin/infobright-loader +45 -0
- data/control-file/template.yml +23 -0
- data/infobright-loader.gemspec +35 -0
- data/lib/infobright-loader.rb +5 -0
- data/lib/infobright-loader/cli/config.rb +204 -0
- data/lib/infobright-loader/cli/loader.rb +66 -0
- data/lib/infobright-loader/db.rb +90 -0
- data/lib/infobright-loader/loader.rb +175 -0
- data/lib/infobright-loader/version.rb +4 -0
- data/tests/manual/bad-files/.gitignore +1 -0
- data/tests/manual/bad-files/control-file-raw.yml +50 -0
- data/tests/manual/bad-files/data/b/b_1.txt +6 -0
- data/tests/manual/bad-files/data/b/b_2.txt +6 -0
- data/tests/manual/bad-files/data/b/b_3.txt +6 -0
- data/tests/manual/bad-files/data/b/b_4.txt +6 -0
- data/tests/manual/bad-files/data/b/b_5.txt +6 -0
- data/tests/manual/bad-files/data/b/b_6.txt +6 -0
- data/tests/manual/bad-files/data/c/c_1.txt +6 -0
- data/tests/manual/bad-files/data/c/c_2.txt +6 -0
- data/tests/manual/bad-files/data/c/c_3.txt +6 -0
- data/tests/manual/bad-files/data/c/c_4.txt +6 -0
- data/tests/manual/bad-files/data/c/c_5.txt +6 -0
- data/tests/manual/bad-files/data/c/c_6.txt +6 -0
- data/tests/manual/bad-files/data/d/d_1.txt +6 -0
- data/tests/manual/bad-files/data/d/d_2.txt +6 -0
- data/tests/manual/bad-files/data/d/d_3.txt +6 -0
- data/tests/manual/bad-files/data/d/d_4.txt +6 -0
- data/tests/manual/bad-files/data/d/d_5.txt +6 -0
- data/tests/manual/bad-files/data/d/d_6.txt +6 -0
- data/tests/manual/bad-files/data/e/e_1.txt +6 -0
- data/tests/manual/bad-files/data/e/e_2.txt +6 -0
- data/tests/manual/bad-files/data/e/e_3.txt +6 -0
- data/tests/manual/bad-files/data/e/e_4.txt +6 -0
- data/tests/manual/bad-files/data/e/e_5.txt +6 -0
- data/tests/manual/bad-files/data/e/e_6.txt +6 -0
- data/tests/manual/bad-files/data/f/f_1.txt +6 -0
- data/tests/manual/bad-files/data/f/f_2.txt +6 -0
- data/tests/manual/bad-files/data/f/f_3.txt +6 -0
- data/tests/manual/bad-files/data/f/f_4.txt +6 -0
- data/tests/manual/bad-files/data/f/f_5.txt +6 -0
- data/tests/manual/bad-files/data/f/f_6.txt +6 -0
- data/tests/manual/bad-files/run_test.sh +40 -0
- data/tests/manual/bad-files/setup.sql +8 -0
- data/tests/manual/bad-files/verify.sql +10 -0
- data/tests/manual/control-file/.gitignore +1 -0
- data/tests/manual/control-file/control-file-raw.yml +50 -0
- data/tests/manual/control-file/data/b/b_1.txt +6 -0
- data/tests/manual/control-file/data/b/b_2.txt +6 -0
- data/tests/manual/control-file/data/b/b_3.txt +6 -0
- data/tests/manual/control-file/data/b/b_4.txt +6 -0
- data/tests/manual/control-file/data/b/b_5.txt +6 -0
- data/tests/manual/control-file/data/b/b_6.txt +6 -0
- data/tests/manual/control-file/data/c/c_1.txt +6 -0
- data/tests/manual/control-file/data/c/c_2.txt +6 -0
- data/tests/manual/control-file/data/c/c_3.txt +6 -0
- data/tests/manual/control-file/data/c/c_4.txt +6 -0
- data/tests/manual/control-file/data/c/c_5.txt +6 -0
- data/tests/manual/control-file/data/c/c_6.txt +6 -0
- data/tests/manual/control-file/data/d/d_1.txt +6 -0
- data/tests/manual/control-file/data/d/d_2.txt +6 -0
- data/tests/manual/control-file/data/d/d_3.txt +6 -0
- data/tests/manual/control-file/data/d/d_4.txt +6 -0
- data/tests/manual/control-file/data/d/d_5.txt +6 -0
- data/tests/manual/control-file/data/d/d_6.txt +6 -0
- data/tests/manual/control-file/data/e/e_1.txt +6 -0
- data/tests/manual/control-file/data/e/e_2.txt +6 -0
- data/tests/manual/control-file/data/e/e_3.txt +6 -0
- data/tests/manual/control-file/data/e/e_4.txt +6 -0
- data/tests/manual/control-file/data/e/e_5.txt +6 -0
- data/tests/manual/control-file/data/e/e_6.txt +6 -0
- data/tests/manual/control-file/data/f/f_1.txt +6 -0
- data/tests/manual/control-file/data/f/f_2.txt +6 -0
- data/tests/manual/control-file/data/f/f_3.txt +6 -0
- data/tests/manual/control-file/data/f/f_4.txt +6 -0
- data/tests/manual/control-file/data/f/f_5.txt +6 -0
- data/tests/manual/control-file/data/f/f_6.txt +6 -0
- data/tests/manual/control-file/run_test.sh +40 -0
- data/tests/manual/control-file/setup.sql +8 -0
- data/tests/manual/control-file/verify.sql +10 -0
- data/tests/manual/folder/data/a/a_1.txt +6 -0
- data/tests/manual/folder/data/a/a_2.txt +6 -0
- data/tests/manual/folder/data/a/a_3.txt +6 -0
- data/tests/manual/folder/data/a/a_4.txt +6 -0
- data/tests/manual/folder/data/a/a_5.txt +6 -0
- data/tests/manual/folder/data/a/a_6.txt +6 -0
- data/tests/manual/folder/run_test.sh +37 -0
- data/tests/manual/folder/setup.sql +4 -0
- data/tests/manual/folder/verify.sql +2 -0
- metadata +158 -0
data/Rakefile
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
# Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
|
|
4
|
+
#
|
|
5
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
|
6
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
|
7
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
|
10
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
|
11
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
|
13
|
+
|
|
14
|
+
# Author:: Alex Dean (mailto:support@snowplowanalytics.com)
|
|
15
|
+
# Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
|
|
16
|
+
# License:: Apache License Version 2.0
|
|
17
|
+
|
|
18
|
+
$LOAD_PATH.unshift File.dirname(__FILE__)
|
|
19
|
+
|
|
20
|
+
require 'infobright-loader/cli/config'
|
|
21
|
+
require 'infobright-loader/cli/loader'
|
|
22
|
+
require 'infobright-loader/loader'
|
|
23
|
+
|
|
24
|
+
# This Ruby script is the command-line interface to the
# Infobright Ruby Loader.
#
# It builds the configuration from the command-line arguments (and the
# control file, if one was given), runs the load, and maps the outcome
# onto the process exit status: 0 on success, 1 on any failure.
begin
  config = InfobrightLoader::Cli::Config.get_config()
  InfobrightLoader::Cli::Loader.load(config)

# Expected failures (a failed load, or bad configuration): the message
# on its own is enough, no backtrace needed
rescue InfobrightLoader::Loader::LoadError, InfobrightLoader::Cli::Config::ConfigError => e
  $stderr.puts(e.message)
  exit 1

# A deliberate exit (e.g. from --help or --version) keeps the status it
# asked for, rather than being reported as a failure
rescue SystemExit
  raise

# Last-resort handler, so that anything unforeseen is reported with its
# backtrace before we exit with a failure status
rescue Exception => e
  $stderr.puts("Unexpected error: " + e.message)
  $stderr.puts(e.backtrace.join("\n"))
  exit 1
end

exit 0
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Example control file for Infobright Ruby Loader
|
|
2
|
+
|
|
3
|
+
# Can be overridden at the command line...
|
|
4
|
+
:load:
|
|
5
|
+
:processes: ADD HERE
|
|
6
|
+
:database:
|
|
7
|
+
:name: ADD HERE
|
|
8
|
+
:username: ADD HERE # Or leave blank to default to the user running the script
|
|
9
|
+
:password: ADD HERE # Or leave blank if no password
|
|
10
|
+
:data_format:
|
|
11
|
+
:separator: ADD HERE
|
|
12
|
+
:encloser: ADD HERE # Or leave blank if no encloser
|
|
13
|
+
# ... end of variables overridable at command line.
|
|
14
|
+
|
|
15
|
+
# Map of tables to populate, along with files to load for each table
|
|
16
|
+
:data_loads:
|
|
17
|
+
# For each table, list the data files to load
|
|
18
|
+
TABLE_NAME_1:
|
|
19
|
+
- PATH/TO/FILE-1
|
|
20
|
+
- PATH/TO/FILE-2
|
|
21
|
+
TABLE_NAME_2:
|
|
22
|
+
- PATH/TO/FILE-3
|
|
23
|
+
- PATH/TO/FILE-4
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
|
11
|
+
|
|
12
|
+
# Author:: Alex Dean (mailto:support@snowplowanalytics.com)
|
|
13
|
+
# Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
|
|
14
|
+
# License:: Apache License Version 2.0
|
|
15
|
+
|
|
16
|
+
# -*- encoding: utf-8 -*-
|
|
17
|
+
lib = File.expand_path('../lib', __FILE__)
|
|
18
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
19
|
+
require 'infobright-loader/version'
|
|
20
|
+
|
|
21
|
+
# Gem specification for the Infobright Ruby Loader.
# The gem name and version come from infobright-loader/version.
Gem::Specification.new do |gem|
  gem.authors       = ["Alex Dean <support@snowplowanalytics.com>"]
  gem.email         = ["support@snowplowanalytics.com"]
  # RubyGems expects the summary to be the short one-liner and the
  # description to be the longer text
  gem.summary       = %q{Loads data files into Infobright}
  gem.description   = %q{Infobright Ruby Loader (IRL) is a data loader for Infobright Community Edition (ICE) and Enterprise Edition (IEE), built as a Ruby gem. Inspired by ParaFlex (the bash equivalent)}
  gem.homepage      = "http://snowplowanalytics.com"

  # Package every file tracked by git; executables are whatever lives in bin/
  gem.files         = `git ls-files`.split($\)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  # This project keeps its tests under tests/, so match that as well as test/
  gem.test_files    = gem.files.grep(%r{^(tests?|spec|features)/})
  gem.name          = InfobrightLoader::NAME
  gem.version       = InfobrightLoader::VERSION
  gem.platform      = Gem::Platform::RUBY
  gem.require_paths = ["lib"]
end
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
|
11
|
+
|
|
12
|
+
# Author:: Alex Dean (mailto:support@snowplowanalytics.com)
|
|
13
|
+
# Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
|
|
14
|
+
# License:: Apache License Version 2.0
|
|
15
|
+
|
|
16
|
+
require 'optparse'
|
|
17
|
+
require 'date'
|
|
18
|
+
require 'yaml'
|
|
19
|
+
|
|
20
|
+
require 'infobright-loader/db'
|
|
21
|
+
|
|
22
|
+
# Config module to hold functions related to CLI argument parsing
|
|
23
|
+
# and config file reading to support the daily ETL job.
|
|
24
|
+
module InfobrightLoader
|
|
25
|
+
module Cli
|
|
26
|
+
module Config
|
|
27
|
+
|
|
28
|
+
# The name of this script, as shown in the usage banner and --version output
|
|
29
|
+
SCRIPT_NAME = InfobrightLoader::NAME
|
|
30
|
+
|
|
31
|
+
# Raised when the command-line arguments or the control file are invalid
|
|
32
|
+
class ConfigError < ArgumentError; end
|
|
33
|
+
|
|
34
|
+
# Configuration for loading a set of tables from a set of files (where
|
|
35
|
+
# each table can have multiple files loaded into it)
|
|
36
|
+
LoadHashConfig = Struct.new(:load_hash, :db, :processes, :separator, :encloser)
|
|
37
|
+
|
|
38
|
+
# Configuration for loading all the files from a specific directory into
|
|
39
|
+
# a specific table
|
|
40
|
+
LoadFolderConfig = Struct.new(:folder, :table, :db, :separator, :encloser)
|
|
41
|
+
|
|
42
|
+
# Validates and returns the configuration.
#
# Parses the command-line arguments and, when a control file was given,
# merges in the settings read from it (a value given on the command
# line wins over the same setting in the control file).
#
# Returns: a LoadFolderConfig when no control file was given, otherwise
# a LoadHashConfig.
# Raises: ConfigError if the control file is not a YAML mapping, or if
# a required setting is missing or invalid.
def get_config()

  options = Config.parse_args()

  if options[:control].nil?

    # Folder mode: everything comes from the command line
    config = LoadFolderConfig.new
    config.db = InfobrightLoader::Db::DbConfig.new(options[:db], options[:username], options[:password])
    config.separator = options[:separator]
    config.encloser = options[:encloser]
    config.folder = options[:folder]
    config.table = options[:table]

  else

    # NOTE(review): YAML.load_file is used on the operator's own control
    # file; switch to a safe loader if control files can ever come from
    # an untrusted source.
    yaml = YAML.load_file(options[:control])
    unless yaml.is_a?(Hash)
      raise ConfigError, "Control file '#{options[:control]}' is empty or is not a YAML mapping"
    end

    # A section can be absent or left blank in the control file; treat
    # either as "no settings" so the checks below report what is missing
    section = lambda { |key| yaml[key].is_a?(Hash) ? yaml[key] : {} }
    load_settings     = section.call(:load)
    format_settings   = section.call(:data_format)
    database_settings = section.call(:database)

    # Set the overridable fields if they haven't been overridden at the command-line
    config = LoadHashConfig.new
    get_or_else = lambda { |x, y| x.nil? ? y : x }
    config.processes = get_or_else.call(options[:processes], load_settings[:processes])
    config.separator = get_or_else.call(options[:separator], format_settings[:separator])
    config.encloser = get_or_else.call(options[:encloser], format_settings[:encloser])

    db_name = get_or_else.call(options[:db], database_settings[:name])
    db_username = get_or_else.call(options[:username], database_settings[:username])
    db_password = get_or_else.call(options[:password], database_settings[:password])
    config.db = InfobrightLoader::Db::DbConfig.new(db_name, db_username, db_password)

    # Finally grab the map of tables to the files to load into them
    config.load_hash = yaml[:data_loads]

    # Check we have everything now
    if db_name.nil?
      raise ConfigError, "Database name not specified"
    end
    if config.processes.nil?
      raise ConfigError, "Number of processes not specified"
    end
    # to_s so that a non-string YAML value cannot fail on empty?
    if config.separator.to_s.empty?
      raise ConfigError, "Separator not specified - have you escaped ('\\') it in your control file?"
    end

    # Check that number of processes is a positive integer
    unless config.processes.to_i > 0
      raise ConfigError, "Parallel load processes '#{config.processes}' is not a positive integer"
    end
    config.processes = config.processes.to_i # Normalise to an Integer for the loader

    # Check that we have some tables to load
    if config.load_hash.nil? || config.load_hash.empty?
      raise ConfigError, "Must specify at least one table to load"
    end

  end

  config # Return either our LoadFolderConfig or our LoadHashConfig
end
module_function :get_config
|
|
105
|
+
|
|
106
|
+
# Parse the command-line arguments
#
# Two modes are supported: a control file (-c), or a single table
# loaded from a folder of data files (-d, -t and -f).
#
# Returns: the hash of parsed options. In folder mode the separator
# and encloser defaults are filled in and the folder always ends in '/'.
# Raises: ConfigError if the arguments cannot be parsed, are
# incomplete, conflict with each other, or name a folder or control
# file that cannot be used.
def parse_args()

  # Handle command-line arguments
  options = {}
  optparse = OptionParser.new do |opts|

    opts.banner = "Usage: %s [options]" % SCRIPT_NAME
    opts.separator ""
    opts.separator "Specify a control file:"

    opts.on('-c', '--control FILE', 'control file') { |config| options[:control] = config }
    opts.on('-x', '--processes INT', 'optional number of parallel processes to run *') { |config| options[:processes] = config }

    opts.separator ""
    opts.separator "Or load a table from a folder of data files:"

    opts.on('-d', '--db NAME', 'database name *') { |config| options[:db] = config }
    opts.on('-u', '--username NAME', 'database username *') { |config| options[:username] = config }
    opts.on('-p', '--password NAME', 'database password *') { |config| options[:password] = config }

    opts.on('-t', '--table NAME', 'table to load data files into') { |config| options[:table] = config }
    opts.on('-f', '--folder DIR', 'directory containing data files to load') { |config| options[:folder] = config }
    opts.on('-s', '--separator CHAR', 'optional field separator, defaults to pipe bar (|) *') { |config| options[:separator] = config }
    opts.on('-e', '--encloser CHAR', 'optional field encloser, defaults to none *') { |config| options[:encloser] = config }

    opts.separator ""
    opts.separator "* overrides the same setting in the control file if control file also specified"

    opts.separator ""
    opts.separator "Common options:"

    opts.on_tail('-h', '--help', 'Show this message') { puts opts; exit }
    opts.on_tail('-v', "--version", "Show version") do
      puts "%s %s" % [SCRIPT_NAME, InfobrightLoader::VERSION]
      exit
    end
  end

  # Run OptionParser's structural validation. ParseError is the common
  # parent of all of OptionParser's parsing failures, so every kind of
  # malformed command line is reported together with the usage text.
  begin
    optparse.parse!
  rescue OptionParser::ParseError => e
    raise ConfigError, "#{e}\n#{optparse}"
  end

  # If no control file given, most of the options are required
  if options[:control].nil?

    # Set defaults if necessary
    options[:separator] ||= '|'
    options[:encloser] ||= ''

    # First check we have all the options we need
    mandatory = [:db, :table, :folder]
    missing = mandatory.select { |param| options[param].nil? }
    unless missing.empty?
      raise ConfigError, "No control file specified, so missing options: #{missing.join(', ')}\n#{optparse}"
    end

    # Check our folder exists and is not empty
    unless File.directory?(options[:folder])
      raise ConfigError, "Specified folder '#{options[:folder]}' not found"
    end
    if (Dir.entries(options[:folder]) - %w{ . .. }).empty?
      raise ConfigError, "Specified folder '#{options[:folder]}' is empty"
    end

    # Check user didn't try to override processes
    unless options[:processes].nil?
      raise ConfigError, "Limited to one process when loading only one table"
    end

    # Add trailing slash if needed to the folder (building a new string
    # rather than modifying the one we were handed)
    unless options[:folder].end_with?('/')
      options[:folder] = options[:folder] + '/'
    end

  # We are working with the control file
  else

    # Check we don't have a conflict of purpose
    if !options[:folder].nil? || !options[:table].nil?
      raise ConfigError, "Specifying a control file as well as a folder and table does not make sense"
    end

    # Check the control file exists
    unless File.file?(options[:control])
      raise ConfigError, "Control file '#{options[:control]}' does not exist, or is not a file."
    end
  end

  options
end
module_function :parse_args
|
|
201
|
+
|
|
202
|
+
end
|
|
203
|
+
end
|
|
204
|
+
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
|
11
|
+
|
|
12
|
+
# Author:: Alex Dean (mailto:support@snowplowanalytics.com)
|
|
13
|
+
# Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
|
|
14
|
+
# License:: Apache License Version 2.0
|
|
15
|
+
|
|
16
|
+
require 'infobright-loader/cli/config'
|
|
17
|
+
require 'infobright-loader/loader'
|
|
18
|
+
|
|
19
|
+
# Loader determines which load action to run.
#
# It's a selective wrapper around
# InfobrightLoader::Loader's load_from_folder() and load_from_hash()
module InfobrightLoader
  module Cli
    module Loader

      # Determine what type of config we have,
      # and then call the appropriate load.
      #
      # config - a Cli::Config::LoadFolderConfig or a
      #          Cli::Config::LoadHashConfig
      #
      # Returns: nil when every file loaded.
      # Raises: Cli::Config::ConfigError if config is neither of the two
      # supported types; InfobrightLoader::Loader::LoadError, listing the
      # failures, if the underlying load reported any.
      def load(config)

        failures =
          case config
          when InfobrightLoader::Cli::Config::LoadFolderConfig
            InfobrightLoader::Loader::load_from_folder(
              config.folder,
              config.table,
              config.db,
              config.separator,
              config.encloser
            )

          when InfobrightLoader::Cli::Config::LoadHashConfig
            InfobrightLoader::Loader::load_from_hash(
              config.load_hash,
              config.db,
              config.processes,
              config.separator,
              config.encloser
            )

          else
            # ConfigError lives in Cli::Config, so it has to be fully
            # qualified to be resolvable from inside Cli::Loader
            raise InfobrightLoader::Cli::Config::ConfigError, "config argument passed to Cli::Loader::load() must be a LoadFolderConfig or a LoadHashConfig"
          end

        unless failures.empty?
          error = "Load of following files failed (reason in brackets):\n" +
                  failures.map { |f| " - " + f }.join("\n")
          raise InfobrightLoader::Loader::LoadError, error
        end
      end
      module_function :load

    end
  end
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# Copyright (c) 2012 SnowPlow Analytics Ltd. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# This program is licensed to you under the Apache License Version 2.0,
|
|
4
|
+
# and you may not use this file except in compliance with the Apache License Version 2.0.
|
|
5
|
+
# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
|
|
6
|
+
#
|
|
7
|
+
# Unless required by applicable law or agreed to in writing,
|
|
8
|
+
# software distributed under the Apache License Version 2.0 is distributed on an
|
|
9
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
|
|
11
|
+
|
|
12
|
+
# Author:: Alex Dean (mailto:support@snowplowanalytics.com)
|
|
13
|
+
# Copyright:: Copyright (c) 2012 SnowPlow Analytics Ltd
|
|
14
|
+
# License:: Apache License Version 2.0
|
|
15
|
+
|
|
16
|
+
require 'infobright-loader/loader'
|
|
17
|
+
|
|
18
|
+
require 'shellwords'

module InfobrightLoader
  module Db

    # Configuration for accessing an Infobright database.
    # :username and :password can be nil
    DbConfig = Struct.new(:name, :username, :password)

    # Is mysql-ib running and the
    # Infobright server accessible?
    #
    # Returns: true if the client connected and quit with status 0
    def running?(db)
      ib = ib_command_from(db)
      `#{ib} -e \\\\q > /dev/null 2>&1`
      ($?.to_i == 0)
    end
    module_function :running?

    # Does the database exist and can
    # we access it?
    #
    # Returns: true if the client could open db.name and quit with status 0
    def db_exists?(db)
      ib = ib_command_from(db)
      `#{ib} -D #{Shellwords.escape(db.name.to_s)} -e \\\\q > /dev/null 2>&1`
      ($?.to_i == 0)
    end
    module_function :db_exists?

    # Does the table exist?
    #
    # Returns: true if describing the table in db.name ended with status 0
    def table_exists?(table, db)
      ib = ib_command_from(db)
      describe = "desc #{table};"
      `echo #{Shellwords.escape(describe)} | #{ib} -D #{Shellwords.escape(db.name.to_s)} > /dev/null 2>&1`
      ($?.to_i == 0)
    end
    module_function :table_exists?

    # Load data from one file into one table
    #
    # file      - path of the data file to load
    # table     - name of the table to load it into
    # db        - a DbConfig
    # separator - field separator, defaults to '|'
    # encloser  - field encloser, defaults to none
    #
    # Returns: nil on success.
    # Raises: InfobrightLoader::Loader::LoadError if the file does not
    # exist, or if the client finishes with a non-zero status (the
    # message carries the client's exit code and its combined output).
    def load_file(file, table, db, separator='|', encloser='')

      # Make sure separator and encloser are escaped if either is " or '
      escaper = lambda { |c| (c == '"' || c == "'") ? "\\" + c : c }
      separator = escaper.call(separator)
      encloser = escaper.call(encloser)

      # Check the file exists
      unless File.file?(file)
        raise InfobrightLoader::Loader::LoadError, "file does not exist, or is not a file"
      end

      ib = ib_command_from(db)
      load = "LOAD DATA INFILE '#{file}' " +
             "INTO TABLE #{table} " +
             "FIELDS TERMINATED BY '#{separator}' ENCLOSED BY '#{encloser}'; "

      # The statement is shell-escaped so that the shell hands it to echo
      # verbatim, whatever characters the path or separator contain
      stdout_err = `echo #{Shellwords.escape(load)} | #{ib} -D #{Shellwords.escape(db.name.to_s)} 2>&1`
      status = $?
      unless status.to_i == 0
        # exitstatus is the client's own exit code; to_i is the raw wait
        # status, only used as a fallback if the process did not exit normally
        exit_code = status.exitstatus || status.to_i
        raise InfobrightLoader::Loader::LoadError, "mysql-ib error code #{exit_code}: #{stdout_err}"
      end
    end
    module_function :load_file

    private

    # Get path to Infobright's mysql-ib client,
    # then add in username and password as
    # necessary.
    #
    # `locate` can print several matches, one per line; an executable
    # file actually named mysql-ib is preferred, otherwise the first
    # match is used.
    #
    # Returns: the shell-escaped start of a client command line
    def ib_command_from(db)
      matches = `locate mysql-ib`.split("\n")
      client = matches.find { |path|
        File.basename(path) == 'mysql-ib' && File.file?(path) && File.executable?(path)
      } || matches.first.to_s

      command = [Shellwords.escape(client)]
      command << "-u #{Shellwords.escape(db.username.to_s)}" unless db.username.nil?
      command << "--password=#{Shellwords.escape(db.password.to_s)}" unless db.password.nil?
      command.join(' ')
    end
    module_function :ib_command_from

  end
end
|