hasta 0.1.0
- checksums.yaml +7 -0
- data/.cane +1 -0
- data/.gitignore +3 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +20 -0
- data/README.md +150 -0
- data/Rakefile +15 -0
- data/hasta.gemspec +29 -0
- data/lib/hasta.rb +46 -0
- data/lib/hasta/cached_s3_file.rb +21 -0
- data/lib/hasta/combined_data_source.rb +35 -0
- data/lib/hasta/combined_storage.rb +30 -0
- data/lib/hasta/configuration.rb +88 -0
- data/lib/hasta/emr_job_definition.rb +104 -0
- data/lib/hasta/emr_node.rb +103 -0
- data/lib/hasta/env.rb +35 -0
- data/lib/hasta/execution_context.rb +90 -0
- data/lib/hasta/filter.rb +40 -0
- data/lib/hasta/filtered_s3_file.rb +34 -0
- data/lib/hasta/identity_mapper.rb +17 -0
- data/lib/hasta/identity_reducer.rb +18 -0
- data/lib/hasta/in_memory_data_sink.rb +40 -0
- data/lib/hasta/in_memory_data_source.rb +35 -0
- data/lib/hasta/interpolate_string.rb +45 -0
- data/lib/hasta/local_file_path.rb +12 -0
- data/lib/hasta/local_storage.rb +41 -0
- data/lib/hasta/mapper.rb +23 -0
- data/lib/hasta/reducer.rb +29 -0
- data/lib/hasta/resolve_cached_s3_file.rb +29 -0
- data/lib/hasta/resolve_filtered_s3_file.rb +22 -0
- data/lib/hasta/runner.rb +32 -0
- data/lib/hasta/s3_data_sink.rb +48 -0
- data/lib/hasta/s3_data_source.rb +41 -0
- data/lib/hasta/s3_file.rb +56 -0
- data/lib/hasta/s3_file_cache.rb +23 -0
- data/lib/hasta/s3_storage.rb +21 -0
- data/lib/hasta/s3_uri.rb +60 -0
- data/lib/hasta/sorted_data_source.rb +36 -0
- data/lib/hasta/storage.rb +82 -0
- data/lib/hasta/tasks.rb +8 -0
- data/lib/hasta/tasks/runner.rb +84 -0
- data/lib/hasta/version.rb +3 -0
- data/spec/fixtures/hasta/filter_config.txt +1 -0
- data/spec/fixtures/hasta/json/emr_node.json +10 -0
- data/spec/fixtures/hasta/json/pipeline_definition.json +135 -0
- data/spec/fixtures/hasta/lib/failing_mapper.rb +19 -0
- data/spec/fixtures/hasta/lib/test_env_mapper.rb +20 -0
- data/spec/fixtures/hasta/lib/test_identity_mapper.rb +20 -0
- data/spec/fixtures/hasta/lib/test_types_mapper.rb +21 -0
- data/spec/fixtures/hasta/lib/types.rb +1 -0
- data/spec/fixtures/hasta/lib/unconventional_reducer.rb +17 -0
- data/spec/hasta/combined_data_source_spec.rb +25 -0
- data/spec/hasta/combined_storage_spec.rb +54 -0
- data/spec/hasta/configuration_spec.rb +49 -0
- data/spec/hasta/emr_job_definition_spec.rb +181 -0
- data/spec/hasta/emr_node_spec.rb +32 -0
- data/spec/hasta/env_spec.rb +30 -0
- data/spec/hasta/execution_context_spec.rb +67 -0
- data/spec/hasta/filter_spec.rb +66 -0
- data/spec/hasta/filtered_s3_file_spec.rb +45 -0
- data/spec/hasta/identity_mapper_spec.rb +22 -0
- data/spec/hasta/identity_reducer_spec.rb +20 -0
- data/spec/hasta/interpolate_string_spec.rb +44 -0
- data/spec/hasta/local_file_path_spec.rb +18 -0
- data/spec/hasta/local_storage_spec.rb +52 -0
- data/spec/hasta/mapper_spec.rb +26 -0
- data/spec/hasta/reducer_spec.rb +26 -0
- data/spec/hasta/resolved_cached_s3_file_spec.rb +68 -0
- data/spec/hasta/s3_data_source_spec.rb +39 -0
- data/spec/hasta/s3_file_cache_spec.rb +45 -0
- data/spec/hasta/s3_file_spec.rb +122 -0
- data/spec/hasta/s3_storage_spec.rb +24 -0
- data/spec/hasta/s3_uri_spec.rb +151 -0
- data/spec/hasta/sorted_data_source_spec.rb +22 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/shared_contexts/hasta/local_fog_storage.rb +17 -0
- data/spec/support/shared_examples/hasta/storage_examples.rb +103 -0
- metadata +254 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: adeafa9f4dae6acb9dd59dc39432d4e4b3d86b79
  data.tar.gz: 3f15a082f3c72d9a75df32e728cff7cc0ff88787
SHA512:
  metadata.gz: 0e0dc797ebb1acab0781324a47ed6184f51797a1b3893da9ac069024dc9cfadd56f6dcb5c1338855e59c1c0a82745c601102fd62e4de58a198ed278332ac5f24
  data.tar.gz: 84a5056c2c069512a99eb8b4d9b58c9dcb0827732ffe555f7708705da0579706be7659abbfc6f4b97e4e458b02e01d2a25c38489c240e0a166d1e2b316d87132
data/.cane
ADDED
@@ -0,0 +1 @@
--style-measure 100
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
Copyright (c) 2014 Swipely, Inc.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,150 @@
# Hasta

<b>HA</b>doop <b>S</b>treaming <b>T</b>est h<b>A</b>rness for Amazon EMR

A test harness for running [Hadoop Streaming](http://hadoop.apache.org/docs/r1.2.1/streaming.html) jobs written in Ruby without running Hadoop.
The test harness understands the [Amazon Data Pipeline](http://aws.amazon.com/datapipeline/) and [Elastic Map Reduce](http://aws.amazon.com/elasticmapreduce/) (EMR) JSON definition format and can automatically parse the details of a job out of a Data Pipeline configuration file.

## Installation

Add this line to your application's Gemfile:

    gem 'hasta'

And then execute:

    $ bundle

Or install it yourself as:

    $ gem install hasta

## Usage

1. Require the Hasta Rake tasks in your project's Rakefile:

   ```ruby
   require 'hasta/tasks'
   ```

2. Add the `Hasta::Tasks::Runner` task to your project's Rakefile:

   ```ruby
   Hasta::Tasks::Runner.new do |task, opts|
     task.definition_file = <path-to-AWS-datapipeline-definition-json-file>
     task.job_id = opts[:job_id]
     task.scheduled_start_time = Time.parse(opts[:scheduled_start_time])
     task.project_root = File.dirname(__FILE__)
   end
   ```

3. Run the test from the command line by calling:

   ```
   % rake runner[<job-id>]
   ```

   or

   ```
   % rake runner[<job-id>,<scheduled-start-time>]
   ```

Here `job-id` is the id of the EMR job you are testing and `scheduled-start-time` is an [ISO 8601](http://en.wikipedia.org/wiki/ISO_8601)-formatted time specifying the time to use when interpolating any `@scheduledStartTime` variable references in your pipeline definition file.
If your pipeline definition file has no `@scheduledStartTime` variable references, there is no need to include a `scheduled-start-time` argument.
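
For example, with a hypothetical job id `MyEmrJob` and a scheduled start time of midnight UTC on June 1, 2014, the invocation would be:

```
% rake runner[MyEmrJob,2014-06-01T00:00:00Z]
```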

## Configuration

The following code snippet illustrates how to update the global Hasta configuration, which values are mandatory, and which values have defaults.

```ruby
Hasta.configure do |config|
  # mandatory
  config.project_root = nil

  # optional
  config.local_storage_root = '~/fog'
  config.cache_storage_root = '~/.hasta'
  config.project_steps = 'steps'
  config.logger = Logger.new(STDOUT)
end
```

## Data

All of the data read and written by EMR jobs is stored in [S3](http://aws.amazon.com/s3/).
Hasta uses the S3 URIs contained in the Data Pipeline definition file for all of its reads and writes.
When reading data, it will first look on the local filesystem.
If the requested S3 URI is not found locally, it will look for it on S3.
Hasta never writes data to S3, only to the local filesystem.

Hasta uses [Fog](http://fog.io/)'s [local storage provider](https://github.com/fog/fog/blob/master/lib/fog/local/storage.rb) to read and write data to the local filesystem using S3 URIs.
The root directory for the local storage is controlled by the `Hasta.local_storage_root` configuration property, which is set to `~/fog` by default.

Hasta reads all of its input data from the S3 paths specified in the AWS Data Pipeline definition file.
If you wish to use different data for an input, you need to put that data into the local directory that corresponds to the S3 path in the definition file.

To control which credentials Hasta uses when communicating with S3, update your `~/.fog` file or set the `FOG_CREDENTIAL` environment variable to the appropriate credential.
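
For illustration (hypothetical bucket and key, assuming the default `local_storage_root`), the local path that shadows an S3 URI is simply the URI's bucket and key appended to the storage root:

```
s3://my-bucket/input/part-00000  =>  ~/fog/my-bucket/input/part-00000
```

Put replacement input data at the local path and Hasta will read it instead of the S3 original.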

## Data Filtering

Hasta automatically filters input data to minimize execution time.
By default, Hasta reads the filtering configuration from a file named `filter_config.txt` in the current directory.
You can change the filter configuration file by setting the `HASTA_DATA_FILTER_FILE` environment variable.
If you want to disable data filtering, you can set the `HASTA_DATA_FILTERING` environment variable to `OFF`.

### Filter Configuration

The filter configuration file contains regular expressions, one per line.
Any line of input data that matches at least one of the regular expressions is included in the test input.
Lines that do not match any of the regular expressions are excluded from the test input.
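
As a hypothetical example, a `filter_config.txt` that keeps only input lines whose tab-separated key is `1234` or `5678` would contain:

```
^1234\t
^5678\t
```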

### Caching

Hasta caches the filtered input data locally to improve performance.
The first time a data source is referenced in a test, the filtered results are written locally.
Subsequent tests that access the same data source with the same filter read from the local cache.
This results in a significant speedup on subsequent runs when dealing with aggressively filtered large data sets.
By default, the files are written to the `~/.hasta/cache` directory, but this can be controlled using the `cache_storage_root` configuration setting.

## Execution

Hasta sets up the environment variables specified by the `-cmdenv` switch in the job definition.
It also pulls down all cache files from S3 and stores them in local S3 storage.
For all cache files that do not have a `.rb` file extension, an environment variable is added to the `ENV` that points to the absolute path of the local file.

### Example

Given the following `cacheFile` parameter:
```
-cacheFile s3://my-bucket/path/to/abbreviations.json#abbreviations.json
```

The following environment variable will be added to the `ENV`:
```
ENV['ABBREVIATIONS_FILE_PATH'] #=> "#{Hasta.local_storage_root}/my-bucket/path/to/abbreviations.json"
```

The parent directory of each cache file that has a `.rb` file extension is added to the `$LOAD_PATH`, so the mapper and reducer can use `require` statements to load the code in these files.

Hasta executes mappers and reducers in subprocesses.
This isolates each job and prevents the modifications to `ENV` and `$LOAD_PATH` described above from leaking into the parent process.

The output of the mapper is sorted before it is processed by the reducer.
The mapper output is sorted in ascending order, according to the natural sort order for Ruby Strings.
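
For instance (a minimal illustration, not code from the gem), the byte-wise ordering of Ruby Strings means capitalized keys sort before lowercase ones:

```ruby
["banana\t2", "apple\t1", "Apple\t3"].sort
#=> ["Apple\t3", "apple\t1", "banana\t2"]
```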

## Requirements

### Mappers and Reducers

1. must be stand-alone Ruby scripts
2. must be defined in the `Hasta.project_root`/`Hasta.project_steps` directory
3. must read their input lines from stdin and write their output lines to stdout
4. any data written to stderr will be logged at the error level using `Hasta.logger`
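
A minimal sketch of a mapper satisfying these requirements (a hypothetical word-count example, not part of the gem):

```ruby
#!/usr/bin/env ruby
# Emits a "<word>\t1" pair for every whitespace-separated token on stdin.
STDIN.each_line do |line|
  line.split.each do |word|
    STDOUT.puts "#{word}\t1"
  end
end
```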

## Contributing

1. Fork it
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Commit your changes (`git commit -am 'Add some feature'`)
4. Push to the branch (`git push origin my-new-feature`)
5. Create a new Pull Request

## License

Copyright (c) 2014 Swipely, Inc. See [LICENSE.txt](https://github.com/swipely/hasta/blob/master/LICENSE.txt) for further details.
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
require 'rake'
require 'cane/rake_task'
require 'rspec/core/rake_task'

RSpec::Core::RakeTask.new do |t|
  t.pattern = File.join(File.dirname(__FILE__), 'spec', '**', '*_spec.rb')
end

Cane::RakeTask.new('quality') do |cane|
  cane.canefile = '.cane'
end

task :all => [:quality, :spec]

task :default => :all
data/hasta.gemspec
ADDED
@@ -0,0 +1,29 @@
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'hasta/version'

Gem::Specification.new do |spec|
  spec.name          = "hasta"
  spec.version       = Hasta::VERSION
  spec.authors       = ["danhodge"]
  spec.email         = ["dan@swipely.com"]
  spec.summary       = %q{HAdoop Streaming Test hArness}
  spec.description   = %q{Harness for locally testing streaming Hadoop jobs written in Ruby}
  spec.homepage      = ""
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency "fog"
  spec.add_dependency "json"

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "cane"
  spec.add_development_dependency "pry"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
data/lib/hasta.rb
ADDED
@@ -0,0 +1,46 @@
# Copyright Swipely, Inc. All rights reserved.

require "hasta/version"
require "hasta/configuration"

require "forwardable"

# The HAdoop Streaming Test hArness
module Hasta
  extend self
  extend Forwardable

  Error = Class.new(StandardError)
  NonExistentPath = Class.new(Error)
  ClassLoadError = Class.new(Error)
  ExecutionError = Class.new(Error)
  ConfigurationError = Class.new(Error)

  DELEGATED_ATTRS = [
    :combined_storage,
    :local_storage_root,
    :logger,
    :project_root,
    :project_steps,
  ]

  def_delegators :config, *DELEGATED_ATTRS

  def configure
    yield config
  end

  def tab_separated_line(line)
    if line.include?("\t")
      line
    else
      "#{line}\t"
    end
  end

  private

  def config
    @config ||= Configuration.new
  end
end
data/lib/hasta/cached_s3_file.rb
ADDED
@@ -0,0 +1,21 @@
# Copyright Swipely, Inc. All rights reserved.

require 'delegate'

module Hasta
  # Implements Hasta's S3File interface for files retrieved from the cache
  class CachedS3File < SimpleDelegator
    def initialize(cached_file, s3_uri)
      super(S3File.wrap(cached_file))
      @s3_uri = s3_uri
    end

    def key
      @s3_uri.path
    end

    def s3_uri
      @s3_uri
    end
  end
end
data/lib/hasta/combined_data_source.rb
ADDED
@@ -0,0 +1,35 @@
# Copyright Swipely, Inc. All rights reserved.

module Hasta
  # Combines multiple data sources so they can be iterated over continuously
  class CombinedDataSource
    attr_reader :name

    def initialize(sources, name = nil)
      @sources = sources
      @name = name || sources.map(&:name).compact.join(', ')
    end

    def each_line
      return enum_for(:each_line) unless block_given?

      sources.each do |source|
        source.each_line do |line|
          yield line
        end
      end
    end

    def to_a
      each_line.to_a
    end

    def to_s
      "#<#{self.class.name}:#{name}>"
    end

    private

    attr_reader :sources
  end
end
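A quick usage sketch of this class (hypothetical sources; any object responding to `name` and `each_line` works):

```ruby
require 'hasta/combined_data_source'

# Hypothetical stand-in for a real data source.
Source = Struct.new(:name, :lines) do
  def each_line(&block)
    lines.each(&block)
  end
end

combined = Hasta::CombinedDataSource.new([
  Source.new('a', ["1\tx"]),
  Source.new('b', ["2\ty"]),
])

combined.name  #=> "a, b"
combined.to_a  #=> ["1\tx", "2\ty"]
```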
data/lib/hasta/combined_storage.rb
ADDED
@@ -0,0 +1,30 @@
# Copyright Swipely, Inc. All rights reserved.

require 'hasta/s3_uri'
require 'hasta/storage'

module Hasta
  # The file storage interface used by the local map/reduce jobs
  class CombinedStorage
    def initialize(s3_storage, local_storage)
      @s3_storage = s3_storage
      @local_storage = local_storage
    end

    def files_for(s3_uri)
      if local_storage.exists?(s3_uri)
        local_storage.files_for(s3_uri)
      else
        s3_storage.files_for(s3_uri)
      end
    end

    def write(s3_uri, data_source)
      local_storage.write(s3_uri, data_source)
    end

    private

    attr_reader :s3_storage, :local_storage
  end
end
data/lib/hasta/configuration.rb
ADDED
@@ -0,0 +1,88 @@
# Copyright Swipely, Inc. All rights reserved.

require 'fog'
require 'logger'

require 'hasta/local_storage'
require 'hasta/s3_storage'
require 'hasta/combined_storage'
require 'hasta/filter'
require 'hasta/resolve_cached_s3_file'
require 'hasta/resolve_filtered_s3_file'

module Hasta
  # Global configuration settings
  class Configuration
    attr_accessor :project_root
    attr_writer :local_storage_root, :cache_storage_root, :project_steps, :logger, :filter

    def local_storage_root
      @local_storage_root ||= '~/fog'
    end

    def cache_storage_root
      @cache_storage_root ||= '~/.hasta'
    end

    def project_steps
      @project_steps ||= 'steps'
    end

    def logger
      @logger ||= Logger.new(STDOUT)
    end

    def project_steps_dir
      File.join(project_root, project_steps)
    end

    def combined_storage
      @combined_storage ||= CombinedStorage.new(
        S3Storage.new(fog_s3_storage, resolver),
        LocalStorage.new(fog_local_storage, resolver)
      )
    end

    def filter
      unless @filter || ENV['HASTA_DATA_FILTERING'] == 'OFF'
        filter_file = ENV['HASTA_DATA_FILTER_FILE'] || 'filter_config.txt'
        @filter ||= Filter.from_file(filter_file)
      end

      @filter
    end

    private

    def fog_s3_storage
      # Use FOG_CREDENTIAL env variable to control AWS credentials
      @fog_s3_storage ||= Fog::Storage::AWS.new
    end

    def fog_local_storage
      @fog_local_storage ||= local_fog(local_storage_root)
    end

    def fog_cache_storage
      @fog_cache_storage ||= local_fog(cache_storage_root)
    end

    def local_fog(local_root)
      Fog::Storage.new(
        :provider => 'Local',
        :local_root => local_root,
        :endpoint => 'http://example.com'
      )
    end

    def resolver
      if filter
        ResolveCachedS3File.new(
          S3FileCache.new(fog_cache_storage), ResolveFilteredS3File.new(filter)
        )
      else
        Hasta::Storage::ResolveS3File
      end
    end
  end
end