hasta 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.cane +1 -0
- data/.gitignore +3 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +20 -0
- data/README.md +150 -0
- data/Rakefile +15 -0
- data/hasta.gemspec +29 -0
- data/lib/hasta.rb +46 -0
- data/lib/hasta/cached_s3_file.rb +21 -0
- data/lib/hasta/combined_data_source.rb +35 -0
- data/lib/hasta/combined_storage.rb +30 -0
- data/lib/hasta/configuration.rb +88 -0
- data/lib/hasta/emr_job_definition.rb +104 -0
- data/lib/hasta/emr_node.rb +103 -0
- data/lib/hasta/env.rb +35 -0
- data/lib/hasta/execution_context.rb +90 -0
- data/lib/hasta/filter.rb +40 -0
- data/lib/hasta/filtered_s3_file.rb +34 -0
- data/lib/hasta/identity_mapper.rb +17 -0
- data/lib/hasta/identity_reducer.rb +18 -0
- data/lib/hasta/in_memory_data_sink.rb +40 -0
- data/lib/hasta/in_memory_data_source.rb +35 -0
- data/lib/hasta/interpolate_string.rb +45 -0
- data/lib/hasta/local_file_path.rb +12 -0
- data/lib/hasta/local_storage.rb +41 -0
- data/lib/hasta/mapper.rb +23 -0
- data/lib/hasta/reducer.rb +29 -0
- data/lib/hasta/resolve_cached_s3_file.rb +29 -0
- data/lib/hasta/resolve_filtered_s3_file.rb +22 -0
- data/lib/hasta/runner.rb +32 -0
- data/lib/hasta/s3_data_sink.rb +48 -0
- data/lib/hasta/s3_data_source.rb +41 -0
- data/lib/hasta/s3_file.rb +56 -0
- data/lib/hasta/s3_file_cache.rb +23 -0
- data/lib/hasta/s3_storage.rb +21 -0
- data/lib/hasta/s3_uri.rb +60 -0
- data/lib/hasta/sorted_data_source.rb +36 -0
- data/lib/hasta/storage.rb +82 -0
- data/lib/hasta/tasks.rb +8 -0
- data/lib/hasta/tasks/runner.rb +84 -0
- data/lib/hasta/version.rb +3 -0
- data/spec/fixtures/hasta/filter_config.txt +1 -0
- data/spec/fixtures/hasta/json/emr_node.json +10 -0
- data/spec/fixtures/hasta/json/pipeline_definition.json +135 -0
- data/spec/fixtures/hasta/lib/failing_mapper.rb +19 -0
- data/spec/fixtures/hasta/lib/test_env_mapper.rb +20 -0
- data/spec/fixtures/hasta/lib/test_identity_mapper.rb +20 -0
- data/spec/fixtures/hasta/lib/test_types_mapper.rb +21 -0
- data/spec/fixtures/hasta/lib/types.rb +1 -0
- data/spec/fixtures/hasta/lib/unconventional_reducer.rb +17 -0
- data/spec/hasta/combined_data_source_spec.rb +25 -0
- data/spec/hasta/combined_storage_spec.rb +54 -0
- data/spec/hasta/configuration_spec.rb +49 -0
- data/spec/hasta/emr_job_definition_spec.rb +181 -0
- data/spec/hasta/emr_node_spec.rb +32 -0
- data/spec/hasta/env_spec.rb +30 -0
- data/spec/hasta/execution_context_spec.rb +67 -0
- data/spec/hasta/filter_spec.rb +66 -0
- data/spec/hasta/filtered_s3_file_spec.rb +45 -0
- data/spec/hasta/identity_mapper_spec.rb +22 -0
- data/spec/hasta/identity_reducer_spec.rb +20 -0
- data/spec/hasta/interpolate_string_spec.rb +44 -0
- data/spec/hasta/local_file_path_spec.rb +18 -0
- data/spec/hasta/local_storage_spec.rb +52 -0
- data/spec/hasta/mapper_spec.rb +26 -0
- data/spec/hasta/reducer_spec.rb +26 -0
- data/spec/hasta/resolved_cached_s3_file_spec.rb +68 -0
- data/spec/hasta/s3_data_source_spec.rb +39 -0
- data/spec/hasta/s3_file_cache_spec.rb +45 -0
- data/spec/hasta/s3_file_spec.rb +122 -0
- data/spec/hasta/s3_storage_spec.rb +24 -0
- data/spec/hasta/s3_uri_spec.rb +151 -0
- data/spec/hasta/sorted_data_source_spec.rb +22 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/shared_contexts/hasta/local_fog_storage.rb +17 -0
- data/spec/support/shared_examples/hasta/storage_examples.rb +103 -0
- metadata +254 -0
data/spec/hasta/env_spec.rb
@@ -0,0 +1,30 @@
+# Copyright Swipely, Inc. All rights reserved.
+
+require 'spec_helper'
+
+require 'hasta/env'
+
+describe Hasta::Env do
+  describe '#setup' do
+    subject { described_class.new(variables, files, combined_storage) }
+
+    let(:combined_storage) { double(Hasta::CombinedStorage) }
+    let(:file) { double('File', :body => contents, :key => s3_uri.path) }
+    let(:contents) { "one,two,three\nfour,five,six" }
+
+    let(:variables) { { 'API_KEY' => '123456' } }
+    let(:files) { { 'DATA_FILE_PATH' => s3_uri } }
+    let(:s3_uri) { Hasta::S3URI.parse('s3://my-bucket/path/to/data.csv') }
+
+    it 'writes the file locally and includes the path in the ENV' do
+      combined_storage.
+        should_receive(:write).
+        with(s3_uri, kind_of(Hasta::S3DataSource)).
+        and_return(s3_uri)
+
+      expect(subject.setup).to eq(
+        variables.merge({ 'DATA_FILE_PATH' => Hasta::LocalFilePath.for(s3_uri) })
+      )
+    end
+  end
+end
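The spec above fixes the contract of `Hasta::Env#setup`: every entry in `files` is written through the combined storage and surfaces in the returned hash as a local path, while plain `variables` pass through untouched. A minimal runnable sketch of that contract (an assumption for illustration, not the shipped `lib/hasta/env.rb`; `SketchEnv`, its stubbed storage, and `local_path_for` are hypothetical names):

```ruby
# A minimal sketch of the #setup contract, assuming the behavior the spec
# pins down; it is not the shipped lib/hasta/env.rb.
class SketchEnv
  def initialize(variables, files, storage, local_root: '/tmp/hasta')
    @variables = variables   # plain ENV vars, passed through untouched
    @files = files           # name => "s3://..." URI, fetched before the run
    @storage = storage       # anything responding to #write(uri)
    @local_root = local_root
  end

  # Writes every file through storage and returns the merged ENV hash.
  def setup
    local_paths = @files.each_with_object({}) do |(name, uri), env|
      @storage.write(uri)              # mirror the S3 object locally
      env[name] = local_path_for(uri)  # the job sees a local path instead
    end
    @variables.merge(local_paths)
  end

  private

  # "s3://bucket/key" -> "<local_root>/bucket/key", like LocalFilePath.for
  def local_path_for(uri)
    File.join(@local_root, uri.sub(%r{\As3://}, ''))
  end
end

storage = Object.new
def storage.write(uri); end  # no-op stand-in for CombinedStorage#write

env = SketchEnv.new({ 'API_KEY' => '123456' },
                    { 'DATA_FILE_PATH' => 's3://my-bucket/path/to/data.csv' },
                    storage)
p env.setup
# => {"API_KEY"=>"123456", "DATA_FILE_PATH"=>"/tmp/hasta/my-bucket/path/to/data.csv"}
```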
data/spec/hasta/execution_context_spec.rb
@@ -0,0 +1,67 @@
+# Copyright Swipely, Inc. All rights reserved.
+
+require 'spec_helper'
+
+require 'hasta/execution_context'
+require 'hasta/s3_data_source'
+require 'hasta/in_memory_data_sink'
+require 'hasta/in_memory_data_source'
+
+describe Hasta::ExecutionContext do
+  describe '#execute' do
+    let(:source_file) { 'spec/fixtures/hasta/lib/test_identity_mapper.rb' }
+    let(:data_source) { Hasta::InMemoryDataSource.new(lines) }
+    let(:data_sink) { Hasta::InMemoryDataSink.new }
+    let(:lines) { %w[First Second Third] }
+    let(:exp_lines) { lines.map { |line| "#{line}\t" } }
+    let(:results) { subject.execute(source_file, data_source, data_sink).data_source }
+
+    it 'returns the execution results' do
+      expect(results.to_a).to eq(exp_lines)
+    end
+
+    context 'given env variables' do
+      subject { described_class.new([], env) }
+
+      let(:env) { { 'LINE_PREFIX' => prefix } }
+      let(:prefix) { 'Copyright 2014 ' }
+      let(:source_file) { 'spec/fixtures/hasta/lib/test_env_mapper.rb' }
+      let(:exp_lines) { lines.map { |line| "#{prefix}#{line}\t" } }
+
+      it 'returns the execution results' do
+        expect(results.to_a).to eq(exp_lines)
+      end
+
+      it 'does not set the ENV of the parent process' do
+        expect(ENV['LINE_PREFIX']).to be_nil
+      end
+    end
+
+    context 'given additional Ruby files' do
+      subject { described_class.new(files) }
+
+      let(:files) { [file] }
+      let(:file) { "#{File.dirname(__FILE__)}/../fixtures/hasta/lib/types.rb" }
+      let(:dir) { File.expand_path(File.dirname(file)) }
+      let(:source_file) { 'spec/fixtures/hasta/lib/test_types_mapper.rb' }
+
+      it 'returns the execution results' do
+        expect(results.to_a).to eq(exp_lines)
+      end
+
+      it 'does not affect the $LOAD_PATH of the parent process' do
+        expect($LOAD_PATH).to_not include(dir)
+      end
+    end
+
+    context 'given job failure' do
+      let(:source_file) { 'spec/fixtures/hasta/lib/failing_mapper.rb' }
+
+      it 'raises' do
+        expect {
+          subject.execute(source_file, data_source, data_sink)
+        }.to raise_error(Hasta::ExecutionError)
+      end
+    end
+  end
+end
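Two of the examples above assert isolation: env vars set for the job never leak into the parent's `ENV`, and extra Ruby files never touch the parent's `$LOAD_PATH`. One classic way to get both guarantees is to run the user-supplied code in a forked child process. The sketch below illustrates that mechanism only as an assumption; the shipped `lib/hasta/execution_context.rb` is not shown in this diff, and `execute_isolated` is a hypothetical name. It needs a fork-capable Ruby (MRI on Unix):

```ruby
# Run a job in a forked child so ENV mutations and $LOAD_PATH additions
# die with the child; the result comes back over a pipe.
def execute_isolated(env, load_paths)
  reader, writer = IO.pipe
  pid = fork do
    reader.close
    ENV.update(env)                                   # visible only in the child
    load_paths.each { |dir| $LOAD_PATH.unshift(dir) }
    writer.write(yield)                               # ship the result to the parent
    writer.close
  end
  writer.close
  result = reader.read
  Process.wait(pid)
  raise 'job failed' unless $?.success?               # analogue of Hasta::ExecutionError
  result
end

out = execute_isolated({ 'LINE_PREFIX' => 'Copyright 2014 ' }, []) do
  "#{ENV['LINE_PREFIX']}First\t"
end
p out                 # => "Copyright 2014 First\t"
p ENV['LINE_PREFIX']  # => nil, the parent process is untouched
```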
data/spec/hasta/filter_spec.rb
@@ -0,0 +1,66 @@
+# Copyright Swipely, Inc. All rights reserved.
+
+require 'spec_helper'
+
+require 'hasta/filter'
+
+describe Hasta::Filter do
+  context 'given a single regex' do
+    subject { described_class.new(regex) }
+
+    let(:regex) { /\A\d{1,}.*/ }
+    let(:drop_all) { ['First', 'Second', 'Third'] }
+    let(:drop_none) { ['1. First', '2. Second'] }
+    let(:drop_some) { ['1 First', 'Second', '3 Third'] }
+
+    it { expect(drop_all.select(&subject)).to be_empty }
+    it { expect(drop_none.select(&subject)).to eq(drop_none) }
+    it { expect(drop_some.select(&subject)).to eq(['1 First', '3 Third']) }
+  end
+
+  context 'given multiple regexes' do
+    subject { described_class.new(regex1, regex2) }
+
+    let(:regex1) { /\A\d{1,}.*/ }
+    let(:regex2) { /\A[A-Z].*/ }
+
+    let(:drop_all) { ['first', 'second', 'third'] }
+    let(:drop_none) { ['1. First', '2. Second', 'Third'] }
+    let(:drop_some) { ['First', 'second', '3 Third'] }
+
+    it { expect(drop_all.select(&subject)).to be_empty }
+    it { expect(drop_none.select(&subject)).to eq(drop_none) }
+    it { expect(drop_some.select(&subject)).to eq(['First', '3 Third']) }
+  end
+
+  context 'given multiple identical regexes' do
+    subject { described_class.new(regex1, regex1) }
+
+    let(:regex1) { /\A\d{1,}.*/ }
+    let(:single_regex_filter) { described_class.new(regex1) }
+
+    it { expect(subject.to_s).to eq(single_regex_filter.to_s) }
+  end
+
+  context 'given distinct regexes in a different order' do
+    subject { described_class.new(regex1, regex2) }
+
+    let(:regex1) { /\A\d{1,}.*/ }
+    let(:regex2) { /\A[A-Z].*/ }
+    let(:different_order_filter) { described_class.new(regex2, regex1) }
+
+    it { expect(subject.to_s).to eq(different_order_filter.to_s) }
+  end
+
+  describe '.from_file' do
+    let(:non_existent_file) { 'spec/fixtures/hasta/non_existent_file.dll' }
+
+    context 'given a non-existent file' do
+      it 'raises a ConfigurationError' do
+        expect {
+          described_class.from_file(non_existent_file)
+        }.to raise_error(Hasta::ConfigurationError)
+      end
+    end
+  end
+end
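Read together, these examples pin down `Hasta::Filter`'s semantics: a line is kept when any of the regexes match (`select(&subject)` works because the filter converts to a proc), and `#to_s` is insensitive to regex order and duplicates, which matters later when filters feed cache fingerprints. A hypothetical sketch consistent with those examples (not the shipped `lib/hasta/filter.rb`):

```ruby
# Sketch of the behavior the spec pins down: keep a line when ANY regex
# matches, and give equivalent filters an identical #to_s.
class SketchFilter
  def initialize(*regexes)
    @regexes = regexes.uniq.sort_by(&:source)  # canonical order for #to_s
  end

  # `lines.select(&filter)` works via this conversion.
  def to_proc
    ->(line) { @regexes.any? { |re| re =~ line } }
  end

  def to_s
    @regexes.map(&:inspect).join('|')
  end
end

filter = SketchFilter.new(/\A\d{1,}.*/, /\A[A-Z].*/)
p ['First', 'second', '3 Third'].select(&filter)                   # => ["First", "3 Third"]
p filter.to_s == SketchFilter.new(/\A[A-Z].*/, /\A\d{1,}.*/).to_s  # => true
```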
data/spec/hasta/filtered_s3_file_spec.rb
@@ -0,0 +1,45 @@
+# Copyright Swipely, Inc. All rights reserved.
+
+require 'spec_helper'
+
+require 'hasta/filtered_s3_file'
+
+describe Hasta::FilteredS3File do
+  describe '#each_line' do
+    subject { described_class.new(s3_file, filter) }
+
+    let(:s3_file) { Hasta::S3File.new(fog_file) }
+    let(:fog_file) {
+      double('Fog::File',
+        :key => 'code/1986/HELLO.BAS',
+        :body => lines.join,
+        :etag => '696b61f4be8b11e383a37831c1ce6688'
+      )
+    }
+    let(:lines) {
+      [
+        "10 PRINT \"Hello World\"\n",
+        "20 GOTO 10\n",
+        "30 END\n",
+      ]
+    }
+
+    context 'given a filter that drops every line' do
+      let(:filter) { Hasta::Filter.new(/.^/) }
+      let(:exp_fingerprint) { '89a35b5f6745b5bab65cb918e9933df4' }
+
+      it { expect(subject.each_line.to_a).to be_empty }
+      it { expect(subject.body).to be_empty }
+      it { expect(subject.fingerprint).to eq(exp_fingerprint) }
+    end
+
+    context 'given a filter that drops some lines' do
+      let(:filter) { Hasta::Filter.new(/\A[1,3]/) }
+      let(:exp_fingerprint) { '46426cde7e24ab46ed95913c02623593' }
+
+      it { expect(subject.each_line.to_a).to eq(lines.values_at(0, 2)) }
+      it { expect(subject.body).to eq(lines.values_at(0, 2).join) }
+      it { expect(subject.fingerprint).to eq(exp_fingerprint) }
+    end
+  end
+end
data/spec/hasta/identity_mapper_spec.rb
@@ -0,0 +1,22 @@
+# Copyright Swipely, Inc. All rights reserved.
+
+require 'spec_helper'
+
+require 'hasta/identity_mapper'
+require 'hasta/execution_context'
+require 'hasta/in_memory_data_sink'
+require 'hasta/in_memory_data_source'
+
+describe Hasta::IdentityMapper do
+  describe '#map' do
+    subject { described_class }
+
+    let(:sink) { Hasta::InMemoryDataSink.new }
+    let(:lines) { [["First\n"], ["Second\n", "Third\n"]] }
+    let(:exp_lines) { lines.flatten.map(&:rstrip).map { |line| "#{line}\t" } }
+    let(:sources) { lines.map { |source_lines| Hasta::InMemoryDataSource.new(source_lines) } }
+    let(:context) { double(Hasta::ExecutionContext) }
+
+    it { expect(subject.map(context, sources, sink).data_source.to_a).to eq(exp_lines) }
+  end
+end
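The expected output shows the Hadoop-streaming convention the identity mapper follows: every line from every source is stripped and re-emitted as a key with an empty value, hence the trailing tab. A sketch of that contract, with a plain `Array` standing in for the data sink (an illustration under those assumptions, not the shipped `lib/hasta/identity_mapper.rb`):

```ruby
# Sketch of the identity-mapper contract the expectation encodes.
module SketchIdentityMapper
  # Flattens all sources and re-emits each line as "key\t" (empty value).
  def self.map(_context, sources, sink)
    sources.each do |source|
      source.each { |line| sink << "#{line.rstrip}\t" }
    end
    sink
  end
end

sink = []
SketchIdentityMapper.map(nil, [["First\n"], ["Second\n", "Third\n"]], sink)
p sink  # => ["First\t", "Second\t", "Third\t"]
```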
data/spec/hasta/identity_reducer_spec.rb
@@ -0,0 +1,20 @@
+# Copyright Swipely, Inc. All rights reserved.
+
+require 'spec_helper'
+
+require 'hasta/identity_reducer'
+require 'hasta/execution_context'
+
+describe Hasta::IdentityReducer do
+  describe '#reduce' do
+    subject { described_class }
+
+    let(:sink) { Hasta::InMemoryDataSink.new }
+    let(:lines) { ["Key1\tFirst\n", "Key2\tSecond\n", "Key3\tThird\n"] }
+    let(:exp_lines) { lines.map(&:rstrip) }
+    let(:source) { Hasta::InMemoryDataSource.new(lines) }
+    let(:context) { double(Hasta::ExecutionContext) }
+
+    it { expect(subject.reduce(context, source, sink).data_source.to_a).to eq(exp_lines) }
+  end
+end
data/spec/hasta/interpolate_string_spec.rb
@@ -0,0 +1,44 @@
+# Copyright Swipely, Inc. All rights reserved.
+
+require 'spec_helper'
+
+require 'hasta/interpolate_string'
+
+describe Hasta::InterpolateString do
+  describe '#evaluate' do
+    subject { described_class.new(text) }
+
+    let(:context) { { 'scheduledStartTime' => Time.parse('2014-03-28T18:05:11Z') } }
+
+    context 'given static text' do
+      let(:text) { 'Static text' }
+
+      it { expect(subject.evaluate(context)).to eq(text) }
+    end
+
+    context 'given text with a known interpolate time expression' do
+      let(:text) {
+        's3://my-bucket/path/to/dir/#{format(@scheduledStartTime,\'YYYY-MM-dd_HHmmss\')}/files/'
+      }
+      let(:interpolated_text) { 's3://my-bucket/path/to/dir/2014-03-28_180511/files/' }
+
+      it { expect(subject.evaluate(context)).to eq(interpolated_text) }
+    end
+
+    context 'given text with a known interpolate date expression' do
+      let(:text) {
+        's3://my-bucket/path/to/dir/#{format(@scheduledStartTime,\'YYYY-MM-dd\')}/files/'
+      }
+      let(:interpolated_text) { 's3://my-bucket/path/to/dir/2014-03-28/files/' }
+
+      it { expect(subject.evaluate(context)).to eq(interpolated_text) }
+    end
+
+    context 'given text with an unknown interpolate expression' do
+      let(:time) { '#{format(minusMinutes(@scheduledStartTime,30),\'YYYY-MM-dd hh:mm:ss\')}' }
+      let(:text) { "s3://my-bucket/path/to/dir/#{time}/files/" }
+
+      it { expect(subject.evaluate(context)).to eq(text) }
+    end
+  end
+end
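These examples describe an AWS Data Pipeline-style expression pass: known `#{format(@var,'PATTERN')}` expressions are expanded against the context, and anything the evaluator does not recognize is left verbatim. A self-contained sketch of that behavior, assuming a small Joda-to-strftime pattern table (`interpolate`, `EXPR`, and `JODA_TO_STRFTIME` are hypothetical names; the shipped `lib/hasta/interpolate_string.rb` is not shown in this diff):

```ruby
require 'time'

# Expand #{format(@var,'PATTERN')} against context; leave everything else
# (including unrecognized expressions) untouched.
EXPR = /\#\{format\(@(\w+),'([^']+)'\)\}/
JODA_TO_STRFTIME = { 'YYYY' => '%Y', 'MM' => '%m', 'dd' => '%d',
                     'HH' => '%H', 'mm' => '%M', 'ss' => '%S' }.freeze

def interpolate(text, context)
  text.gsub(EXPR) do |whole|
    name, joda = Regexp.last_match(1), Regexp.last_match(2)
    value = context[name]
    next whole unless value  # unknown variable: pass through verbatim
    value.strftime(joda.gsub(/YYYY|MM|dd|HH|mm|ss/, JODA_TO_STRFTIME))
  end
end

ctx = { 'scheduledStartTime' => Time.parse('2014-03-28T18:05:11Z') }
puts interpolate(
  's3://my-bucket/path/to/dir/#{format(@scheduledStartTime,\'YYYY-MM-dd_HHmmss\')}/files/',
  ctx
)
# => s3://my-bucket/path/to/dir/2014-03-28_180511/files/
```

Note that the unknown-expression example from the spec also falls out of this sketch: `format(minusMinutes(@scheduledStartTime,30),...)` never matches `EXPR`, so the text survives unchanged.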
data/spec/hasta/local_file_path_spec.rb
@@ -0,0 +1,18 @@
+# Copyright Swipely, Inc. All rights reserved.
+
+require 'spec_helper'
+
+require 'hasta/local_file_path'
+
+describe Hasta::LocalFilePath do
+  describe '.for' do
+    subject { described_class.for(s3_uri) }
+
+    let(:s3_uri) { Hasta::S3URI.new(bucket_name, path) }
+    let(:bucket_name) { 'my-bucket' }
+    let(:path) { 'path/to/my/file.txt' }
+    let(:exp_path) { File.expand_path("#{Hasta.local_storage_root}/#{bucket_name}/#{path}") }
+
+    it { expect(subject).to eq(exp_path) }
+  end
+end
data/spec/hasta/local_storage_spec.rb
@@ -0,0 +1,52 @@
+# Copyright Swipely, Inc. All rights reserved.
+
+require 'spec_helper'
+
+require 'hasta/local_file_path'
+
+describe Hasta::LocalStorage do
+  subject { described_class.new(fog_storage) }
+
+  after do
+    FileUtils.rm_rf(tmpdir)
+  end
+
+  let(:fog_storage) {
+    Fog::Storage.new(
+      :provider => 'Local',
+      :local_root => tmpdir,
+      :endpoint => 'http://example.com'
+    )
+  }
+  let(:tmpdir) { Dir.mktmpdir('hasta_local_storage_test') }
+
+  describe '#files_for' do
+    it_should_behave_like 'a storage service'
+  end
+
+  describe '#write' do
+    let(:s3_uri) { Hasta::S3URI.new(bucket_name, path) }
+    let(:bucket_name) { 'my-bucket' }
+    let(:content) { "Hi\nBye\nWhy?\n" }
+    let(:data_source) { StringIO.new(content) }
+
+    let(:result) { subject.write(s3_uri, data_source) }
+    let(:local_file_path) { Hasta::LocalFilePath.for(result) }
+
+    context 'given a directory uri' do
+      let(:path) { 'path/to/files/' }
+      let(:expected_uri) { s3_uri.append('part-00000') }
+
+      it { expect(result).to eq(expected_uri) }
+      it { expect(File.read(local_file_path)).to eq(content) }
+    end
+
+    context 'given a file uri' do
+      let(:path) { 'path/to/files/file.txt' }
+      let(:expected_uri) { s3_uri }
+
+      it { expect(result).to eq(expected_uri) }
+      it { expect(File.read(local_file_path)).to eq(content) }
+    end
+  end
+end
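The `#write` examples encode one resolution rule: a URI ending in `/` names a directory, so output lands in a Hadoop-style `part-00000` file inside it, and the resolved URI is returned to the caller. A minimal sketch of that rule (an assumption about the mechanism; `write_local` is a hypothetical stand-in for the shipped `lib/hasta/local_storage.rb`):

```ruby
require 'fileutils'
require 'stringio'
require 'tmpdir'

# Resolve a directory URI to a part-00000 file, write the data under a
# local root that mirrors the bucket/key layout, and return the final URI.
def write_local(root, s3_uri, io)
  uri = s3_uri.end_with?('/') ? "#{s3_uri}part-00000" : s3_uri
  path = File.join(root, uri.sub(%r{\As3://}, ''))
  FileUtils.mkdir_p(File.dirname(path))
  File.write(path, io.read)
  uri  # the caller learns where the data actually went
end

Dir.mktmpdir do |root|
  p write_local(root, 's3://my-bucket/path/to/files/', StringIO.new("Hi\nBye\n"))
  # => "s3://my-bucket/path/to/files/part-00000"
end
```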
data/spec/hasta/mapper_spec.rb
@@ -0,0 +1,26 @@
+# Copyright Swipely, Inc. All rights reserved.
+
+require 'spec_helper'
+
+require 'hasta/mapper'
+
+describe Hasta::Mapper do
+  let(:mapper_file) { 'spec/fixtures/hasta/lib/test_identity_mapper.rb' }
+
+  describe '#map' do
+    subject { described_class.new(mapper_file) }
+
+    let(:input_source) { Hasta::InMemoryDataSource.new(input_lines, "Test Input") }
+    let(:input_sources) { [input_source] }
+    let(:input_lines) { ["Key1\tOne", "Key2\tTwo", "Key3\tThree", "Key4\tFour"] }
+    let(:output_lines) { input_lines }
+    let(:context) { Hasta::ExecutionContext.new }
+
+    let(:sink) { Hasta::InMemoryDataSink.new }
+
+    it 'writes all of the mapped lines to the sink' do
+      expect(subject.map(context, input_sources, sink)).to eq(sink)
+      expect(sink.data_source.to_a).to eq(input_lines)
+    end
+  end
+end
data/spec/hasta/reducer_spec.rb
@@ -0,0 +1,26 @@
+# Copyright Swipely, Inc. All rights reserved.
+
+require 'spec_helper'
+
+require 'hasta/reducer'
+require 'hasta/in_memory_data_source'
+
+describe Hasta::Reducer do
+  describe '#reduce' do
+    subject { described_class.new(reducer_file) }
+
+    let(:reducer_file) { 'spec/fixtures/hasta/lib/unconventional_reducer.rb' }
+    let(:input_source) { Hasta::InMemoryDataSource.new(input_lines) }
+    let(:input_lines) { ["Small", "Medium", "Large"] }
+    let(:sorted_lines) { ["Large", "Medium", "Small"] }
+    let(:exp_lines) { sorted_lines.map { |line| "#{line}\t"} }
+    let(:context) { Hasta::ExecutionContext.new }
+
+    let(:sink) { Hasta::InMemoryDataSink.new }
+
+    it 'reduces over all of the input lines in sorted order' do
+      expect(subject.reduce(context, input_source, sink)).to eq(sink)
+      expect(sink.data_source.to_a).to eq(exp_lines)
+    end
+  end
+end
data/spec/hasta/resolved_cached_s3_file_spec.rb
@@ -0,0 +1,68 @@
+# Copyright Swipely, Inc. All rights reserved.
+
+require 'spec_helper'
+
+require 'hasta/resolve_cached_s3_file'
+
+describe Hasta::ResolveCachedS3File do
+  describe '#resolve' do
+    subject { described_class.new(file_cache, child_resolver) }
+
+    let(:file_cache) { double(Hasta::S3FileCache) }
+    let(:child_resolver) { Hasta::ResolveFilteredS3File.new(filter) }
+    let(:filter) { Hasta::Filter.new(/.*/) }
+
+    let(:fog_file) {
+      double('Fog::File',
+        :directory => fog_bucket,
+        :key => path,
+        :body => body
+      )
+    }
+    let(:fog_bucket) { double('Fog::Directory', :key => bucket_name) }
+    let(:bucket_name) { 'my-bucket' }
+    let(:path) { 'path/to/my/file.txt' }
+    let(:body) { "Parts\n" }
+    let(:s3_uri) { Hasta::S3URI.new(bucket_name, path) }
+
+    let(:exp_fingerprint) {
+      Digest::MD5.hexdigest("#{Digest::MD5.hexdigest(body)}_#{filter.to_s}")
+    }
+
+    let(:result) { subject.resolve(fog_file) }
+
+    context 'given the file is not cached' do
+      before do
+        file_cache.stub(:get).and_return(nil)
+      end
+
+      it 'caches the file' do
+        file_cache.should_receive(:put).with(exp_fingerprint, body)
+
+        expect(result.body).to eq(body)
+        expect(result.s3_uri).to eq(s3_uri)
+      end
+    end
+
+    context 'given the file is cached' do
+      before do
+        file_cache.stub(:get).with(exp_fingerprint).and_return(cached_file)
+      end
+
+      let(:cached_file) {
+        double('Fog::File',
+          :directory => double('Fog::Bucket', :key => 'cache_dir'),
+          :key => exp_fingerprint,
+          :body => body
+        )
+      }
+
+      it 'retrieves the cached file' do
+        file_cache.should_not_receive(:put)
+
+        expect(result.body).to eq(body)
+        expect(result.s3_uri).to eq(s3_uri)
+      end
+    end
+  end
+end
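The two contexts above define the cache protocol: the fingerprint is `MD5("#{MD5(body)}_#{filter}")`, a cache hit skips resolution entirely, and a miss resolves the file and memoizes the result. Because `Filter#to_s` is order- and duplicate-insensitive (see the filter spec earlier in this diff), the same file filtered by equivalent filters hits the same cache slot. A runnable sketch of that protocol (`SketchCachedResolver` is hypothetical; the shipped `lib/hasta/resolve_cached_s3_file.rb` is not shown in this diff):

```ruby
require 'digest/md5'

# Fingerprint-keyed memoization of filtered file bodies, mirroring the
# get/put protocol the spec stubs out.
class SketchCachedResolver
  def initialize(cache, filter)
    @cache = cache    # anything with #get(key) / #put(key, body)
    @filter = filter  # needs a stable string form, see the Filter spec
  end

  # body_hash stands in for the Fog file's content hash.
  def resolve(body_hash, body)
    key = Digest::MD5.hexdigest("#{body_hash}_#{@filter}")
    cached = @cache.get(key)
    return cached if cached                     # hit: skip filtering entirely
    filtered = body.each_line.grep(@filter).join
    @cache.put(key, filtered)                   # miss: filter once, memoize
    filtered
  end
end

cache = {}
def cache.get(key); self[key]; end
def cache.put(key, body); self[key] = body; end

resolver = SketchCachedResolver.new(cache, /\A\d/)
body = "1 keep\nskip\n2 keep\n"
p resolver.resolve(Digest::MD5.hexdigest(body), body)  # => "1 keep\n2 keep\n"
p cache.size                                           # => 1, result memoized
```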