masamune 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +54 -0
  4. data/Rakefile +15 -0
  5. data/bin/masamune-elastic-mapreduce +4 -0
  6. data/bin/masamune-hive +4 -0
  7. data/bin/masamune-psql +4 -0
  8. data/bin/masamune-shell +4 -0
  9. data/lib/masamune.rb +56 -0
  10. data/lib/masamune/accumulate.rb +60 -0
  11. data/lib/masamune/actions.rb +38 -0
  12. data/lib/masamune/actions/data_flow.rb +131 -0
  13. data/lib/masamune/actions/date_parse.rb +75 -0
  14. data/lib/masamune/actions/elastic_mapreduce.rb +68 -0
  15. data/lib/masamune/actions/execute.rb +52 -0
  16. data/lib/masamune/actions/filesystem.rb +37 -0
  17. data/lib/masamune/actions/hadoop_filesystem.rb +40 -0
  18. data/lib/masamune/actions/hadoop_streaming.rb +41 -0
  19. data/lib/masamune/actions/hive.rb +74 -0
  20. data/lib/masamune/actions/postgres.rb +76 -0
  21. data/lib/masamune/actions/postgres_admin.rb +34 -0
  22. data/lib/masamune/actions/s3cmd.rb +44 -0
  23. data/lib/masamune/actions/transform.rb +89 -0
  24. data/lib/masamune/after_initialize_callbacks.rb +55 -0
  25. data/lib/masamune/cached_filesystem.rb +110 -0
  26. data/lib/masamune/commands.rb +37 -0
  27. data/lib/masamune/commands/elastic_mapreduce.rb +119 -0
  28. data/lib/masamune/commands/hadoop_filesystem.rb +57 -0
  29. data/lib/masamune/commands/hadoop_streaming.rb +116 -0
  30. data/lib/masamune/commands/hive.rb +178 -0
  31. data/lib/masamune/commands/interactive.rb +37 -0
  32. data/lib/masamune/commands/postgres.rb +128 -0
  33. data/lib/masamune/commands/postgres_admin.rb +72 -0
  34. data/lib/masamune/commands/postgres_common.rb +33 -0
  35. data/lib/masamune/commands/retry_with_backoff.rb +60 -0
  36. data/lib/masamune/commands/s3cmd.rb +70 -0
  37. data/lib/masamune/commands/shell.rb +202 -0
  38. data/lib/masamune/configuration.rb +195 -0
  39. data/lib/masamune/data_plan.rb +31 -0
  40. data/lib/masamune/data_plan/builder.rb +66 -0
  41. data/lib/masamune/data_plan/elem.rb +190 -0
  42. data/lib/masamune/data_plan/engine.rb +162 -0
  43. data/lib/masamune/data_plan/rule.rb +292 -0
  44. data/lib/masamune/data_plan/set.rb +176 -0
  45. data/lib/masamune/environment.rb +164 -0
  46. data/lib/masamune/filesystem.rb +567 -0
  47. data/lib/masamune/has_environment.rb +40 -0
  48. data/lib/masamune/helpers.rb +27 -0
  49. data/lib/masamune/helpers/postgres.rb +84 -0
  50. data/lib/masamune/io.rb +33 -0
  51. data/lib/masamune/last_element.rb +53 -0
  52. data/lib/masamune/method_logger.rb +41 -0
  53. data/lib/masamune/multi_io.rb +39 -0
  54. data/lib/masamune/schema.rb +36 -0
  55. data/lib/masamune/schema/catalog.rb +233 -0
  56. data/lib/masamune/schema/column.rb +527 -0
  57. data/lib/masamune/schema/dimension.rb +133 -0
  58. data/lib/masamune/schema/event.rb +121 -0
  59. data/lib/masamune/schema/fact.rb +133 -0
  60. data/lib/masamune/schema/map.rb +265 -0
  61. data/lib/masamune/schema/row.rb +133 -0
  62. data/lib/masamune/schema/store.rb +115 -0
  63. data/lib/masamune/schema/table.rb +308 -0
  64. data/lib/masamune/schema/table_reference.rb +76 -0
  65. data/lib/masamune/spec_helper.rb +23 -0
  66. data/lib/masamune/string_format.rb +34 -0
  67. data/lib/masamune/tasks/elastic_mapreduce_thor.rb +60 -0
  68. data/lib/masamune/tasks/hive_thor.rb +55 -0
  69. data/lib/masamune/tasks/postgres_thor.rb +47 -0
  70. data/lib/masamune/tasks/shell_thor.rb +63 -0
  71. data/lib/masamune/template.rb +77 -0
  72. data/lib/masamune/thor.rb +186 -0
  73. data/lib/masamune/thor_loader.rb +38 -0
  74. data/lib/masamune/topological_hash.rb +34 -0
  75. data/lib/masamune/transform.rb +47 -0
  76. data/lib/masamune/transform/bulk_upsert.psql.erb +64 -0
  77. data/lib/masamune/transform/bulk_upsert.rb +52 -0
  78. data/lib/masamune/transform/consolidate_dimension.rb +54 -0
  79. data/lib/masamune/transform/deduplicate_dimension.psql.erb +52 -0
  80. data/lib/masamune/transform/deduplicate_dimension.rb +53 -0
  81. data/lib/masamune/transform/define_event_view.hql.erb +51 -0
  82. data/lib/masamune/transform/define_event_view.rb +60 -0
  83. data/lib/masamune/transform/define_index.psql.erb +34 -0
  84. data/lib/masamune/transform/define_schema.hql.erb +23 -0
  85. data/lib/masamune/transform/define_schema.psql.erb +79 -0
  86. data/lib/masamune/transform/define_schema.rb +56 -0
  87. data/lib/masamune/transform/define_table.hql.erb +34 -0
  88. data/lib/masamune/transform/define_table.psql.erb +95 -0
  89. data/lib/masamune/transform/define_table.rb +40 -0
  90. data/lib/masamune/transform/define_unique.psql.erb +30 -0
  91. data/lib/masamune/transform/insert_reference_values.psql.erb +43 -0
  92. data/lib/masamune/transform/insert_reference_values.rb +64 -0
  93. data/lib/masamune/transform/load_dimension.rb +47 -0
  94. data/lib/masamune/transform/load_fact.rb +45 -0
  95. data/lib/masamune/transform/operator.rb +96 -0
  96. data/lib/masamune/transform/relabel_dimension.psql.erb +76 -0
  97. data/lib/masamune/transform/relabel_dimension.rb +39 -0
  98. data/lib/masamune/transform/rollup_fact.psql.erb +79 -0
  99. data/lib/masamune/transform/rollup_fact.rb +149 -0
  100. data/lib/masamune/transform/snapshot_dimension.psql.erb +75 -0
  101. data/lib/masamune/transform/snapshot_dimension.rb +74 -0
  102. data/lib/masamune/transform/stage_dimension.psql.erb +39 -0
  103. data/lib/masamune/transform/stage_dimension.rb +83 -0
  104. data/lib/masamune/transform/stage_fact.psql.erb +80 -0
  105. data/lib/masamune/transform/stage_fact.rb +111 -0
  106. data/lib/masamune/version.rb +25 -0
  107. data/spec/fixtures/aggregate.sql.erb +25 -0
  108. data/spec/fixtures/comment.sql.erb +27 -0
  109. data/spec/fixtures/invalid.sql.erb +23 -0
  110. data/spec/fixtures/relative.sql.erb +23 -0
  111. data/spec/fixtures/simple.sql.erb +28 -0
  112. data/spec/fixtures/whitespace.sql.erb +30 -0
  113. data/spec/masamune/actions/elastic_mapreduce_spec.rb +108 -0
  114. data/spec/masamune/actions/execute_spec.rb +50 -0
  115. data/spec/masamune/actions/hadoop_filesystem_spec.rb +44 -0
  116. data/spec/masamune/actions/hadoop_streaming_spec.rb +74 -0
  117. data/spec/masamune/actions/hive_spec.rb +117 -0
  118. data/spec/masamune/actions/postgres_admin_spec.rb +58 -0
  119. data/spec/masamune/actions/postgres_spec.rb +134 -0
  120. data/spec/masamune/actions/s3cmd_spec.rb +44 -0
  121. data/spec/masamune/actions/transform_spec.rb +144 -0
  122. data/spec/masamune/after_initialization_callbacks_spec.rb +61 -0
  123. data/spec/masamune/cached_filesystem_spec.rb +167 -0
  124. data/spec/masamune/commands/hadoop_filesystem_spec.rb +50 -0
  125. data/spec/masamune/commands/hadoop_streaming_spec.rb +106 -0
  126. data/spec/masamune/commands/hive_spec.rb +117 -0
  127. data/spec/masamune/commands/postgres_admin_spec.rb +69 -0
  128. data/spec/masamune/commands/postgres_spec.rb +100 -0
  129. data/spec/masamune/commands/retry_with_backoff_spec.rb +116 -0
  130. data/spec/masamune/commands/s3cmd_spec.rb +50 -0
  131. data/spec/masamune/commands/shell_spec.rb +101 -0
  132. data/spec/masamune/configuration_spec.rb +102 -0
  133. data/spec/masamune/data_plan/builder_spec.rb +91 -0
  134. data/spec/masamune/data_plan/elem_spec.rb +102 -0
  135. data/spec/masamune/data_plan/engine_spec.rb +356 -0
  136. data/spec/masamune/data_plan/rule_spec.rb +407 -0
  137. data/spec/masamune/data_plan/set_spec.rb +517 -0
  138. data/spec/masamune/environment_spec.rb +65 -0
  139. data/spec/masamune/filesystem_spec.rb +1421 -0
  140. data/spec/masamune/helpers/postgres_spec.rb +95 -0
  141. data/spec/masamune/schema/catalog_spec.rb +613 -0
  142. data/spec/masamune/schema/column_spec.rb +696 -0
  143. data/spec/masamune/schema/dimension_spec.rb +137 -0
  144. data/spec/masamune/schema/event_spec.rb +75 -0
  145. data/spec/masamune/schema/fact_spec.rb +117 -0
  146. data/spec/masamune/schema/map_spec.rb +593 -0
  147. data/spec/masamune/schema/row_spec.rb +28 -0
  148. data/spec/masamune/schema/store_spec.rb +49 -0
  149. data/spec/masamune/schema/table_spec.rb +395 -0
  150. data/spec/masamune/string_format_spec.rb +60 -0
  151. data/spec/masamune/tasks/elastic_mapreduce_thor_spec.rb +57 -0
  152. data/spec/masamune/tasks/hive_thor_spec.rb +75 -0
  153. data/spec/masamune/tasks/postgres_thor_spec.rb +42 -0
  154. data/spec/masamune/tasks/shell_thor_spec.rb +51 -0
  155. data/spec/masamune/template_spec.rb +77 -0
  156. data/spec/masamune/thor_spec.rb +238 -0
  157. data/spec/masamune/transform/bulk_upsert.dimension_spec.rb +200 -0
  158. data/spec/masamune/transform/consolidate_dimension_spec.rb +62 -0
  159. data/spec/masamune/transform/deduplicate_dimension_spec.rb +84 -0
  160. data/spec/masamune/transform/define_event_view_spec.rb +84 -0
  161. data/spec/masamune/transform/define_schema_spec.rb +83 -0
  162. data/spec/masamune/transform/define_table.dimension_spec.rb +306 -0
  163. data/spec/masamune/transform/define_table.fact_spec.rb +291 -0
  164. data/spec/masamune/transform/define_table.table_spec.rb +525 -0
  165. data/spec/masamune/transform/insert_reference_values.dimension_spec.rb +111 -0
  166. data/spec/masamune/transform/insert_reference_values.fact_spec.rb +149 -0
  167. data/spec/masamune/transform/load_dimension_spec.rb +76 -0
  168. data/spec/masamune/transform/load_fact_spec.rb +89 -0
  169. data/spec/masamune/transform/relabel_dimension_spec.rb +102 -0
  170. data/spec/masamune/transform/rollup_fact_spec.rb +333 -0
  171. data/spec/masamune/transform/snapshot_dimension_spec.rb +103 -0
  172. data/spec/masamune/transform/stage_dimension_spec.rb +115 -0
  173. data/spec/masamune/transform/stage_fact_spec.rb +204 -0
  174. data/spec/masamune_spec.rb +32 -0
  175. data/spec/spec_helper.rb +41 -0
  176. data/spec/support/masamune/example_group.rb +36 -0
  177. data/spec/support/masamune/mock_command.rb +99 -0
  178. data/spec/support/masamune/mock_delegate.rb +51 -0
  179. data/spec/support/masamune/mock_filesystem.rb +96 -0
  180. data/spec/support/masamune/thor_mute.rb +35 -0
  181. data/spec/support/rspec/example/action_example_group.rb +34 -0
  182. data/spec/support/rspec/example/task_example_group.rb +80 -0
  183. data/spec/support/rspec/example/transform_example_group.rb +36 -0
  184. data/spec/support/shared_examples/postgres_common_examples.rb +53 -0
  185. metadata +462 -0
@@ -0,0 +1,65 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ describe Masamune::Environment do
26
+ let(:instance) { described_class.new }
27
+ let(:run_dir) { Dir.mktmpdir('masamune') }
28
+
29
+ describe '#with_exclusive_lock' do
30
+ context 'when run_dir not defined' do
31
+ it { expect { |b| instance.with_exclusive_lock('some_lock', &b) }.to raise_error /filesystem path :run_dir not defined/ }
32
+ end
33
+
34
+ context 'when lock can be acquired' do
35
+ before do
36
+ instance.filesystem.add_path(:run_dir, run_dir)
37
+ expect_any_instance_of(File).to receive(:flock).twice.and_return(0)
38
+ expect(instance.logger).to receive(:debug).with(%q{acquiring lock 'some_lock'})
39
+ expect(instance.logger).to receive(:debug).with(%q{releasing lock 'some_lock'})
40
+ end
41
+ it { expect { |b| instance.with_exclusive_lock('some_lock', &b) }.to yield_control }
42
+ end
43
+
44
+ context 'with lock configuration' do
45
+ before do
46
+ instance.filesystem.add_path(:run_dir, run_dir)
47
+ instance.configuration.lock = 'long_running'
48
+ expect_any_instance_of(File).to receive(:flock).twice.and_return(0)
49
+ expect(instance.logger).to receive(:debug).with(%q{acquiring lock 'some_lock:long_running'})
50
+ expect(instance.logger).to receive(:debug).with(%q{releasing lock 'some_lock:long_running'})
51
+ end
52
+ it { expect { |b| instance.with_exclusive_lock('some_lock', &b) }.to yield_control }
53
+ end
54
+
55
+ context 'when lock cannot be acquired' do
56
+ before do
57
+ instance.filesystem.add_path(:run_dir, run_dir)
58
+ expect(instance.logger).to receive(:error).with(/acquire lock attempt failed for 'some_lock'/)
59
+ expect_any_instance_of(File).to receive(:flock).twice.and_return(1)
60
+ end
61
+
62
+ it { expect { |b| instance.with_exclusive_lock('some_lock', &b) }.to_not raise_error }
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,1421 @@
1
+ # The MIT License (MIT)
2
+ #
3
+ # Copyright (c) 2014-2015, VMware, Inc. All Rights Reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ require 'spec_helper'
24
+
25
+ require 'securerandom'
26
+
27
+ # NOTE when operating between hdfs and s3, hadoop fs requires s3n URI
28
+ # See: http://wiki.apache.org/hadoop/AmazonS3
29
+ shared_examples_for 'Filesystem' do
30
+ let(:filesystem) { Masamune::Filesystem.new }
31
+
32
+ let(:tmp_dir) { File.join(Dir.tmpdir, SecureRandom.hex, SecureRandom.hex) }
33
+ let!(:old_dir) { File.join(tmp_dir, SecureRandom.hex) }
34
+ let(:new_dir) { File.join(tmp_dir, SecureRandom.hex) }
35
+ let(:other_new_dir) { File.join(tmp_dir, SecureRandom.hex) }
36
+ let(:new_file) { File.join(old_dir, SecureRandom.hex) }
37
+ let(:other_new_file) { File.join(old_dir, SecureRandom.hex) }
38
+ let(:old_file) { File.join(old_dir, SecureRandom.hex + '.txt') }
39
+
40
+ before do
41
+ filesystem.configuration.retries = 0
42
+ FileUtils.mkdir_p(old_dir)
43
+ FileUtils.touch(old_file)
44
+ end
45
+
46
+ after do
47
+ FileUtils.rmdir(tmp_dir)
48
+ end
49
+
50
+ describe '#get_path' do
51
+ context 'after add_path is called' do
52
+ before do
53
+ instance.add_path(:home_dir, '/home')
54
+ end
55
+ it { expect(instance.get_path(:home_dir)).to eq('/home') }
56
+
57
+ context 'with extra directories' do
58
+ it { expect(instance.get_path(:home_dir, 'a', 'b', 'c')).to eq('/home/a/b/c') }
59
+ end
60
+
61
+ context 'with extra directories delimited by "/"' do
62
+ it { expect(instance.get_path(:home_dir, '/a/b', 'c')).to eq('/home/a/b/c') }
63
+ end
64
+
65
+ context 'with parameter substitution' do
66
+ before do
67
+ instance.configuration.params[:user] = 'zombo'
68
+ instance.add_path(:user_path, '/home/%user/files')
69
+ end
70
+ it { expect(instance.get_path(:user_path)).to eq('/home/zombo/files') }
71
+
72
+ context 'in extra section' do
73
+ before do
74
+ instance.configuration.params[:file] = 'anything_is_possible.txt'
75
+ end
76
+ it { expect(instance.get_path(:user_path, '%file')).to eq('/home/zombo/files/anything_is_possible.txt') }
77
+ end
78
+ end
79
+ end
80
+
81
+ context 'before add_path is called' do
82
+ it { expect(instance.get_path(:home_dir)).to be_a(Proc) }
83
+ end
84
+ end
85
+
86
+ describe '#parent_paths' do
87
+ subject { instance.parent_paths(path) }
88
+
89
+ context 'with local blank' do
90
+ let(:path) { '' }
91
+ it { is_expected.to eq([]) }
92
+ end
93
+
94
+ context 'with local path with slash' do
95
+ let(:path) { '/a/b/c' }
96
+ it { is_expected.to eq(['/', '/a', '/a/b']) }
97
+ end
98
+
99
+ context 'with local path without slash' do
100
+ let(:path) { 'a/b/c' }
101
+ it { is_expected.to eq(['a', 'a/b']) }
102
+ end
103
+
104
+ context 'with s3 bucket with blank' do
105
+ let(:path) { 's3://bucket' }
106
+ it { is_expected.to eq([]) }
107
+ end
108
+
109
+ context 'with s3 bucket with slash' do
110
+ let(:path) { 's3://bucket/' }
111
+ it { is_expected.to eq([]) }
112
+ end
113
+
114
+ context 'with s3 bucket with path' do
115
+ let(:path) { 's3://bucket/a/b/c' }
116
+ it { is_expected.to eq(['s3://bucket/', 's3://bucket/a', 's3://bucket/a/b']) }
117
+ end
118
+
119
+ context 'with hdfs directory with path' do
120
+ let(:path) { 'hdfs:///a/b/c' }
121
+ it { is_expected.to eq(['hdfs:///', 'hdfs:///a', 'hdfs:///a/b']) }
122
+ end
123
+ end
124
+
125
+ describe '#root_path?' do
126
+ subject { instance.root_path?(path) }
127
+
128
+ context 'with nil' do
129
+ let(:path) { nil }
130
+ it { expect { |b| subject }.to raise_error ArgumentError }
131
+ end
132
+
133
+ context 'with blank' do
134
+ let(:path) { ' ' }
135
+ it { expect { |b| subject }.to raise_error ArgumentError }
136
+ end
137
+
138
+ context 'with empty' do
139
+ let(:path) { '' }
140
+ it { expect { |b| subject }.to raise_error ArgumentError }
141
+ end
142
+
143
+ context 'with relative path' do
144
+ let(:path) { 'tmp' }
145
+ it { expect { |b| subject }.to raise_error ArgumentError }
146
+ end
147
+
148
+ context 'with local root' do
149
+ let(:path) { '/' }
150
+ it { is_expected.to eq(true) }
151
+ end
152
+
153
+ context 'with local non-root' do
154
+ let(:path) { '/tmp' }
155
+ it { is_expected.to eq(false) }
156
+ end
157
+
158
+ context 'with hdfs root' do
159
+ let(:path) { 'file:///' }
160
+ it { is_expected.to eq(true) }
161
+ end
162
+
163
+ context 'with hdfs non-root' do
164
+ let(:path) { 'file:///tmp' }
165
+ it { is_expected.to eq(false) }
166
+ end
167
+
168
+ context 'with s3 root' do
169
+ let(:path) { 's3://bucket/' }
170
+ it { is_expected.to eq(true) }
171
+ end
172
+
173
+ context 'with s3 non-root' do
174
+ let(:path) { 's3://bucket/tmp' }
175
+ it { is_expected.to eq(false) }
176
+ end
177
+
178
+ context 'with s3 bucket' do
179
+ let(:path) { 's3://bucket' }
180
+ it { is_expected.to eq(true) }
181
+ end
182
+ end
183
+
184
+ describe '#resolve_file' do
185
+ subject { instance.resolve_file(paths) }
186
+
187
+ context 'with nil' do
188
+ let(:paths) { nil }
189
+ it { is_expected.to be_nil }
190
+ end
191
+
192
+ context 'with empty' do
193
+ let(:paths) { [] }
194
+ it { is_expected.to be_nil }
195
+ end
196
+
197
+ context 'with one file' do
198
+ let(:paths) { old_file }
199
+ it { is_expected.to eq(old_file) }
200
+ end
201
+
202
+ context 'with directories and file' do
203
+ let(:paths) { [old_dir, new_dir, new_file, old_file] }
204
+ it { is_expected.to eq(old_file) }
205
+ end
206
+ end
207
+
208
+ describe '#dirname' do
209
+ subject { instance.dirname(path) }
210
+
211
+ context 'with local blank' do
212
+ let(:path) { '' }
213
+ it { is_expected.to be_blank }
214
+ end
215
+
216
+ context 'with local path with slash' do
217
+ let(:path) { '/a/b/c' }
218
+ it { is_expected.to eq('/a/b') }
219
+ end
220
+
221
+ context 'with local path without slash' do
222
+ let(:path) { 'a/b/c' }
223
+ it { is_expected.to eq('a/b') }
224
+ end
225
+
226
+ context 'with local relative path' do
227
+ let(:path) { '/a/b/../c' }
228
+ it { is_expected.to eq('/a/c') }
229
+ end
230
+
231
+ context 'with s3 bucket with blank' do
232
+ let(:path) { 's3://bucket' }
233
+ it { is_expected.to eq('s3://bucket') }
234
+ end
235
+
236
+ context 'with s3 bucket with slash' do
237
+ let(:path) { 's3://bucket/' }
238
+ it { is_expected.to eq('s3://bucket/') }
239
+ end
240
+
241
+ context 'with s3 bucket with path' do
242
+ let(:path) { 's3://bucket/a/b/c' }
243
+ it { is_expected.to eq('s3://bucket/a/b') }
244
+ end
245
+
246
+ context 'with s3 bucket with relative path' do
247
+ let(:path) { 's3://bucket/a/b/../c' }
248
+ it { is_expected.to eq('s3://bucket/a/c') }
249
+ end
250
+
251
+ context 'with hdfs directory with path' do
252
+ let(:path) { 'hdfs:///a/b/c' }
253
+ it { is_expected.to eq('hdfs:///a/b') }
254
+ end
255
+
256
+ context 'with hdfs directory with path' do
257
+ let(:path) { 'hdfs:///a/b/c' }
258
+ it { is_expected.to eq('hdfs:///a/b') }
259
+ end
260
+
261
+ context 'with hdfs directory with relative path' do
262
+ let(:path) { 'hdfs:///a/b/../c' }
263
+ it { is_expected.to eq('hdfs:///a/c') }
264
+ end
265
+ end
266
+
267
+ describe '#basename' do
268
+ subject { instance.basename(path) }
269
+
270
+ context 'with local blank' do
271
+ let(:path) { '' }
272
+ it { is_expected.to be_blank }
273
+ end
274
+
275
+ context 'with local path with slash' do
276
+ let(:path) { '/a/b/c' }
277
+ it { is_expected.to eq('c') }
278
+ end
279
+
280
+ context 'with local path without slash' do
281
+ let(:path) { 'a/b/c' }
282
+ it { is_expected.to eq('c') }
283
+ end
284
+
285
+ context 'with local relative path' do
286
+ let(:path) { '/a/b/../c' }
287
+ it { is_expected.to eq('c') }
288
+ end
289
+
290
+ context 'with s3 bucket with blank' do
291
+ let(:path) { 's3://bucket' }
292
+ it { is_expected.to be_nil }
293
+ end
294
+
295
+ context 'with s3 bucket with slash' do
296
+ let(:path) { 's3://bucket/' }
297
+ it { is_expected.to be_nil }
298
+ end
299
+
300
+ context 'with s3 bucket with path' do
301
+ let(:path) { 's3://bucket/a/b/c' }
302
+ it { is_expected.to eq('c') }
303
+ end
304
+
305
+ context 'with s3 bucket with relative path' do
306
+ let(:path) { 's3://bucket/a/b/../c' }
307
+ it { is_expected.to eq('c') }
308
+ end
309
+
310
+ context 'with hdfs directory with path' do
311
+ let(:path) { 'hdfs:///a/b/c' }
312
+ it { is_expected.to eq('c') }
313
+ end
314
+
315
+ context 'with hdfs directory with path' do
316
+ let(:path) { 'hdfs:///a/b/c' }
317
+ it { is_expected.to eq('c') }
318
+ end
319
+
320
+ context 'with hdfs directory with relative path' do
321
+ let(:path) { 'hdfs:///a/b/../c' }
322
+ it { is_expected.to eq('c') }
323
+ end
324
+ end
325
+
326
+ describe '#touch!' do
327
+ subject do
328
+ File.exists?(new_file) && File.exists?(other_new_file)
329
+ end
330
+
331
+ context 'local' do
332
+ before do
333
+ instance.touch!(new_file, other_new_file)
334
+ end
335
+ it { is_expected.to eq(true) }
336
+ end
337
+
338
+ context 'hdfs' do
339
+ it do
340
+ expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + old_dir).once
341
+ expect(filesystem).to receive(:hadoop_fs).with('-touchz', 'file://' + new_file, 'file://' + other_new_file).once
342
+ instance.touch!('file://' + new_file, 'file://' + other_new_file)
343
+ end
344
+ end
345
+
346
+ context 's3' do
347
+ it do
348
+ expect(filesystem).to receive(:s3cmd).with('put', an_instance_of(String), 's3://bucket/file').at_most(:once)
349
+ expect(filesystem).to receive(:s3cmd).with('put', an_instance_of(String), 's3://bucket/other_file').at_most(:once)
350
+ instance.touch!('s3://bucket/file', 's3://bucket/other_file')
351
+ end
352
+ end
353
+ end
354
+
355
+ describe '#exists?' do
356
+ context 'local missing file' do
357
+ subject { instance.exists?(new_file) }
358
+ it { is_expected.to eq(false) }
359
+ end
360
+
361
+ context 'hdfs missing file' do
362
+ before do
363
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
364
+ and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}")
365
+ expect(filesystem).to receive(:hadoop_fs).with('-test', '-e', 'file://' + new_file, safe: true).at_most(:once).and_return(mock_failure)
366
+ end
367
+ subject { instance.exists?('file://' + new_file) }
368
+ it { is_expected.to eq(false) }
369
+ end
370
+
371
+ context 'local existing file' do
372
+ subject { instance.exists?(old_file) }
373
+ it { is_expected.to eq(true) }
374
+ end
375
+
376
+ context 'hdfs existing file' do
377
+ before do
378
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
379
+ and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}").
380
+ and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
381
+ expect(filesystem).to receive(:hadoop_fs).with('-test', '-e', 'file://' + old_file, safe: true).at_most(:once).and_return(mock_success)
382
+ end
383
+ subject { instance.exists?('file://' + old_file) }
384
+ it { is_expected.to eq(true) }
385
+ end
386
+
387
+ context 's3 existing file' do
388
+ before do
389
+ expect(filesystem).to receive(:s3cmd).with('ls', 's3://bucket/00', safe: true).at_most(:once).
390
+ and_yield(%q(2013-05-24 18:52 2912 s3://bucket/00)).
391
+ and_yield(%q(2013-05-24 18:52 2912 s3://bucket/01))
392
+ expect(filesystem).to receive(:s3cmd).with('ls', '--recursive', 's3://bucket/00', safe: true).at_most(:once).
393
+ and_yield(%q(2013-05-24 18:52 2912 s3://bucket/00)).
394
+ and_yield(%q(2013-05-24 18:52 2912 s3://bucket/01))
395
+ end
396
+
397
+ subject { instance.exists?('s3://bucket/00') }
398
+
399
+ it { is_expected.to eq(true) }
400
+ end
401
+
402
+ context 's3 missing file' do
403
+ before do
404
+ expect(filesystem).to receive(:s3cmd).with('ls', 's3://bucket/0', safe: true).at_most(:once).
405
+ and_yield(%q(2013-05-24 18:52 2912 s3://bucket/00)).
406
+ and_yield(%q(2013-05-24 18:52 2912 s3://bucket/01))
407
+ expect(filesystem).to receive(:s3cmd).with('ls', '--recursive', 's3://bucket/0', safe: true).at_most(:once).
408
+ and_yield(%q(2013-05-24 18:52 2912 s3://bucket/00)).
409
+ and_yield(%q(2013-05-24 18:52 2912 s3://bucket/01))
410
+ end
411
+
412
+ subject { instance.exists?('s3://bucket/0') }
413
+
414
+ it { is_expected.to eq(false) }
415
+ end
416
+ end
417
+
418
+ describe '#stat' do
419
+ subject(:stat) { result }
420
+ context 'local missing file' do
421
+ let(:result) { instance.stat(new_file) }
422
+ it { is_expected.to be_nil }
423
+ end
424
+
425
+ context 'hdfs missing file' do
426
+ before do
427
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
428
+ and_yield('')
429
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + old_dir + '/*', safe: true).at_most(:once).
430
+ and_yield('')
431
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + new_file + '/*', safe: true).at_most(:once).
432
+ and_yield('')
433
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + new_file, safe: true).at_most(:once).
434
+ and_yield('')
435
+ end
436
+ let(:result) { instance.stat('file://' + new_file) }
437
+ it { is_expected.to be_nil }
438
+ end
439
+
440
+ context 's3 missing file' do
441
+ before do
442
+ expect(filesystem).to receive(:s3cmd).with('ls', 's3://bucket/', safe: true).at_most(:once)
443
+ expect(filesystem).to receive(:s3cmd).with('ls', '--recursive', %r{s3://bucket/[\*|file.txt]}, safe: true).
444
+ and_yield('')
445
+ end
446
+ let(:result) { instance.stat('s3://bucket/file.txt') }
447
+ it { is_expected.to be_nil }
448
+ end
449
+
450
+ context 'local existing file' do
451
+ let(:result) { instance.stat(old_file) }
452
+
453
+ describe '#name' do
454
+ subject { stat.name }
455
+ it { is_expected.to eq(old_file) }
456
+ end
457
+
458
+ describe '#mtime' do
459
+ subject { stat.mtime }
460
+ it { is_expected.to eq(File.stat(old_file).mtime.at_beginning_of_minute.utc) }
461
+ it { is_expected.to be_a(Time) }
462
+ end
463
+
464
+ describe '#size' do
465
+ subject { stat.size }
466
+ it { is_expected.to be_an(Integer) }
467
+ end
468
+ end
469
+
470
+ context 'local existing file with glob' do
471
+ let(:result) { instance.stat(File.join(old_dir, '*')) }
472
+ it { expect { result }.to raise_error ArgumentError }
473
+ end
474
+
475
+ context 'local existing file (recursive)' do
476
+ let(:result) { instance.stat(File.join(tmp_dir, '*')) }
477
+ it { expect { result }.to raise_error /cannot contain wildcard/ }
478
+ end
479
+
480
+ context 'hdfs existing file' do
481
+ before do
482
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
483
+ and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_dir}").
484
+ and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
485
+ expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + old_file, safe: true).at_most(:once).
486
+ and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
487
+ end
488
+
489
+ let(:result) { instance.stat('file://' + old_file) }
490
+
491
+ describe '#name' do
492
+ subject { stat.name }
493
+ it { is_expected.to eq('file://' + old_file) }
494
+ end
495
+
496
+ describe '#mtime' do
497
+ subject { stat.mtime }
498
+ it { is_expected.to eq(Time.parse('2015-02-24 12:09:00 +0000')) }
499
+ it { is_expected.to be_a(Time) }
500
+ end
501
+
502
+ describe '#size' do
503
+ subject { stat.size }
504
+ it { is_expected.to eq(68) }
505
+ end
506
+ end
507
+
508
+ context 's3 existing file' do
509
+ before do
510
+ expect(filesystem).to receive(:s3cmd).with('ls', '--recursive', %r{s3://bucket/[\*|file.txt]}, safe: true).
511
+ and_yield(%q(2013-05-24 18:52 2912 s3://bucket/file.txt))
512
+ end
513
+ let(:result) { instance.stat('s3://bucket/file.txt') }
514
+
515
+ describe '#name' do
516
+ subject { stat.name }
517
+ it { is_expected.to eq('s3://bucket/file.txt') }
518
+ end
519
+
520
+ describe '#mtime' do
521
+ subject { stat.mtime }
522
+ it { is_expected.to eq(Time.parse('2013-05-24 18:52:00 +0000')) }
523
+ it { is_expected.to be_a(Time) }
524
+ end
525
+
526
+ describe '#size' do
527
+ subject { stat.size }
528
+ it { is_expected.to eq(2912) }
529
+ end
530
+ end
531
+
532
+ context 's3 existing directory' do
533
+ before do
534
+ expect(filesystem).to receive(:s3cmd).with('ls', '--recursive', %r{s3://bucket/[\*|dir]}, safe: true).
535
+ and_yield(%q(2013-05-24 18:52 2912 s3://bucket/dir/file.txt))
536
+ end
537
+
538
+ let(:result) { instance.stat('s3://bucket/dir') }
539
+
540
+ describe '#name' do
541
+ subject { stat.name }
542
+ it { is_expected.to eq('s3://bucket/dir/file.txt') }
543
+ end
544
+
545
+ describe '#mtime' do
546
+ subject { stat.mtime }
547
+ it { is_expected.to eq(Time.parse('2013-05-24 18:52:00 +0000')) }
548
+ it { is_expected.to be_a(Time) }
549
+ end
550
+
551
+ describe '#size' do
552
+ subject { stat.size }
553
+ it { is_expected.to eq(2912) }
554
+ end
555
+ end
556
+ end
557
+
558
# Verifies #mkdir! for local paths, hdfs-style file:// URLs and s3 URLs.
describe '#mkdir!' do
  # True only when both requested directories exist on local disk.
  # Dir.exist? is used because Dir.exists? was deprecated and removed in Ruby 3.2.
  subject do
    Dir.exist?(new_dir) && Dir.exist?(other_new_dir)
  end

  context 'local directory' do
    before do
      instance.mkdir!(new_dir, other_new_dir)
    end

    it { is_expected.to eq(true) }
  end

  context 'hdfs directory' do
    it do
      # Both URLs are expected in a single `-mkdir -p` invocation.
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir, 'file://' + other_new_dir).once
      instance.mkdir!('file://' + new_dir, 'file://' + other_new_dir)
    end
  end

  context 's3 directory' do
    it do
      # Each directory may receive one `.not_empty` marker upload; at_most(:once)
      # is also satisfied when no upload happens at all.
      expect(filesystem).to receive(:s3cmd).with('put', an_instance_of(String), 's3://bucket/dir/.not_empty').at_most(:once)
      expect(filesystem).to receive(:s3cmd).with('put', an_instance_of(String), 's3://bucket/other_dir/.not_empty').at_most(:once)
      instance.mkdir!('s3://bucket/dir', 's3://bucket/other_dir')
    end
  end
end
585
+
586
# Exercises #glob against local paths, hdfs-style file:// URLs and s3 URLs,
# checking both the returned collection and the block (yielding) form.
describe '#glob' do
  subject do
    instance.glob(pattern)
  end

  context 'local no matches' do
    let(:pattern) { File.join(new_dir, '*') }

    it { is_expected.to be_empty }
    it { expect { |b| instance.glob(pattern, &b) }.to_not yield_control }
  end

  context 'local one matches' do
    let(:pattern) { File.join(File.dirname(old_file), '*') }

    it { is_expected.not_to be_empty }
    it { expect { |b| instance.glob(pattern, &b) }.to yield_with_args(old_file) }
  end

  context 'local one matches (recursive)' do
    let(:pattern) { File.join(tmp_dir, '*') }

    it 'has 2 items' do
      expect(subject.size).to eq(2)
    end
    it { is_expected.to include old_dir }
    it { is_expected.to include old_file }
    it { expect { |b| instance.glob(pattern, &b) }.to yield_successive_args(old_dir, old_file) }
  end

  context 'local one matches (with suffix)' do
    let(:pattern) { File.join(File.dirname(old_file), '*.txt') }

    it 'has 1 item' do
      expect(subject.size).to eq(1)
    end
    it { is_expected.to include old_file }
    it { expect { |b| instance.glob(pattern, &b) }.to yield_with_args(old_file) }
  end

  context 'hdfs no matches' do
    before do
      # Three alternative listing calls are accepted; at_most(:once) lets any
      # of them go unused.
      expect(filesystem).to receive(:hadoop_fs).with('-ls', 'file://' + new_dir + '/*', safe: true).at_most(:once).
        and_yield('')
      expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + new_dir + '/*', safe: true).at_most(:once).
        and_yield('')
      expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(new_dir) + '/*', safe: true).at_most(:once).
        and_yield('')
    end

    # NOTE(review): the implicit subject globs `pattern` without the file://
    # prefix; only the block-form example below passes a file:// URL.
    let(:pattern) { File.join(new_dir, '*') }

    it { is_expected.to be_empty }
    it { expect { |b| instance.glob('file://' + pattern, &b) }.to_not yield_control }
  end

  context 'hdfs one matches' do
    before do
      expect(filesystem).to receive(:hadoop_fs).with('-ls', 'file://' + old_dir + '/*', safe: true).at_most(:once).
        and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
      expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + old_dir + '/*', safe: true).at_most(:once).
        and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
      expect(filesystem).to receive(:hadoop_fs).with('-ls', '-R', 'file://' + File.dirname(old_dir) + '/*', safe: true).at_most(:once).
        and_yield("drwxrwxrwt - root wheel 68 2015-02-24 12:09 #{old_file}")
    end

    let(:pattern) { File.join(File.dirname(old_file), '*') }

    it { is_expected.not_to be_empty }
    it { expect { |b| instance.glob('file://' + pattern, &b) }.to yield_with_args('file://' + old_file) }
  end

  context 's3 no matches' do
    let(:pattern) { 's3://bucket/dir/*.txt' }

    before do
      # No and_yield: the listing produces no lines.
      expect(filesystem).to receive(:s3cmd).with('ls', '--recursive', "s3://bucket/dir", safe: true).at_most(:once)
      expect(filesystem).to receive(:s3cmd).with('ls', '--recursive', "s3://bucket/dir/*", safe: true).at_most(:once)
    end

    it { is_expected.to be_empty }
  end

  context 's3 no matches with implicit glob results' do
    let(:pattern) { 's3://bucket/dir/0' }

    before do
      # The matcher accepts a listing of either the bucket wildcard or anything
      # under dir. It was previously written as the character class
      # [\*|dir/*], which matched a single character rather than the
      # alternatives.
      expect(filesystem).to receive(:s3cmd).with('ls', '--recursive', %r{s3://bucket/(?:\*|dir)}, safe: true).
        and_yield(%q(2013-05-24 18:52 2912 s3://bucket/dir/01.txt)).
        and_yield(%q(2013-05-24 18:53 2912 s3://bucket/dir/02.txt))
    end

    # Listed keys that merely share the prefix must not be reported as matches.
    it { is_expected.to be_empty }
  end

  context 's3 one matches' do
    let(:pattern) { 's3://bucket/dir/*.txt' }

    before do
      # Alternation instead of the former single-character class [\*|dir/*].
      expect(filesystem).to receive(:s3cmd).with('ls', '--recursive', %r{s3://bucket/(?:\*|dir)}, safe: true).
        and_yield(%q(2013-05-24 18:52 2912 s3://bucket/dir/file.txt)).
        and_yield(%q(2013-05-24 18:53 2912 s3://bucket/dir/file.csv))
    end

    it { is_expected.to include 's3://bucket/dir/file.txt' }
    it { is_expected.not_to include 's3://bucket/dir/file.csv' }
  end

  context 's3 many matches' do
    let(:pattern) { 's3://bucket/dir/*' }

    before do
      # Alternation instead of the former single-character class [\*|dir/*].
      expect(filesystem).to receive(:s3cmd).with('ls', '--recursive', %r{s3://bucket/(?:\*|dir)}, safe: true).
        and_yield(%q( DIR s3://bucket/dir/file_$folder$)).
        and_yield(%q(2013-05-24 18:52 2912 s3://bucket/dir/file.txt)).
        and_yield(%q(2013-05-24 18:53 2912 s3://bucket/dir/file.csv))
    end

    it { is_expected.to include 's3://bucket/dir/file.txt' }
    it { is_expected.to include 's3://bucket/dir/file.csv' }
  end
end
700
+
701
# Checks that #glob_sort with `order: :basename` returns the globbed paths
# ordered by file name (00, 01, 02) instead of by their full path.
describe '#glob_sort' do
  subject { instance.glob_sort('/tmp/*', order: :basename) }

  before do
    # Fix the glob result for every Masamune::Filesystem so the input order is known.
    unsorted_paths = %w[/tmp/a/02.txt /tmp/b/01.txt /tmp/c/00.txt]
    allow_any_instance_of(Masamune::Filesystem).to receive(:glob).and_return(unsorted_paths)
  end

  it { is_expected.to eq(%w[/tmp/c/00.txt /tmp/b/01.txt /tmp/a/02.txt]) }
end
712
+
713
# Verifies #copy_file_to_file for every combination of local, hdfs (file://)
# and s3 source/destination. Remote combinations only assert which command is
# issued on the underlying filesystem.
describe '#copy_file_to_file' do
  let(:result_file) { File.join(new_dir, File.basename(old_file)) }

  # File.exist? replaces File.exists?, which was deprecated and removed in Ruby 3.2.
  subject do
    File.exist?(result_file)
  end

  context 'local file to local file' do
    before do
      instance.copy_file_to_file(old_file, result_file)
    end

    it { is_expected.to eq(true) }
  end

  context 'local file to s3 file' do
    it do
      expect(filesystem).to receive(:s3cmd).with('put', old_file, 's3://bucket/new_dir/new_file')
      instance.copy_file_to_file(old_file, 's3://bucket/new_dir/new_file')
    end
  end

  context 'local file to hdfs file' do
    it do
      # The destination's parent directory is expected to be created first.
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-copyFromLocal', 'file://' + old_file, 'file://' + result_file)
      instance.copy_file_to_file(old_file, 'file://' + result_file)
    end
  end

  context 'hdfs file to hdfs file' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 'file://' + old_file, 'file://' + result_file)
      instance.copy_file_to_file('file://' + old_file, 'file://' + result_file)
    end
  end

  context 'hdfs file to local file' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-copyToLocal', 'file://' + old_file, 'file://' + result_file)
      instance.copy_file_to_file('file://' + old_file, result_file)
    end
  end

  context 'hdfs file to s3 file' do
    it do
      # The s3:// destination is expected to reach hadoop as an s3n:// URL.
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 'file://' + old_file, 's3n://bucket/new_dir/new_file')
      instance.copy_file_to_file('file://' + old_file, 's3://bucket/new_dir/new_file')
    end
  end

  context 's3 file to s3 file' do
    it do
      expect(filesystem).to receive(:s3cmd).with('cp', 's3://bucket/old_file', 's3://bucket/new_dir/new_file')
      instance.copy_file_to_file('s3://bucket/old_file', 's3://bucket/new_dir/new_file')
    end
  end

  context 's3 file to local file' do
    it do
      expect(filesystem).to receive(:s3cmd).with('get', 's3://bucket/old_file', new_file)
      instance.copy_file_to_file('s3://bucket/old_file', new_file)
    end
  end

  context 's3 file to hdfs file' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 's3n://bucket/old_file', 'file://' + result_file)
      instance.copy_file_to_file('s3://bucket/old_file', 'file://' + result_file)
    end
  end
end
787
+
788
# Verifies #copy_file_to_dir for every combination of local, hdfs (file://)
# and s3 source file / destination directory.
describe '#copy_file_to_dir' do
  let(:result_file) { File.join(new_dir, File.basename(old_file)) }

  # File.exist? replaces File.exists?, which was deprecated and removed in Ruby 3.2.
  subject do
    File.exist?(result_file)
  end

  context 'local file to local dir' do
    before do
      instance.copy_file_to_dir(old_file, new_dir)
    end

    it { is_expected.to eq(true) }
  end

  context 'local file to same local dir' do
    before do
      instance.copy_file_to_dir(old_file, old_dir)
    end

    # Nothing is expected to appear under new_dir when the file is copied into old_dir.
    it { is_expected.to eq(false) }
  end

  context 'local file to s3 dir' do
    it do
      # The s3 destination is expected to carry a trailing slash.
      expect(filesystem).to receive(:s3cmd).with('put', old_file, 's3://bucket/new_dir/')
      instance.copy_file_to_dir(old_file, 's3://bucket/new_dir')
    end
  end

  context 'local file to hdfs dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-copyFromLocal', 'file://' + old_file, 'file://' + new_dir)
      instance.copy_file_to_dir(old_file, 'file://' + new_dir)
    end
  end

  context 'hdfs file to hdfs dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 'file://' + old_file, 'file://' + new_dir)
      instance.copy_file_to_dir('file://' + old_file, 'file://' + new_dir)
    end
  end

  context 'hdfs file to local dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-copyToLocal', 'file://' + old_file, 'file://' + new_dir)
      instance.copy_file_to_dir('file://' + old_file, new_dir)
    end
  end

  context 'hdfs file to s3 dir' do
    it do
      # The s3:// destination is expected to reach hadoop as an s3n:// URL.
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 'file://' + old_file, 's3n://bucket/new_dir')
      instance.copy_file_to_dir('file://' + old_file, 's3://bucket/new_dir')
    end
  end

  context 's3 file to s3 dir' do
    it do
      expect(filesystem).to receive(:s3cmd).with('cp', 's3://bucket/old_file', 's3://bucket/new_dir/')
      instance.copy_file_to_dir('s3://bucket/old_file', 's3://bucket/new_dir')
    end
  end

  context 's3 file to local dir' do
    it do
      expect(filesystem).to receive(:s3cmd).with('get', 's3://bucket/old_file', new_dir)
      instance.copy_file_to_dir('s3://bucket/old_file', new_dir)
    end
  end

  context 's3 file to hdfs dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 's3n://bucket/old_file', 'file://' + new_dir)
      instance.copy_file_to_dir('s3://bucket/old_file', 'file://' + new_dir)
    end
  end
end
870
+
871
# Verifies #copy_dir for every combination of local, hdfs (file://) and s3
# source/destination directories.
describe '#copy_dir' do
  # Checks for old_file beneath a copy of old_dir nested inside new_dir.
  # File.exist? replaces File.exists?, which was deprecated and removed in Ruby 3.2.
  subject do
    File.exist?(File.join(new_dir, File.basename(old_dir), File.basename(old_file)))
  end

  context 'local dir to local dir' do
    before do
      instance.copy_dir(old_dir, new_dir)
    end

    it { is_expected.to eq(true) }
  end

  context 'local dir to s3 dir' do
    it do
      expect(filesystem).to receive(:s3cmd).with('put', '--recursive', old_dir, 's3://bucket/new_dir/')
      instance.copy_dir(old_dir, 's3://bucket/new_dir')
    end
  end

  context 'local dir to hdfs dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-copyFromLocal', 'file://' + old_dir, 'file://' + new_dir)
      instance.copy_dir(old_dir, 'file://' + new_dir)
    end
  end

  context 'hdfs dir to hdfs dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 'file://' + old_dir, 'file://' + new_dir)
      instance.copy_dir('file://' + old_dir, 'file://' + new_dir)
    end
  end

  context 'hdfs dir to local dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-copyToLocal', 'file://' + old_dir, 'file://' + new_dir)
      instance.copy_dir('file://' + old_dir, new_dir)
    end
  end

  context 'hdfs dir to s3 dir' do
    it do
      # The s3:// destination is expected to reach hadoop as an s3n:// URL.
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 'file://' + old_dir, 's3n://bucket/new_dir')
      instance.copy_dir('file://' + old_dir, 's3://bucket/new_dir')
    end
  end

  context 's3 dir to s3 dir' do
    it do
      # Both source and destination are expected with trailing slashes.
      expect(filesystem).to receive(:s3cmd).with('cp', '--recursive', 's3://bucket/old_dir/', 's3://bucket/new_dir/')
      instance.copy_dir('s3://bucket/old_dir', 's3://bucket/new_dir')
    end
  end

  context 's3 dir to local dir' do
    it do
      # The download target is new_dir plus the source directory's base name.
      expect(filesystem).to receive(:s3cmd).with('get', '--recursive', '--skip-existing', 's3://bucket/old_dir/', File.join(new_dir, 'old_dir'))
      instance.copy_dir('s3://bucket/old_dir', new_dir)
    end
  end

  context 's3 dir to hdfs dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 's3n://bucket/old_dir', 'file://' + new_dir)
      instance.copy_dir('s3://bucket/old_dir', 'file://' + new_dir)
    end
  end
end
943
+
944
# Verifies #remove_file for local paths, hdfs-style file:// URLs and s3 URLs.
describe '#remove_file' do
  # File.exist? replaces File.exists?, which was deprecated and removed in Ruby 3.2.
  subject do
    File.exist?(old_file)
  end

  # Label corrected from 'local false' to match the sibling 'hdfs file' / 's3 file' contexts.
  context 'local file' do
    before do
      instance.remove_file(old_file)
    end

    it { is_expected.to eq(false) }
  end

  context 'hdfs file' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-rm', 'file://' + old_file)
      instance.remove_file('file://' + old_file)
    end
  end

  context 's3 file' do
    it do
      expect(filesystem).to receive(:s3cmd).with('del', 's3://bucket/file')
      instance.remove_file('s3://bucket/file')
    end
  end
end
971
+
972
# Verifies #remove_dir for local, hdfs (file://) and s3 directories, including
# the refusal to remove a root path.
describe '#remove_dir' do
  # File.exist? replaces File.exists?, which was deprecated and removed in Ruby 3.2.
  subject do
    File.exist?(old_dir)
  end

  context 'local dir' do
    before do
      expect(filesystem).to receive(:root_path?).once.and_return(false)
      instance.remove_dir(old_dir)
    end

    it { is_expected.to eq(false) }
  end

  context 'local root dir' do
    before do
      # Force the root-path check to report a root so removal must be refused.
      expect(filesystem).to receive(:root_path?).once.and_return(true)
    end

    # Parenthesised to avoid Ruby's "ambiguous first argument" warning for a bare regexp.
    it { expect { instance.remove_dir(old_dir) }.to raise_error(/root path/) }
  end

  context 'hdfs dir' do
    it do
      expect(filesystem).to receive(:root_path?).once.and_return(false)
      expect(filesystem).to receive(:hadoop_fs).with('-rmr', 'file://' + old_dir)
      instance.remove_dir('file://' + old_dir)
    end
  end

  context 'hdfs root dir' do
    before do
      expect(filesystem).to receive(:root_path?).once.and_return(true)
    end

    it { expect { instance.remove_dir('file://' + old_dir) }.to raise_error(/root path/) }
  end

  context 's3 dir' do
    it do
      # Both the directory contents and its `_$folder$` marker key are expected to be deleted.
      expect(filesystem).to receive(:s3cmd).with('del', '--recursive', 's3://bucket/dir/')
      expect(filesystem).to receive(:s3cmd).with('del', '--recursive', 's3://bucket/dir_$folder$')
      instance.remove_dir('s3://bucket/dir')
    end
  end

  context 's3 root dir' do
    before do
      # No s3cmd invocation may happen for a bucket root.
      expect(filesystem).to receive(:s3cmd).never
    end

    it { expect { instance.remove_dir('s3://bucket/') }.to raise_error(/root path/) }
  end
end
1026
+
1027
# Verifies #move_file_to_file for every combination of local, hdfs (file://)
# and s3 source/destination files.
describe '#move_file_to_file' do
  # These two predicates were previously declared with two `subject(:name)`
  # calls in the same group, where the second silently replaced the implicit
  # subject. They are only ever referenced by name, so `let` is the correct
  # helper. File.exist? replaces File.exists?, removed in Ruby 3.2.
  let(:removes_old_file) do
    !File.exist?(old_file)
  end

  let(:creates_new_file) do
    File.exist?(new_file)
  end

  context 'local file to local file' do
    before do
      # The message expectation replaces FileUtils.chmod for this example.
      expect(FileUtils).to receive(:chmod).once
      instance.move_file_to_file(old_file, new_file)
    end

    it { expect(removes_old_file).to eq(true) }
    it { expect(creates_new_file).to eq(true) }
  end

  context 'local file to s3 file' do
    before do
      expect(filesystem).to receive(:s3cmd).with('put', old_file, 's3://bucket/new_dir/new_file')
      instance.move_file_to_file(old_file, 's3://bucket/new_dir/new_file')
    end

    it { expect(removes_old_file).to eq(true) }
  end

  context 'local file to hdfs file' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + old_dir).once
      expect(filesystem).to receive(:hadoop_fs).with('-moveFromLocal', 'file://' + old_file, 'file://' + new_file)
      instance.move_file_to_file(old_file, 'file://' + new_file)
    end
  end

  context 'hdfs file to hdfs file' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + old_dir).once
      expect(filesystem).to receive(:hadoop_fs).with('-mv', 'file://' + old_file, 'file://' + new_file)
      instance.move_file_to_file('file://' + old_file, 'file://' + new_file)
    end
  end

  context 'hdfs file to local file' do
    it do
      # The move is expected as a copy to local followed by removal of the source.
      expect(filesystem).to receive(:hadoop_fs).with('-copyToLocal', 'file://' + old_file, 'file://' + new_file)
      expect(filesystem).to receive(:hadoop_fs).with('-rm', 'file://' + old_file)
      instance.move_file_to_file('file://' + old_file, new_file)
    end
  end

  context 'hdfs file to s3 file' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 'file://' + old_file, 's3n://bucket/new_dir/new_file')
      expect(filesystem).to receive(:hadoop_fs).with('-rm', 'file://' + old_file)
      instance.move_file_to_file('file://' + old_file, 's3://bucket/new_dir/new_file')
    end
  end

  context 's3 file to s3 file' do
    it do
      expect(filesystem).to receive(:s3cmd).with('mv', 's3://bucket/old_file', 's3://bucket/new_dir/new_file')
      instance.move_file_to_file('s3://bucket/old_file', 's3://bucket/new_dir/new_file')
    end
  end

  context 's3 file to local file' do
    it do
      # The move is expected as a download followed by deletion of the source key.
      expect(filesystem).to receive(:s3cmd).with('get', 's3://bucket/old_file', new_file)
      expect(filesystem).to receive(:s3cmd).with('del', 's3://bucket/old_file')
      instance.move_file_to_file('s3://bucket/old_file', new_file)
    end
  end

  context 's3 file to hdfs file' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + File.dirname(new_file))
      expect(filesystem).to receive(:hadoop_fs).with('-mv', 's3n://bucket/old_file', 'file://' + new_file)
      instance.move_file_to_file('s3://bucket/old_file', 'file://' + new_file)
    end
  end
end
1110
+
1111
# Verifies #move_file_to_dir for every combination of local, hdfs (file://)
# and s3 source file / destination directory.
describe '#move_file_to_dir' do
  before do
    # The local destination directory is created up front for every example.
    FileUtils.mkdir_p(new_dir)
  end

  # Declared with `let` rather than two competing `subject(:name)` calls, since
  # both values are only referenced by name. File.exist? replaces File.exists?,
  # which was removed in Ruby 3.2.
  let(:removes_old_file) do
    !File.exist?(old_file)
  end

  let(:creates_new_file) do
    File.exist?(File.join(new_dir, File.basename(old_file)))
  end

  context 'local file to local dir' do
    before do
      # The message expectation replaces FileUtils.chmod for this example.
      expect(FileUtils).to receive(:chmod).once
      instance.move_file_to_dir(old_file, new_dir)
    end

    it { expect(removes_old_file).to eq(true) }
    it { expect(creates_new_file).to eq(true) }
  end

  context 'local file to s3 dir' do
    before do
      expect(filesystem).to receive(:s3cmd).with('put', old_file, 's3://bucket/new_dir/')
      instance.move_file_to_dir(old_file, 's3://bucket/new_dir')
    end

    it { expect(removes_old_file).to eq(true) }
  end

  context 'local file to hdfs dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir).once
      expect(filesystem).to receive(:hadoop_fs).with('-moveFromLocal', 'file://' + old_file, 'file://' + new_dir)
      instance.move_file_to_dir(old_file, 'file://' + new_dir)
    end
  end

  context 'hdfs file to hdfs dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir).once
      expect(filesystem).to receive(:hadoop_fs).with('-mv', 'file://' + old_file, 'file://' + new_dir)
      instance.move_file_to_dir('file://' + old_file, 'file://' + new_dir)
    end
  end

  context 'hdfs file to local dir' do
    it do
      # The move is expected as a copy to local followed by removal of the source.
      expect(filesystem).to receive(:hadoop_fs).with('-copyToLocal', 'file://' + old_file, 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-rm', 'file://' + old_file)
      instance.move_file_to_dir('file://' + old_file, new_dir)
    end
  end

  context 'hdfs file to s3 dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 'file://' + old_file, 's3n://bucket/new_dir/')
      expect(filesystem).to receive(:hadoop_fs).with('-rm', 'file://' + old_file)
      instance.move_file_to_dir('file://' + old_file, 's3://bucket/new_dir')
    end
  end

  context 's3 file to s3 dir' do
    it do
      expect(filesystem).to receive(:s3cmd).with('mv', 's3://bucket/old_file', 's3://bucket/new_dir/')
      instance.move_file_to_dir('s3://bucket/old_file', 's3://bucket/new_dir')
    end
  end

  context 's3 file to local dir' do
    it do
      # The move is expected as a download followed by deletion of the source key.
      expect(filesystem).to receive(:s3cmd).with('get', 's3://bucket/old_file', new_dir)
      expect(filesystem).to receive(:s3cmd).with('del', 's3://bucket/old_file')
      instance.move_file_to_dir('s3://bucket/old_file', new_dir)
    end
  end

  # Label corrected from 's3 file to hdfs file': the destination here is a directory.
  context 's3 file to hdfs dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-mv', 's3n://bucket/old_file', 'file://' + new_dir)
      instance.move_file_to_dir('s3://bucket/old_file', 'file://' + new_dir)
    end
  end
end
1198
+
1199
# Verifies #move_dir for every combination of local, hdfs (file://) and s3
# source/destination directories.
describe '#move_dir' do
  # Declared with `let` rather than two competing `subject(:name)` calls, since
  # both values are only referenced by name. File.exist? replaces File.exists?,
  # which was removed in Ruby 3.2.
  let(:removes_old_dir) do
    !File.exist?(old_dir)
  end

  let(:creates_new_dir) do
    File.exist?(new_dir)
  end

  context 'local dir to local dir' do
    before do
      instance.move_dir(old_dir, new_dir)
    end

    it { expect(removes_old_dir).to eq(true) }
    it { expect(creates_new_dir).to eq(true) }
  end

  context 'local dir to s3 dir' do
    before do
      # Both the local source and the s3 destination are expected with trailing slashes.
      expect(filesystem).to receive(:s3cmd).with('put', '--recursive', old_dir + '/', 's3://bucket/new_dir/')
      instance.move_dir(old_dir, 's3://bucket/new_dir')
    end

    it { expect(removes_old_dir).to eq(true) }
  end

  context 'local dir to hdfs dir' do
    it do
      # Only the destination's parent directory is expected to be created.
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + File.dirname(new_dir)).once
      expect(filesystem).to receive(:hadoop_fs).with('-moveFromLocal', 'file://' + old_dir, 'file://' + new_dir)
      instance.move_dir(old_dir, 'file://' + new_dir)
    end
  end

  context 'hdfs dir to hdfs dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + File.dirname(new_dir)).once
      expect(filesystem).to receive(:hadoop_fs).with('-mv', 'file://' + old_dir, 'file://' + new_dir)
      instance.move_dir('file://' + old_dir, 'file://' + new_dir)
    end
  end

  context 'hdfs dir to local dir' do
    it do
      # The move is expected as a copy to local followed by recursive removal of the source.
      expect(filesystem).to receive(:hadoop_fs).with('-copyToLocal', 'file://' + old_dir, 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-rmr', 'file://' + old_dir)
      instance.move_dir('file://' + old_dir, new_dir)
    end
  end

  context 'hdfs dir to s3 dir' do
    it do
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 'file://' + old_dir, 's3n://bucket/new_dir/')
      expect(filesystem).to receive(:hadoop_fs).with('-rmr', 'file://' + old_dir)
      instance.move_dir('file://' + old_dir, 's3://bucket/new_dir')
    end
  end

  context 's3 dir to s3 dir' do
    it do
      expect(filesystem).to receive(:s3cmd).with('mv', '--recursive', 's3://bucket/old_dir/', 's3://bucket/new_dir')
      instance.move_dir('s3://bucket/old_dir', 's3://bucket/new_dir')
    end
  end

  context 's3 dir to local dir' do
    it do
      # Download, then delete both the source contents and its `_$folder$` marker key.
      expect(filesystem).to receive(:s3cmd).with('get', '--recursive', 's3://bucket/old_dir/', new_dir)
      expect(filesystem).to receive(:s3cmd).with('del', '--recursive', 's3://bucket/old_dir/')
      expect(filesystem).to receive(:s3cmd).with('del', '--recursive', 's3://bucket/old_dir_$folder$')
      instance.move_dir('s3://bucket/old_dir', new_dir)
    end
  end

  context 's3 dir to hdfs dir' do
    it do
      # Copy through hadoop, then delete the s3 source with s3cmd.
      expect(filesystem).to receive(:hadoop_fs).with('-mkdir', '-p', 'file://' + new_dir)
      expect(filesystem).to receive(:hadoop_fs).with('-cp', 's3n://bucket/old_dir', 'file://' + new_dir)
      expect(filesystem).to receive(:s3cmd).with('del', '--recursive', 's3://bucket/old_dir/')
      expect(filesystem).to receive(:s3cmd).with('del', '--recursive', 's3://bucket/old_dir_$folder$')
      instance.move_dir('s3://bucket/old_dir', 'file://' + new_dir)
    end
  end
end
1284
+
1285
# Once a path is registered with `immutable: true`, every destructive operation
# touching it (or anything beneath it) must raise a RuntimeError naming both the
# protected directory and the path that was about to be modified.
context 'directory marked as immutable' do
  let(:dir) { 's3://bucket/incoming' }
  let(:file) { File.join(dir, '20130420.log') }

  before { instance.add_path(:incoming, dir, immutable: true) }

  describe '#remove_dir' do
    subject { instance.remove_dir(dir) }

    it { expect { subject }.to raise_error(RuntimeError, /#{dir} is marked as immutable, cannot modify #{dir}/) }

    context 'nested directory' do
      let(:nested_dir) { File.join(dir, '2013') }

      # Overrides the outer subject to target a child of the protected directory.
      subject { instance.remove_dir(nested_dir) }

      it { expect { subject }.to raise_error(RuntimeError, /#{dir} is marked as immutable, cannot modify #{nested_dir}/) }
    end
  end

  describe '#move_file_to_file' do
    subject { instance.move_file_to_file(file, 's3://bucket/processed/new_file') }

    it { expect { subject }.to raise_error(RuntimeError, /#{dir} is marked as immutable, cannot modify #{file}/) }
  end

  describe '#move_file_to_dir' do
    # NOTE(review): the source argument here is `dir`, not `file` - confirm this is intended.
    subject { instance.move_file_to_dir(dir, 's3://bucket/processed') }

    it { expect { subject }.to raise_error(RuntimeError, /#{dir} is marked as immutable, cannot modify #{dir}/) }
  end

  describe '#move_dir' do
    subject { instance.move_dir(dir, 's3://bucket/processed/') }

    it { expect { subject }.to raise_error(RuntimeError, /#{dir} is marked as immutable, cannot modify #{dir}/) }
  end
end
1334
+
1335
# Verifies that #cat returns an object whose #string holds the contents of the
# given file(s), including files discovered through a directory glob.
describe '#cat' do
  context 'simple file' do
    subject { instance.cat(new_file).string }

    before { instance.write('dog', new_file) }

    it { is_expected.to eq('dog') }
  end

  context 'result of directory glob' do
    subject do
      matched_paths = instance.glob(instance.path(:new_dir, '*'))
      instance.cat(*matched_paths).string
    end

    before do
      # Register new_dir under a symbolic name, then write a file several levels below it.
      instance.add_path(:new_dir, new_dir)
      nested_file = instance.path(:new_dir, 'a', 'b', 'c', 'dog')
      instance.write('dog', nested_file)
    end

    it { is_expected.to eq('dog') }
  end
end
1361
+
1362
# Verifies that #chown! completes without raising for a local path and for an
# hdfs-style file:// URL.
describe '#chown!' do
  context 'local' do
    subject(:operation) { instance.chown!(old_file) }

    it { expect { operation }.not_to raise_error }
  end

  context 'hdfs' do
    subject(:operation) { instance.chown!('file://' + old_file) }

    before do
      # The owner argument only needs to be some String; its value is not pinned.
      expect(filesystem).to receive(:hadoop_fs).with('-chown', '-R', instance_of(String), 'file://' + old_file).once
    end

    it { expect { operation }.not_to raise_error }
  end
end
1382
+
1383
# Pins the exact Regexp that #glob_to_regexp builds for paths with and without
# a wildcard, in both non-recursive (default here) and recursive mode.
describe '#glob_to_regexp' do
  subject(:file_regexp) { instance.glob_to_regexp(path, recursive: recursive) }

  # Non-recursive unless a context overrides it.
  let(:recursive) { false }

  context 'a path without glob' do
    let(:path) { '/tmp' }

    # Anchored at both ends.
    it { is_expected.to eq(%r{\A/tmp\z}) }
  end

  context 'a path without glob with recursive' do
    let(:path) { '/tmp' }
    let(:recursive) { true }

    # Only the start is anchored.
    it { is_expected.to eq(%r{\A/tmp}) }
  end

  context 'a path with glob' do
    let(:path) { '/tmp/*' }

    it { is_expected.to eq(%r{\A/tmp/?.*?}) }
  end

  context 'a path with glob with recursive' do
    let(:path) { '/tmp/*' }
    let(:recursive) { true }

    # Same expression as the non-recursive wildcard case.
    it { is_expected.to eq(%r{\A/tmp/?.*?}) }
  end
end
1409
+ end
1410
+
1411
# Runs the shared 'Filesystem' examples with `instance` bound directly to the
# `filesystem` helper.
describe Masamune::Filesystem do
  let(:instance) do
    filesystem
  end

  it_behaves_like 'Filesystem'
end
1416
+
1417
# Runs the same shared 'Filesystem' examples with `instance` bound to a
# CachedFilesystem constructed around the `filesystem` helper.
describe Masamune::CachedFilesystem do
  let(:instance) do
    described_class.new(filesystem)
  end

  it_behaves_like 'Filesystem'
end