imw 0.2.18 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (172) hide show
  1. data/Gemfile +7 -26
  2. data/Gemfile.lock +13 -38
  3. data/{LICENSE → LICENSE.txt} +1 -1
  4. data/README.textile +35 -0
  5. data/Rakefile +45 -22
  6. data/VERSION +1 -1
  7. data/examples/foo.rb +19 -0
  8. data/examples/html_selector.rb +22 -0
  9. data/examples/nes_game_list.csv +625 -0
  10. data/examples/nes_gamespot.csv +1371 -0
  11. data/examples/nes_nintendo.csv +624 -0
  12. data/examples/nes_unlicensed.csv +89 -0
  13. data/examples/nes_wikipedia.csv +710 -0
  14. data/examples/nibbler_test.rb +24 -0
  15. data/examples/script.rb +19 -0
  16. data/lib/imw.rb +28 -140
  17. data/lib/imw/error.rb +9 -0
  18. data/lib/imw/recordizer.rb +8 -0
  19. data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
  20. data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
  21. data/lib/imw/resource.rb +3 -119
  22. data/lib/imw/serializer.rb +7 -0
  23. data/lib/imw/serializer/json_serializer.rb +17 -0
  24. data/lib/imw/uri.rb +41 -0
  25. data/spec/resource_spec.rb +78 -0
  26. data/spec/uri_spec.rb +55 -0
  27. metadata +81 -232
  28. data/README.rdoc +0 -371
  29. data/bin/imw +0 -5
  30. data/bin/tsv_to_json.rb +0 -29
  31. data/etc/imwrc.rb +0 -26
  32. data/examples/dataset.rb +0 -12
  33. data/examples/metadata.yml +0 -10
  34. data/lib/imw/archives.rb +0 -120
  35. data/lib/imw/archives/rar.rb +0 -19
  36. data/lib/imw/archives/tar.rb +0 -19
  37. data/lib/imw/archives/tarbz2.rb +0 -73
  38. data/lib/imw/archives/targz.rb +0 -73
  39. data/lib/imw/archives/zip.rb +0 -51
  40. data/lib/imw/boot.rb +0 -87
  41. data/lib/imw/compressed_files.rb +0 -94
  42. data/lib/imw/compressed_files/bz2.rb +0 -16
  43. data/lib/imw/compressed_files/compressible.rb +0 -75
  44. data/lib/imw/compressed_files/gz.rb +0 -16
  45. data/lib/imw/dataset.rb +0 -125
  46. data/lib/imw/dataset/paths.rb +0 -29
  47. data/lib/imw/dataset/workflow.rb +0 -195
  48. data/lib/imw/formats.rb +0 -33
  49. data/lib/imw/formats/delimited.rb +0 -170
  50. data/lib/imw/formats/excel.rb +0 -100
  51. data/lib/imw/formats/json.rb +0 -41
  52. data/lib/imw/formats/pdf.rb +0 -71
  53. data/lib/imw/formats/sgml.rb +0 -69
  54. data/lib/imw/formats/yaml.rb +0 -41
  55. data/lib/imw/metadata.rb +0 -83
  56. data/lib/imw/metadata/contains_metadata.rb +0 -54
  57. data/lib/imw/metadata/dsl.rb +0 -111
  58. data/lib/imw/metadata/field.rb +0 -37
  59. data/lib/imw/metadata/has_metadata.rb +0 -98
  60. data/lib/imw/metadata/has_summary.rb +0 -57
  61. data/lib/imw/metadata/schema.rb +0 -17
  62. data/lib/imw/parsers.rb +0 -8
  63. data/lib/imw/parsers/flat.rb +0 -44
  64. data/lib/imw/parsers/html_parser.rb +0 -387
  65. data/lib/imw/parsers/html_parser/matchers.rb +0 -289
  66. data/lib/imw/parsers/line_parser.rb +0 -87
  67. data/lib/imw/parsers/regexp_parser.rb +0 -72
  68. data/lib/imw/repository.rb +0 -12
  69. data/lib/imw/runner.rb +0 -118
  70. data/lib/imw/schemes.rb +0 -23
  71. data/lib/imw/schemes/ftp.rb +0 -142
  72. data/lib/imw/schemes/hdfs.rb +0 -251
  73. data/lib/imw/schemes/http.rb +0 -165
  74. data/lib/imw/schemes/local.rb +0 -409
  75. data/lib/imw/schemes/remote.rb +0 -119
  76. data/lib/imw/schemes/s3.rb +0 -143
  77. data/lib/imw/schemes/sql.rb +0 -129
  78. data/lib/imw/tools.rb +0 -12
  79. data/lib/imw/tools/aggregator.rb +0 -148
  80. data/lib/imw/tools/archiver.rb +0 -220
  81. data/lib/imw/tools/downloader.rb +0 -63
  82. data/lib/imw/tools/extension_analyzer.rb +0 -114
  83. data/lib/imw/tools/summarizer.rb +0 -83
  84. data/lib/imw/tools/transferer.rb +0 -167
  85. data/lib/imw/utils.rb +0 -74
  86. data/lib/imw/utils/dynamically_extendable.rb +0 -137
  87. data/lib/imw/utils/error.rb +0 -59
  88. data/lib/imw/utils/extensions/hpricot.rb +0 -34
  89. data/lib/imw/utils/has_uri.rb +0 -131
  90. data/lib/imw/utils/log.rb +0 -92
  91. data/lib/imw/utils/misc.rb +0 -57
  92. data/lib/imw/utils/paths.rb +0 -146
  93. data/lib/imw/utils/uri.rb +0 -59
  94. data/lib/imw/utils/uuid.rb +0 -33
  95. data/lib/imw/utils/validate.rb +0 -38
  96. data/lib/imw/utils/version.rb +0 -11
  97. data/spec/data/formats/delimited/sample.csv +0 -131
  98. data/spec/data/formats/delimited/sample.tsv +0 -131
  99. data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
  100. data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
  101. data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
  102. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
  103. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
  104. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
  105. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
  106. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
  107. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
  108. data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
  109. data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
  110. data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
  111. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
  112. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
  113. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
  114. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
  115. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
  116. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
  117. data/spec/data/formats/excel/sample.xls +0 -0
  118. data/spec/data/formats/json/sample.json +0 -1
  119. data/spec/data/formats/none/sample +0 -650
  120. data/spec/data/formats/sgml/sample.xml +0 -617
  121. data/spec/data/formats/text/sample.txt +0 -650
  122. data/spec/data/formats/yaml/sample.yaml +0 -410
  123. data/spec/data/schema-tabular.yaml +0 -11
  124. data/spec/imw/archives/rar_spec.rb +0 -16
  125. data/spec/imw/archives/tar_spec.rb +0 -16
  126. data/spec/imw/archives/tarbz2_spec.rb +0 -24
  127. data/spec/imw/archives/targz_spec.rb +0 -21
  128. data/spec/imw/archives/zip_spec.rb +0 -16
  129. data/spec/imw/archives_spec.rb +0 -77
  130. data/spec/imw/compressed_files/bz2_spec.rb +0 -15
  131. data/spec/imw/compressed_files/compressible_spec.rb +0 -36
  132. data/spec/imw/compressed_files/gz_spec.rb +0 -15
  133. data/spec/imw/compressed_files_spec.rb +0 -47
  134. data/spec/imw/dataset/paths_spec.rb +0 -32
  135. data/spec/imw/dataset/workflow_spec.rb +0 -41
  136. data/spec/imw/formats/delimited_spec.rb +0 -44
  137. data/spec/imw/formats/excel_spec.rb +0 -55
  138. data/spec/imw/formats/json_spec.rb +0 -18
  139. data/spec/imw/formats/sgml_spec.rb +0 -24
  140. data/spec/imw/formats/yaml_spec.rb +0 -19
  141. data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
  142. data/spec/imw/metadata/field_spec.rb +0 -25
  143. data/spec/imw/metadata/has_metadata_spec.rb +0 -58
  144. data/spec/imw/metadata/has_summary_spec.rb +0 -32
  145. data/spec/imw/metadata/schema_spec.rb +0 -24
  146. data/spec/imw/metadata_spec.rb +0 -86
  147. data/spec/imw/parsers/line_parser_spec.rb +0 -96
  148. data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
  149. data/spec/imw/resource_spec.rb +0 -32
  150. data/spec/imw/schemes/hdfs_spec.rb +0 -67
  151. data/spec/imw/schemes/http_spec.rb +0 -19
  152. data/spec/imw/schemes/local_spec.rb +0 -165
  153. data/spec/imw/schemes/remote_spec.rb +0 -38
  154. data/spec/imw/schemes/s3_spec.rb +0 -31
  155. data/spec/imw/schemes/sql_spec.rb +0 -3
  156. data/spec/imw/tools/aggregator_spec.rb +0 -71
  157. data/spec/imw/tools/archiver_spec.rb +0 -120
  158. data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
  159. data/spec/imw/tools/summarizer_spec.rb +0 -8
  160. data/spec/imw/tools/transferer_spec.rb +0 -195
  161. data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
  162. data/spec/imw/utils/has_uri_spec.rb +0 -61
  163. data/spec/imw/utils/paths_spec.rb +0 -10
  164. data/spec/imw/utils/shared_paths_spec.rb +0 -29
  165. data/spec/imw_spec.rb +0 -14
  166. data/spec/rcov.opts +0 -1
  167. data/spec/spec_helper.rb +0 -31
  168. data/spec/support/custom_matchers.rb +0 -28
  169. data/spec/support/file_contents_matcher.rb +0 -30
  170. data/spec/support/paths_matcher.rb +0 -66
  171. data/spec/support/random.rb +0 -213
  172. data/spec/support/without_regard_to_order_matcher.rb +0 -41
@@ -1,153 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Tools::ExtensionAnalyzer do
4
-
5
- before do
6
- class Analyzer
7
- attr_accessor :dir, :resources
8
- include IMW::Tools::ExtensionAnalyzer
9
- def initialize dir
10
- self.dir = File.expand_path(dir)
11
- @resources = IMW.open(self.dir).all_resources
12
- end
13
- def total_size
14
- @total_size ||= resources.map(&:size).inject(0) { |e, sum| sum += e }
15
- end
16
- end
17
- end
18
-
19
- describe 'working with an empty directory' do
20
- before do
21
- @analyzer = Analyzer.new(IMWTest::TMP_DIR)
22
- end
23
-
24
- %w[most_common_extension_by_count most_common_extension_by_size most_common_extension].each do |method|
25
- it "should return 'flat' when asked for its '#{method}'" do
26
- @analyzer.send(method).should == 'flat'
27
- end
28
- end
29
-
30
- %w[extension_counts normalized_extension_counts extension_sizes normalized_extension_sizes].each do |method|
31
- it "should return an empty hash when asked for its '#{method}'" do
32
- @analyzer.send(method).should == {}
33
- end
34
- end
35
- end
36
-
37
- describe 'working with files that lack extensions' do
38
-
39
- before do
40
- @dir = File.join(IMWTest::TMP_DIR, 'ext_dir')
41
- FileUtils.mkdir_p(@dir)
42
-
43
- @f1 = "foobar1"
44
- @f2 = "foobar2"
45
- @f3 = "foobar1"
46
- @files = [@f1, @f2, @f3]
47
-
48
- @files.each do |basename|
49
- IMWTest::Random.file File.join(@dir, basename)
50
- end
51
-
52
- @analyzer = Analyzer.new(IMWTest::TMP_DIR)
53
- end
54
-
55
- %w[most_common_extension_by_count most_common_extension_by_size most_common_extension].each do |method|
56
- it "should return 'flat' when asked for its '#{method}'" do
57
- @analyzer.send(method).should == 'flat'
58
- end
59
- end
60
- end
61
-
62
- describe 'working with a directory of files' do
63
- before do
64
- @dir = File.join(IMWTest::TMP_DIR, 'ext_dir')
65
- FileUtils.mkdir_p(@dir)
66
-
67
- @csv1 = "foobar1.csv"
68
- @csv2 = "foobar2.csv"
69
- @xml = "foobar1.xml"
70
- @txt = "foobar1.txt"
71
- @files = [@csv1, @csv2, @xml, @txt]
72
-
73
- @files.each do |basename|
74
- IMWTest::Random.file File.join(@dir, basename)
75
- end
76
-
77
- def bloat basename
78
- File.open(File.join(@dir, basename), 'a') do |f|
79
- 1000.times do
80
- f.write( 'hello ' * 100)
81
- end
82
- end
83
- end
84
-
85
- @analyzer = Analyzer.new @dir
86
- end
87
-
88
- describe "working with extension counts" do
89
- it "should be able to return counts by extension" do
90
- @analyzer.extension_counts.should == {'xml' => 1, 'txt' => 1, 'csv' => 2 }
91
- end
92
-
93
- it "should be able to return the most common extension by count" do
94
- @analyzer.most_common_extension_by_count.should == 'csv'
95
- end
96
-
97
- it "should be able to calculate extension weighted by number of files" do
98
- @analyzer.normalized_extension_counts.should == { 'csv' => 0.5, 'xml' => 0.25, 'txt' => 0.25 }
99
- end
100
- end
101
-
102
- describe "working with extension sizes" do
103
- it "should be able to calculate extension sizes" do
104
- csv_size = File.size(File.join(@dir, @csv1)) + File.size(File.join(@dir, @csv2))
105
- xml_size = File.size(File.join(@dir, @xml))
106
- txt_size = File.size(File.join(@dir, @txt))
107
- @analyzer.extension_sizes.should == { 'csv' => csv_size, 'xml' => xml_size, 'txt' => txt_size }
108
- end
109
-
110
- it "should be able to return the most common extension by size" do
111
- bloat @txt
112
- @analyzer.most_common_extension_by_size.should == 'txt'
113
- end
114
-
115
- it "should be able to calculate extension sizes" do
116
- csv_size = File.size(File.join(@dir, @csv1)) + File.size(File.join(@dir, @csv2))
117
- xml_size = File.size(File.join(@dir, @xml))
118
- txt_size = File.size(File.join(@dir, @txt))
119
- total_size = csv_size + xml_size + txt_size
120
- @analyzer.normalized_extension_sizes.should == { 'csv' => csv_size.to_f / total_size.to_f, 'xml' => xml_size.to_f / total_size.to_f, 'txt' => txt_size.to_f / total_size.to_f }
121
- end
122
- end
123
-
124
- describe "determining the most common extension" do
125
-
126
- it "should obviously return an extension if it is the most common by count as well as the most common by size" do
127
- bloat @csv1
128
- @analyzer.most_common_extension.should == 'csv'
129
- end
130
-
131
- it "should return the most common extension by count if the count fraction is half or greater and the size fraction is less than half" do
132
- bloat @txt
133
- bloat @xml
134
- @analyzer.most_common_extension.should == 'csv'
135
- end
136
-
137
- it "should return the most common extension by size if the size fraction is half or greater and the count fraction is less than half" do
138
- # need to add an xml file
139
- @new_xml = File.join(@dir, 'xml2.xml')
140
- IMWTest::Random.file(@new_xml)
141
- bloat @txt
142
- @analyzer = Analyzer.new @dir
143
- @analyzer.most_common_extension.should == 'txt'
144
- end
145
-
146
- it "should return the most common extension by size if no other conditions are met" do
147
- bloat @txt
148
- @analyzer.most_common_extension.should == 'txt'
149
- end
150
-
151
- end
152
- end
153
- end
@@ -1,8 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Tools::Summarizer do
4
-
5
-
6
- end
7
-
8
-
@@ -1,195 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Tools::Transferer do
4
- before do
5
- @dir = IMW.open("dir")
6
- @new_dir = IMW.open("new_dir")
7
- @nested = IMW.open('new_dir/nested.txt')
8
- @nested_dir = IMW.open('new_dir/nested')
9
- @local = IMW.open("foobar.txt")
10
- @dest = IMW.open("barbaz.txt")
11
- @http = IMW.open("http://www.google.com")
12
- @hdfs = IMW.open("hdfs:///path/to/foobar.txt")
13
- @s3 = IMW.open("s3://mybucket/foo/bar")
14
- IMWTest::Random.file(@local.path)
15
- end
16
-
17
- it "should raise an error unless the action is one of :cp, :copy, :mv :move, or :mv!" do
18
- @transferer = IMW::Tools::Transferer.new(:cp, @local, @http)
19
- @transferer.action = :cp
20
- @transferer.action = :copy
21
- @transferer.action = :mv
22
- @transferer.action = :mv!
23
- @transferer.action = :move
24
- lambda { @transferer.action = :foobar }.should raise_error(IMW::ArgumentError)
25
- end
26
-
27
- it "should raise an error if the source and the destination have the same URI" do
28
- lambda { IMW::Tools::Transferer.new(:cp, @local, @local) }.should raise_error(IMW::PathError)
29
- end
30
-
31
- it "should print a log message when IMW is verbose" do
32
- IMW.stub!(:verbose).and_return(:true)
33
- IMW.should_receive(:announce_if_verbose).with("Copying #{@local} to #{@dest}")
34
- IMW::Tools::Transferer.new(:cp, @local, @dest).transfer!
35
- end
36
-
37
- describe "transfering local files" do
38
-
39
- before do
40
- IMWTest::Random.file @local.path
41
- @transferer = IMW::Tools::Transferer.new(:cp, @local, @dest)
42
- end
43
-
44
- it "should raise an error if the source doesn't exist" do
45
- @local.rm!
46
- lambda { @transferer.source = @local }.should raise_error(IMW::PathError)
47
- end
48
-
49
- it "should raise an error if the directory of the destination doesn't exist" do
50
- lambda { @transferer.destination = @nested }.should raise_error(IMW::PathError)
51
- end
52
-
53
- it "can copy a local file" do
54
- @transferer.transfer!
55
- @local.exist?.should be_true
56
- @dest.exist?.should be_true
57
- end
58
-
59
- it "can copy a local file to a directory" do
60
- FileUtils.mkdir(@dir.path)
61
- @transferer.destination = @dir
62
- @transferer.transfer!
63
- IMW.open(File.join(@dir.path, @local.basename)).exist?.should be_true
64
- end
65
-
66
- it "can move a local file" do
67
- @transferer.action = :mv
68
- @transferer.transfer!
69
- @local.exist?.should be_false
70
- @dest.exist?.should be_true
71
- end
72
-
73
- it "can move a local file to a directory" do
74
- FileUtils.mkdir(@dir.path)
75
- @transferer.action = :mv
76
- @transferer.destination = @dir
77
- @transferer.transfer!
78
- IMW.open(File.join(@dir.path, @local.basename)).exist?.should be_true
79
- @local.exist?.should be_false
80
- end
81
- end
82
-
83
- describe "transfering local directories" do
84
-
85
- before do
86
- IMWTest::Random.directory_with_files @dir.path
87
- @dir = @dir.reopen
88
- end
89
-
90
- it "should raise an error if the source doesn't exist" do
91
- @dir.rm_rf!
92
- lambda { IMW::Tools::Transferer.new(:cp, @dir, @new_dir).transfer! }.should raise_error(IMW::PathError)
93
- end
94
-
95
- it "should raise an error if the directory of the destination doesn't exist" do
96
- lambda { IMW::Tools::Transferer.new(:cp, @dir, @nested_dir).transfer! }.should raise_error(IMW::PathError)
97
- end
98
-
99
- it "can copy a local directory" do
100
- IMW::Tools::Transferer.new(:cp, @dir, @new_dir).transfer!
101
- @dir.exist?.should be_true
102
- @new_dir.exist?.should be_true
103
- end
104
-
105
- it "can move a local directory" do
106
- IMW::Tools::Transferer.new(:mv, @dir, @new_dir).transfer!
107
- @dir.exist?.should be_false
108
- @new_dir.exist?.should be_true
109
- end
110
-
111
- it "can copy a local directory to an existing directory" do
112
- FileUtils.mkdir(@new_dir.path)
113
- IMW::Tools::Transferer.new(:cp, @dir, @nested_dir).transfer!
114
- @dir.exist?.should be_true
115
- @nested_dir.exist?.should be_true
116
- end
117
-
118
- it "can move a local directory to an existing directory" do
119
- FileUtils.mkdir(@new_dir.path)
120
- IMW::Tools::Transferer.new(:mv, @dir, @nested_dir).transfer!
121
- @dir.exist?.should_not be_true
122
- @nested_dir.exist?.should be_true
123
- end
124
- end
125
-
126
-
127
- describe "transferring HTTP files" do
128
- it "can copy a remote file to a local path" do
129
- IMW::Tools::Transferer.new(:cp, @http, @local).transfer!
130
- @local.exist?.should be_true
131
- end
132
- end
133
-
134
- describe "transferring S3 files" do
135
-
136
- it "can copy an S3 file to a local path" do
137
- IMW::Schemes::S3.should_receive(:get).with(@s3, @local)
138
- IMW::Tools::Transferer.new(:cp, @s3, @local).transfer!
139
- end
140
-
141
- it "can copy a local path to an S3 file" do
142
- IMWTest::Random.file @local.path
143
- IMW::Schemes::S3.should_receive(:put).with(@local, @s3)
144
- IMW::Tools::Transferer.new(:cp, @local, @s3).transfer!
145
- end
146
-
147
- it "can copy between S3 files" do
148
- @new_s3 = IMW.open('s3://mybucket/new/path')
149
- IMW::Schemes::S3.should_receive(:copy).with(@s3, @new_s3)
150
- IMW::Tools::Transferer.new(:cp, @s3, @new_s3).transfer!
151
- end
152
- end
153
-
154
- describe "transferring HDFS files" do
155
- before do
156
- IMW::Schemes::HDFS.stub!(:fs)
157
- end
158
-
159
- it "can copy a local file to an HDFS path" do
160
- IMWTest::Random.file @local.path
161
-
162
- IMW::Schemes::HDFS.should_receive(:fs).with(:put, @local.path, @hdfs.path)
163
- IMW::Tools::Transferer.new(:cp, @local, @hdfs).transfer!
164
- end
165
-
166
- it "can copy an HDFS file to a local path" do
167
- IMW::Schemes::HDFS.should_receive(:fs).with(:get, @hdfs.path, @local.path)
168
- IMW::Tools::Transferer.new(:cp, @hdfs, @local).transfer!
169
- end
170
-
171
- it "can copy between HDFS paths" do
172
- @new_hdfs = IMW.open('hdfs:///a/new/path')
173
- IMW::Schemes::HDFS.should_receive(:fs).with(:cp, @hdfs.path, @new_hdfs.path)
174
- IMW::Tools::Transferer.new(:cp, @hdfs, @new_hdfs).transfer!
175
- end
176
-
177
- it "can move between HDFS paths" do
178
- @new_hdfs = IMW.open('hdfs:///a/new/path')
179
- IMW::Schemes::HDFS.should_receive(:fs).with(:mv, @hdfs.path, @new_hdfs.path)
180
- IMW::Tools::Transferer.new(:mv, @hdfs, @new_hdfs).transfer!
181
- end
182
-
183
- it "can copy from S3 to HDFS" do
184
- IMW::Schemes::HDFS.should_receive(:fs).with(:cp, @s3.s3n_url, @hdfs.path)
185
- IMW::Tools::Transferer.new(:cp, @s3, @hdfs).transfer!
186
- end
187
-
188
- it "can copy from HDFS to S3" do
189
- IMW::Schemes::HDFS.should_receive(:fs).with(:cp, @hdfs.path, @s3.s3n_url)
190
- IMW::Tools::Transferer.new(:cp, @hdfs, @s3).transfer!
191
- end
192
- end
193
- end
194
-
195
-
@@ -1,69 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe "extending resources with specific modules" do
4
- before do
5
- @class = Class.new
6
- @class.send(:include, IMW::Utils::DynamicallyExtendable)
7
- @instance = @class.new
8
- end
9
-
10
- it "should raise an error when registering a malformed handler" do
11
- lambda { @class.register_handler("Foo", 3) }.should raise_error(IMW::ArgumentError)
12
- end
13
-
14
- it "should store in instances modules they've been extended by" do
15
- @foo = Module.new
16
- @instance.extend(@foo)
17
- @instance.modules.should include(@foo)
18
- end
19
-
20
- describe "evaluating handlers" do
21
- before do
22
- @proccer = Module.new
23
- @class.send(:attr_accessor, :prop)
24
- @class.register_handler(@proccer, Proc.new { |instance| instance.prop })
25
-
26
- @regexper = Module.new
27
- @class.send(:define_method, :to_s) { 'whoa' }
28
- @class.register_handler(@regexper, /whoa/)
29
- end
30
-
31
- it "should extend an instance with a matching proc handler" do
32
- @instance.prop = true
33
- @instance.extend_appropriately!
34
- @instance.modules.should include(@proccer)
35
- end
36
-
37
- it "should not extend an instance with a non-matching proc handler" do
38
- @instance.prop = false
39
- @instance.extend_appropriately!
40
- @instance.modules.should_not include(@proccer)
41
- end
42
-
43
- it "should extend an instance with a matching regexp handler" do
44
- @instance.extend_appropriately!
45
- @instance.modules.should include(@regexper)
46
- end
47
-
48
- it "should not extend an instance with a non-matching regexp handler" do
49
- @class.send(:define_method, :to_s) { 'fowl' }
50
- @instance.extend_appropriately!
51
- @instance.modules.should_not include(@regexper)
52
- end
53
-
54
- it "should not extend an instance with a module it was asked to skip" do
55
- @instance.extend_appropriately!(:skip_modules => [@regexper])
56
- @instance.modules.should_not include(@regexper)
57
- end
58
-
59
- it "should not extend with any modules if asked" do
60
- @instance.extend_appropriately!(:no_modules => true)
61
- @instance.modules.should_not include(@regexper)
62
- end
63
-
64
- it "should use a module if asked to do so even if it's handler didn't match" do
65
- @instance.extend_appropriately!(:use_modules => [@proccer])
66
- @instance.modules.should include(@proccer)
67
- end
68
- end
69
- end
@@ -1,61 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- class Klass
4
- include IMW::Utils::HasURI
5
- end
6
-
7
- def new_obj uri
8
- obj = Klass.new
9
- obj.uri = uri
10
- obj
11
- end
12
-
13
- describe IMW::Utils::HasURI do
14
-
15
- it "local file path" do
16
- obj = new_obj("/home/foo.txt")
17
- obj.stub!(:path).and_return("/home/foo.txt")
18
-
19
- obj.scheme.should be_nil
20
- obj.dirname.should == '/home'
21
- obj.basename.should == 'foo.txt'
22
- obj.extname.should == '.txt'
23
- obj.extension.should == 'txt'
24
- obj.name.should == 'foo'
25
- end
26
-
27
- it "local file path with spaces in the name" do
28
- obj = new_obj("/home/foo bar.txt")
29
- obj.stub!(:path).and_return("/home/foo bar.txt")
30
- obj.name.should == 'foo bar'
31
- end
32
-
33
- it "local file path with an explicit file:// scheme" do
34
- obj = new_obj("file:///home/foo.txt")
35
- obj.scheme.should == 'file'
36
- end
37
-
38
- it "web URL with query and fragment" do
39
- obj = new_obj("http://mysite.com/some/page?param=value#frag")
40
- obj.stub!(:path).and_return("/some/page")
41
- obj.scheme.should == 'http'
42
- obj.dirname.should == '/some'
43
- obj.basename.should == 'page'
44
- obj.extname.should == ''
45
- obj.extension.should == ''
46
- obj.name.should == 'page'
47
- end
48
-
49
- it "should be able to strip URIs" do
50
- new_obj('/path/to/something').stripped_uri.to_s.should == '/path/to/something'
51
- new_obj('http://user:pass@example.com:8080/path/to/some/script.php?param=value#frag').stripped_uri.to_s.should == 'http://user:pass@example.com:8080/path/to/some/script.php'
52
- end
53
-
54
- it "should be able to return raw paths" do
55
- new_obj('s3://bucket/crazy url with # some dumb naming convention').raw_path.should == '/crazy url with # some dumb naming convention'
56
- new_obj('s3://bucket/crazy url with ?some dumb naming convention').raw_path.should == '/crazy url with ?some dumb naming convention'
57
- new_obj('s3://bucket/crazy url with ?some dumb naming #convention').raw_path.should == '/crazy url with ?some dumb naming #convention'
58
- new_obj('s3://bucket/crazy url with #some dumb naming ?convention').raw_path.should == '/crazy url with #some dumb naming ?convention'
59
- end
60
-
61
- end