imw 0.2.18 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. data/Gemfile +7 -26
  2. data/Gemfile.lock +13 -38
  3. data/{LICENSE → LICENSE.txt} +1 -1
  4. data/README.textile +35 -0
  5. data/Rakefile +45 -22
  6. data/VERSION +1 -1
  7. data/examples/foo.rb +19 -0
  8. data/examples/html_selector.rb +22 -0
  9. data/examples/nes_game_list.csv +625 -0
  10. data/examples/nes_gamespot.csv +1371 -0
  11. data/examples/nes_nintendo.csv +624 -0
  12. data/examples/nes_unlicensed.csv +89 -0
  13. data/examples/nes_wikipedia.csv +710 -0
  14. data/examples/nibbler_test.rb +24 -0
  15. data/examples/script.rb +19 -0
  16. data/lib/imw.rb +28 -140
  17. data/lib/imw/error.rb +9 -0
  18. data/lib/imw/recordizer.rb +8 -0
  19. data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
  20. data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
  21. data/lib/imw/resource.rb +3 -119
  22. data/lib/imw/serializer.rb +7 -0
  23. data/lib/imw/serializer/json_serializer.rb +17 -0
  24. data/lib/imw/uri.rb +41 -0
  25. data/spec/resource_spec.rb +78 -0
  26. data/spec/uri_spec.rb +55 -0
  27. metadata +81 -232
  28. data/README.rdoc +0 -371
  29. data/bin/imw +0 -5
  30. data/bin/tsv_to_json.rb +0 -29
  31. data/etc/imwrc.rb +0 -26
  32. data/examples/dataset.rb +0 -12
  33. data/examples/metadata.yml +0 -10
  34. data/lib/imw/archives.rb +0 -120
  35. data/lib/imw/archives/rar.rb +0 -19
  36. data/lib/imw/archives/tar.rb +0 -19
  37. data/lib/imw/archives/tarbz2.rb +0 -73
  38. data/lib/imw/archives/targz.rb +0 -73
  39. data/lib/imw/archives/zip.rb +0 -51
  40. data/lib/imw/boot.rb +0 -87
  41. data/lib/imw/compressed_files.rb +0 -94
  42. data/lib/imw/compressed_files/bz2.rb +0 -16
  43. data/lib/imw/compressed_files/compressible.rb +0 -75
  44. data/lib/imw/compressed_files/gz.rb +0 -16
  45. data/lib/imw/dataset.rb +0 -125
  46. data/lib/imw/dataset/paths.rb +0 -29
  47. data/lib/imw/dataset/workflow.rb +0 -195
  48. data/lib/imw/formats.rb +0 -33
  49. data/lib/imw/formats/delimited.rb +0 -170
  50. data/lib/imw/formats/excel.rb +0 -100
  51. data/lib/imw/formats/json.rb +0 -41
  52. data/lib/imw/formats/pdf.rb +0 -71
  53. data/lib/imw/formats/sgml.rb +0 -69
  54. data/lib/imw/formats/yaml.rb +0 -41
  55. data/lib/imw/metadata.rb +0 -83
  56. data/lib/imw/metadata/contains_metadata.rb +0 -54
  57. data/lib/imw/metadata/dsl.rb +0 -111
  58. data/lib/imw/metadata/field.rb +0 -37
  59. data/lib/imw/metadata/has_metadata.rb +0 -98
  60. data/lib/imw/metadata/has_summary.rb +0 -57
  61. data/lib/imw/metadata/schema.rb +0 -17
  62. data/lib/imw/parsers.rb +0 -8
  63. data/lib/imw/parsers/flat.rb +0 -44
  64. data/lib/imw/parsers/html_parser.rb +0 -387
  65. data/lib/imw/parsers/html_parser/matchers.rb +0 -289
  66. data/lib/imw/parsers/line_parser.rb +0 -87
  67. data/lib/imw/parsers/regexp_parser.rb +0 -72
  68. data/lib/imw/repository.rb +0 -12
  69. data/lib/imw/runner.rb +0 -118
  70. data/lib/imw/schemes.rb +0 -23
  71. data/lib/imw/schemes/ftp.rb +0 -142
  72. data/lib/imw/schemes/hdfs.rb +0 -251
  73. data/lib/imw/schemes/http.rb +0 -165
  74. data/lib/imw/schemes/local.rb +0 -409
  75. data/lib/imw/schemes/remote.rb +0 -119
  76. data/lib/imw/schemes/s3.rb +0 -143
  77. data/lib/imw/schemes/sql.rb +0 -129
  78. data/lib/imw/tools.rb +0 -12
  79. data/lib/imw/tools/aggregator.rb +0 -148
  80. data/lib/imw/tools/archiver.rb +0 -220
  81. data/lib/imw/tools/downloader.rb +0 -63
  82. data/lib/imw/tools/extension_analyzer.rb +0 -114
  83. data/lib/imw/tools/summarizer.rb +0 -83
  84. data/lib/imw/tools/transferer.rb +0 -167
  85. data/lib/imw/utils.rb +0 -74
  86. data/lib/imw/utils/dynamically_extendable.rb +0 -137
  87. data/lib/imw/utils/error.rb +0 -59
  88. data/lib/imw/utils/extensions/hpricot.rb +0 -34
  89. data/lib/imw/utils/has_uri.rb +0 -131
  90. data/lib/imw/utils/log.rb +0 -92
  91. data/lib/imw/utils/misc.rb +0 -57
  92. data/lib/imw/utils/paths.rb +0 -146
  93. data/lib/imw/utils/uri.rb +0 -59
  94. data/lib/imw/utils/uuid.rb +0 -33
  95. data/lib/imw/utils/validate.rb +0 -38
  96. data/lib/imw/utils/version.rb +0 -11
  97. data/spec/data/formats/delimited/sample.csv +0 -131
  98. data/spec/data/formats/delimited/sample.tsv +0 -131
  99. data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
  100. data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
  101. data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
  102. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
  103. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
  104. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
  105. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
  106. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
  107. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
  108. data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
  109. data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
  110. data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
  111. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
  112. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
  113. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
  114. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
  115. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
  116. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
  117. data/spec/data/formats/excel/sample.xls +0 -0
  118. data/spec/data/formats/json/sample.json +0 -1
  119. data/spec/data/formats/none/sample +0 -650
  120. data/spec/data/formats/sgml/sample.xml +0 -617
  121. data/spec/data/formats/text/sample.txt +0 -650
  122. data/spec/data/formats/yaml/sample.yaml +0 -410
  123. data/spec/data/schema-tabular.yaml +0 -11
  124. data/spec/imw/archives/rar_spec.rb +0 -16
  125. data/spec/imw/archives/tar_spec.rb +0 -16
  126. data/spec/imw/archives/tarbz2_spec.rb +0 -24
  127. data/spec/imw/archives/targz_spec.rb +0 -21
  128. data/spec/imw/archives/zip_spec.rb +0 -16
  129. data/spec/imw/archives_spec.rb +0 -77
  130. data/spec/imw/compressed_files/bz2_spec.rb +0 -15
  131. data/spec/imw/compressed_files/compressible_spec.rb +0 -36
  132. data/spec/imw/compressed_files/gz_spec.rb +0 -15
  133. data/spec/imw/compressed_files_spec.rb +0 -47
  134. data/spec/imw/dataset/paths_spec.rb +0 -32
  135. data/spec/imw/dataset/workflow_spec.rb +0 -41
  136. data/spec/imw/formats/delimited_spec.rb +0 -44
  137. data/spec/imw/formats/excel_spec.rb +0 -55
  138. data/spec/imw/formats/json_spec.rb +0 -18
  139. data/spec/imw/formats/sgml_spec.rb +0 -24
  140. data/spec/imw/formats/yaml_spec.rb +0 -19
  141. data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
  142. data/spec/imw/metadata/field_spec.rb +0 -25
  143. data/spec/imw/metadata/has_metadata_spec.rb +0 -58
  144. data/spec/imw/metadata/has_summary_spec.rb +0 -32
  145. data/spec/imw/metadata/schema_spec.rb +0 -24
  146. data/spec/imw/metadata_spec.rb +0 -86
  147. data/spec/imw/parsers/line_parser_spec.rb +0 -96
  148. data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
  149. data/spec/imw/resource_spec.rb +0 -32
  150. data/spec/imw/schemes/hdfs_spec.rb +0 -67
  151. data/spec/imw/schemes/http_spec.rb +0 -19
  152. data/spec/imw/schemes/local_spec.rb +0 -165
  153. data/spec/imw/schemes/remote_spec.rb +0 -38
  154. data/spec/imw/schemes/s3_spec.rb +0 -31
  155. data/spec/imw/schemes/sql_spec.rb +0 -3
  156. data/spec/imw/tools/aggregator_spec.rb +0 -71
  157. data/spec/imw/tools/archiver_spec.rb +0 -120
  158. data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
  159. data/spec/imw/tools/summarizer_spec.rb +0 -8
  160. data/spec/imw/tools/transferer_spec.rb +0 -195
  161. data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
  162. data/spec/imw/utils/has_uri_spec.rb +0 -61
  163. data/spec/imw/utils/paths_spec.rb +0 -10
  164. data/spec/imw/utils/shared_paths_spec.rb +0 -29
  165. data/spec/imw_spec.rb +0 -14
  166. data/spec/rcov.opts +0 -1
  167. data/spec/spec_helper.rb +0 -31
  168. data/spec/support/custom_matchers.rb +0 -28
  169. data/spec/support/file_contents_matcher.rb +0 -30
  170. data/spec/support/paths_matcher.rb +0 -66
  171. data/spec/support/random.rb +0 -213
  172. data/spec/support/without_regard_to_order_matcher.rb +0 -41
@@ -1,153 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Tools::ExtensionAnalyzer do
4
-
5
- before do
6
- class Analyzer
7
- attr_accessor :dir, :resources
8
- include IMW::Tools::ExtensionAnalyzer
9
- def initialize dir
10
- self.dir = File.expand_path(dir)
11
- @resources = IMW.open(self.dir).all_resources
12
- end
13
- def total_size
14
- @total_size ||= resources.map(&:size).inject(0) { |e, sum| sum += e }
15
- end
16
- end
17
- end
18
-
19
- describe 'working with an empty directory' do
20
- before do
21
- @analyzer = Analyzer.new(IMWTest::TMP_DIR)
22
- end
23
-
24
- %w[most_common_extension_by_count most_common_extension_by_size most_common_extension].each do |method|
25
- it "should return 'flat' when asked for its '#{method}'" do
26
- @analyzer.send(method).should == 'flat'
27
- end
28
- end
29
-
30
- %w[extension_counts normalized_extension_counts extension_sizes normalized_extension_sizes].each do |method|
31
- it "should return an empty hash when asked for its '#{method}'" do
32
- @analyzer.send(method).should == {}
33
- end
34
- end
35
- end
36
-
37
- describe 'working with files that lack extensions' do
38
-
39
- before do
40
- @dir = File.join(IMWTest::TMP_DIR, 'ext_dir')
41
- FileUtils.mkdir_p(@dir)
42
-
43
- @f1 = "foobar1"
44
- @f2 = "foobar2"
45
- @f3 = "foobar1"
46
- @files = [@f1, @f2, @f3]
47
-
48
- @files.each do |basename|
49
- IMWTest::Random.file File.join(@dir, basename)
50
- end
51
-
52
- @analyzer = Analyzer.new(IMWTest::TMP_DIR)
53
- end
54
-
55
- %w[most_common_extension_by_count most_common_extension_by_size most_common_extension].each do |method|
56
- it "should return 'flat' when asked for its '#{method}'" do
57
- @analyzer.send(method).should == 'flat'
58
- end
59
- end
60
- end
61
-
62
- describe 'working with a directory of files' do
63
- before do
64
- @dir = File.join(IMWTest::TMP_DIR, 'ext_dir')
65
- FileUtils.mkdir_p(@dir)
66
-
67
- @csv1 = "foobar1.csv"
68
- @csv2 = "foobar2.csv"
69
- @xml = "foobar1.xml"
70
- @txt = "foobar1.txt"
71
- @files = [@csv1, @csv2, @xml, @txt]
72
-
73
- @files.each do |basename|
74
- IMWTest::Random.file File.join(@dir, basename)
75
- end
76
-
77
- def bloat basename
78
- File.open(File.join(@dir, basename), 'a') do |f|
79
- 1000.times do
80
- f.write( 'hello ' * 100)
81
- end
82
- end
83
- end
84
-
85
- @analyzer = Analyzer.new @dir
86
- end
87
-
88
- describe "working with extension counts" do
89
- it "should be able to return counts by extension" do
90
- @analyzer.extension_counts.should == {'xml' => 1, 'txt' => 1, 'csv' => 2 }
91
- end
92
-
93
- it "should be able to return the most common extension by count" do
94
- @analyzer.most_common_extension_by_count.should == 'csv'
95
- end
96
-
97
- it "should be able to calculate extension weighted by number of files" do
98
- @analyzer.normalized_extension_counts.should == { 'csv' => 0.5, 'xml' => 0.25, 'txt' => 0.25 }
99
- end
100
- end
101
-
102
- describe "working with extension sizes" do
103
- it "should be able to calculate extension sizes" do
104
- csv_size = File.size(File.join(@dir, @csv1)) + File.size(File.join(@dir, @csv2))
105
- xml_size = File.size(File.join(@dir, @xml))
106
- txt_size = File.size(File.join(@dir, @txt))
107
- @analyzer.extension_sizes.should == { 'csv' => csv_size, 'xml' => xml_size, 'txt' => txt_size }
108
- end
109
-
110
- it "should be able to return the most common extension by size" do
111
- bloat @txt
112
- @analyzer.most_common_extension_by_size.should == 'txt'
113
- end
114
-
115
- it "should be able to calculate extension sizes" do
116
- csv_size = File.size(File.join(@dir, @csv1)) + File.size(File.join(@dir, @csv2))
117
- xml_size = File.size(File.join(@dir, @xml))
118
- txt_size = File.size(File.join(@dir, @txt))
119
- total_size = csv_size + xml_size + txt_size
120
- @analyzer.normalized_extension_sizes.should == { 'csv' => csv_size.to_f / total_size.to_f, 'xml' => xml_size.to_f / total_size.to_f, 'txt' => txt_size.to_f / total_size.to_f }
121
- end
122
- end
123
-
124
- describe "determining the most common extension" do
125
-
126
- it "should obviously return an extension if it is the most common by count as well as the most common by size" do
127
- bloat @csv1
128
- @analyzer.most_common_extension.should == 'csv'
129
- end
130
-
131
- it "should return the most common extension by count if the count fraction is half or greater and the size fraction is less than half" do
132
- bloat @txt
133
- bloat @xml
134
- @analyzer.most_common_extension.should == 'csv'
135
- end
136
-
137
- it "should return the most common extension by size if the size fraction is half or greater and the count fraction is less than half" do
138
- # need to add an xml file
139
- @new_xml = File.join(@dir, 'xml2.xml')
140
- IMWTest::Random.file(@new_xml)
141
- bloat @txt
142
- @analyzer = Analyzer.new @dir
143
- @analyzer.most_common_extension.should == 'txt'
144
- end
145
-
146
- it "should return the most common extension by size if no other conditions are met" do
147
- bloat @txt
148
- @analyzer.most_common_extension.should == 'txt'
149
- end
150
-
151
- end
152
- end
153
- end
@@ -1,8 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Tools::Summarizer do
4
-
5
-
6
- end
7
-
8
-
@@ -1,195 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Tools::Transferer do
4
- before do
5
- @dir = IMW.open("dir")
6
- @new_dir = IMW.open("new_dir")
7
- @nested = IMW.open('new_dir/nested.txt')
8
- @nested_dir = IMW.open('new_dir/nested')
9
- @local = IMW.open("foobar.txt")
10
- @dest = IMW.open("barbaz.txt")
11
- @http = IMW.open("http://www.google.com")
12
- @hdfs = IMW.open("hdfs:///path/to/foobar.txt")
13
- @s3 = IMW.open("s3://mybucket/foo/bar")
14
- IMWTest::Random.file(@local.path)
15
- end
16
-
17
- it "should raise an error unless the action is one of :cp, :copy, :mv :move, or :mv!" do
18
- @transferer = IMW::Tools::Transferer.new(:cp, @local, @http)
19
- @transferer.action = :cp
20
- @transferer.action = :copy
21
- @transferer.action = :mv
22
- @transferer.action = :mv!
23
- @transferer.action = :move
24
- lambda { @transferer.action = :foobar }.should raise_error(IMW::ArgumentError)
25
- end
26
-
27
- it "should raise an error if the source and the destination have the same URI" do
28
- lambda { IMW::Tools::Transferer.new(:cp, @local, @local) }.should raise_error(IMW::PathError)
29
- end
30
-
31
- it "should print a log message when IMW is verbose" do
32
- IMW.stub!(:verbose).and_return(:true)
33
- IMW.should_receive(:announce_if_verbose).with("Copying #{@local} to #{@dest}")
34
- IMW::Tools::Transferer.new(:cp, @local, @dest).transfer!
35
- end
36
-
37
- describe "transfering local files" do
38
-
39
- before do
40
- IMWTest::Random.file @local.path
41
- @transferer = IMW::Tools::Transferer.new(:cp, @local, @dest)
42
- end
43
-
44
- it "should raise an error if the source doesn't exist" do
45
- @local.rm!
46
- lambda { @transferer.source = @local }.should raise_error(IMW::PathError)
47
- end
48
-
49
- it "should raise an error if the directory of the destination doesn't exist" do
50
- lambda { @transferer.destination = @nested }.should raise_error(IMW::PathError)
51
- end
52
-
53
- it "can copy a local file" do
54
- @transferer.transfer!
55
- @local.exist?.should be_true
56
- @dest.exist?.should be_true
57
- end
58
-
59
- it "can copy a local file to a directory" do
60
- FileUtils.mkdir(@dir.path)
61
- @transferer.destination = @dir
62
- @transferer.transfer!
63
- IMW.open(File.join(@dir.path, @local.basename)).exist?.should be_true
64
- end
65
-
66
- it "can move a local file" do
67
- @transferer.action = :mv
68
- @transferer.transfer!
69
- @local.exist?.should be_false
70
- @dest.exist?.should be_true
71
- end
72
-
73
- it "can move a local file to a directory" do
74
- FileUtils.mkdir(@dir.path)
75
- @transferer.action = :mv
76
- @transferer.destination = @dir
77
- @transferer.transfer!
78
- IMW.open(File.join(@dir.path, @local.basename)).exist?.should be_true
79
- @local.exist?.should be_false
80
- end
81
- end
82
-
83
- describe "transfering local directories" do
84
-
85
- before do
86
- IMWTest::Random.directory_with_files @dir.path
87
- @dir = @dir.reopen
88
- end
89
-
90
- it "should raise an error if the source doesn't exist" do
91
- @dir.rm_rf!
92
- lambda { IMW::Tools::Transferer.new(:cp, @dir, @new_dir).transfer! }.should raise_error(IMW::PathError)
93
- end
94
-
95
- it "should raise an error if the directory of the destination doesn't exist" do
96
- lambda { IMW::Tools::Transferer.new(:cp, @dir, @nested_dir).transfer! }.should raise_error(IMW::PathError)
97
- end
98
-
99
- it "can copy a local directory" do
100
- IMW::Tools::Transferer.new(:cp, @dir, @new_dir).transfer!
101
- @dir.exist?.should be_true
102
- @new_dir.exist?.should be_true
103
- end
104
-
105
- it "can move a local directory" do
106
- IMW::Tools::Transferer.new(:mv, @dir, @new_dir).transfer!
107
- @dir.exist?.should be_false
108
- @new_dir.exist?.should be_true
109
- end
110
-
111
- it "can copy a local directory to an existing directory" do
112
- FileUtils.mkdir(@new_dir.path)
113
- IMW::Tools::Transferer.new(:cp, @dir, @nested_dir).transfer!
114
- @dir.exist?.should be_true
115
- @nested_dir.exist?.should be_true
116
- end
117
-
118
- it "can move a local directory to an existing directory" do
119
- FileUtils.mkdir(@new_dir.path)
120
- IMW::Tools::Transferer.new(:mv, @dir, @nested_dir).transfer!
121
- @dir.exist?.should_not be_true
122
- @nested_dir.exist?.should be_true
123
- end
124
- end
125
-
126
-
127
- describe "transferring HTTP files" do
128
- it "can copy a remote file to a local path" do
129
- IMW::Tools::Transferer.new(:cp, @http, @local).transfer!
130
- @local.exist?.should be_true
131
- end
132
- end
133
-
134
- describe "transferring S3 files" do
135
-
136
- it "can copy an S3 file to a local path" do
137
- IMW::Schemes::S3.should_receive(:get).with(@s3, @local)
138
- IMW::Tools::Transferer.new(:cp, @s3, @local).transfer!
139
- end
140
-
141
- it "can copy a local path to an S3 file" do
142
- IMWTest::Random.file @local.path
143
- IMW::Schemes::S3.should_receive(:put).with(@local, @s3)
144
- IMW::Tools::Transferer.new(:cp, @local, @s3).transfer!
145
- end
146
-
147
- it "can copy between S3 files" do
148
- @new_s3 = IMW.open('s3://mybucket/new/path')
149
- IMW::Schemes::S3.should_receive(:copy).with(@s3, @new_s3)
150
- IMW::Tools::Transferer.new(:cp, @s3, @new_s3).transfer!
151
- end
152
- end
153
-
154
- describe "transferring HDFS files" do
155
- before do
156
- IMW::Schemes::HDFS.stub!(:fs)
157
- end
158
-
159
- it "can copy a local file to an HDFS path" do
160
- IMWTest::Random.file @local.path
161
-
162
- IMW::Schemes::HDFS.should_receive(:fs).with(:put, @local.path, @hdfs.path)
163
- IMW::Tools::Transferer.new(:cp, @local, @hdfs).transfer!
164
- end
165
-
166
- it "can copy an HDFS file to a local path" do
167
- IMW::Schemes::HDFS.should_receive(:fs).with(:get, @hdfs.path, @local.path)
168
- IMW::Tools::Transferer.new(:cp, @hdfs, @local).transfer!
169
- end
170
-
171
- it "can copy between HDFS paths" do
172
- @new_hdfs = IMW.open('hdfs:///a/new/path')
173
- IMW::Schemes::HDFS.should_receive(:fs).with(:cp, @hdfs.path, @new_hdfs.path)
174
- IMW::Tools::Transferer.new(:cp, @hdfs, @new_hdfs).transfer!
175
- end
176
-
177
- it "can move between HDFS paths" do
178
- @new_hdfs = IMW.open('hdfs:///a/new/path')
179
- IMW::Schemes::HDFS.should_receive(:fs).with(:mv, @hdfs.path, @new_hdfs.path)
180
- IMW::Tools::Transferer.new(:mv, @hdfs, @new_hdfs).transfer!
181
- end
182
-
183
- it "can copy from S3 to HDFS" do
184
- IMW::Schemes::HDFS.should_receive(:fs).with(:cp, @s3.s3n_url, @hdfs.path)
185
- IMW::Tools::Transferer.new(:cp, @s3, @hdfs).transfer!
186
- end
187
-
188
- it "can copy from HDFS to S3" do
189
- IMW::Schemes::HDFS.should_receive(:fs).with(:cp, @hdfs.path, @s3.s3n_url)
190
- IMW::Tools::Transferer.new(:cp, @hdfs, @s3).transfer!
191
- end
192
- end
193
- end
194
-
195
-
@@ -1,69 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe "extending resources with specific modules" do
4
- before do
5
- @class = Class.new
6
- @class.send(:include, IMW::Utils::DynamicallyExtendable)
7
- @instance = @class.new
8
- end
9
-
10
- it "should raise an error when registering a malformed handler" do
11
- lambda { @class.register_handler("Foo", 3) }.should raise_error(IMW::ArgumentError)
12
- end
13
-
14
- it "should store in instances modules they've been extended by" do
15
- @foo = Module.new
16
- @instance.extend(@foo)
17
- @instance.modules.should include(@foo)
18
- end
19
-
20
- describe "evaluating handlers" do
21
- before do
22
- @proccer = Module.new
23
- @class.send(:attr_accessor, :prop)
24
- @class.register_handler(@proccer, Proc.new { |instance| instance.prop })
25
-
26
- @regexper = Module.new
27
- @class.send(:define_method, :to_s) { 'whoa' }
28
- @class.register_handler(@regexper, /whoa/)
29
- end
30
-
31
- it "should extend an instance with a matching proc handler" do
32
- @instance.prop = true
33
- @instance.extend_appropriately!
34
- @instance.modules.should include(@proccer)
35
- end
36
-
37
- it "should not extend an instance with a non-matching proc handler" do
38
- @instance.prop = false
39
- @instance.extend_appropriately!
40
- @instance.modules.should_not include(@proccer)
41
- end
42
-
43
- it "should extend an instance with a matching regexp handler" do
44
- @instance.extend_appropriately!
45
- @instance.modules.should include(@regexper)
46
- end
47
-
48
- it "should not extend an instance with a non-matching regexp handler" do
49
- @class.send(:define_method, :to_s) { 'fowl' }
50
- @instance.extend_appropriately!
51
- @instance.modules.should_not include(@regexper)
52
- end
53
-
54
- it "should not extend an instance with a module it was asked to skip" do
55
- @instance.extend_appropriately!(:skip_modules => [@regexper])
56
- @instance.modules.should_not include(@regexper)
57
- end
58
-
59
- it "should not extend with any modules if asked" do
60
- @instance.extend_appropriately!(:no_modules => true)
61
- @instance.modules.should_not include(@regexper)
62
- end
63
-
64
- it "should use a module if asked to do so even if it's handler didn't match" do
65
- @instance.extend_appropriately!(:use_modules => [@proccer])
66
- @instance.modules.should include(@proccer)
67
- end
68
- end
69
- end
@@ -1,61 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- class Klass
4
- include IMW::Utils::HasURI
5
- end
6
-
7
- def new_obj uri
8
- obj = Klass.new
9
- obj.uri = uri
10
- obj
11
- end
12
-
13
- describe IMW::Utils::HasURI do
14
-
15
- it "local file path" do
16
- obj = new_obj("/home/foo.txt")
17
- obj.stub!(:path).and_return("/home/foo.txt")
18
-
19
- obj.scheme.should be_nil
20
- obj.dirname.should == '/home'
21
- obj.basename.should == 'foo.txt'
22
- obj.extname.should == '.txt'
23
- obj.extension.should == 'txt'
24
- obj.name.should == 'foo'
25
- end
26
-
27
- it "local file path with spaces in the name" do
28
- obj = new_obj("/home/foo bar.txt")
29
- obj.stub!(:path).and_return("/home/foo bar.txt")
30
- obj.name.should == 'foo bar'
31
- end
32
-
33
- it "local file path with an explicit file:// scheme" do
34
- obj = new_obj("file:///home/foo.txt")
35
- obj.scheme.should == 'file'
36
- end
37
-
38
- it "web URL with query and fragment" do
39
- obj = new_obj("http://mysite.com/some/page?param=value#frag")
40
- obj.stub!(:path).and_return("/some/page")
41
- obj.scheme.should == 'http'
42
- obj.dirname.should == '/some'
43
- obj.basename.should == 'page'
44
- obj.extname.should == ''
45
- obj.extension.should == ''
46
- obj.name.should == 'page'
47
- end
48
-
49
- it "should be able to strip URIs" do
50
- new_obj('/path/to/something').stripped_uri.to_s.should == '/path/to/something'
51
- new_obj('http://user:pass@example.com:8080/path/to/some/script.php?param=value#frag').stripped_uri.to_s.should == 'http://user:pass@example.com:8080/path/to/some/script.php'
52
- end
53
-
54
- it "should be able to return raw paths" do
55
- new_obj('s3://bucket/crazy url with # some dumb naming convention').raw_path.should == '/crazy url with # some dumb naming convention'
56
- new_obj('s3://bucket/crazy url with ?some dumb naming convention').raw_path.should == '/crazy url with ?some dumb naming convention'
57
- new_obj('s3://bucket/crazy url with ?some dumb naming #convention').raw_path.should == '/crazy url with ?some dumb naming #convention'
58
- new_obj('s3://bucket/crazy url with #some dumb naming ?convention').raw_path.should == '/crazy url with #some dumb naming ?convention'
59
- end
60
-
61
- end