imw 0.2.18 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. data/Gemfile +7 -26
  2. data/Gemfile.lock +13 -38
  3. data/{LICENSE → LICENSE.txt} +1 -1
  4. data/README.textile +35 -0
  5. data/Rakefile +45 -22
  6. data/VERSION +1 -1
  7. data/examples/foo.rb +19 -0
  8. data/examples/html_selector.rb +22 -0
  9. data/examples/nes_game_list.csv +625 -0
  10. data/examples/nes_gamespot.csv +1371 -0
  11. data/examples/nes_nintendo.csv +624 -0
  12. data/examples/nes_unlicensed.csv +89 -0
  13. data/examples/nes_wikipedia.csv +710 -0
  14. data/examples/nibbler_test.rb +24 -0
  15. data/examples/script.rb +19 -0
  16. data/lib/imw.rb +28 -140
  17. data/lib/imw/error.rb +9 -0
  18. data/lib/imw/recordizer.rb +8 -0
  19. data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
  20. data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
  21. data/lib/imw/resource.rb +3 -119
  22. data/lib/imw/serializer.rb +7 -0
  23. data/lib/imw/serializer/json_serializer.rb +17 -0
  24. data/lib/imw/uri.rb +41 -0
  25. data/spec/resource_spec.rb +78 -0
  26. data/spec/uri_spec.rb +55 -0
  27. metadata +81 -232
  28. data/README.rdoc +0 -371
  29. data/bin/imw +0 -5
  30. data/bin/tsv_to_json.rb +0 -29
  31. data/etc/imwrc.rb +0 -26
  32. data/examples/dataset.rb +0 -12
  33. data/examples/metadata.yml +0 -10
  34. data/lib/imw/archives.rb +0 -120
  35. data/lib/imw/archives/rar.rb +0 -19
  36. data/lib/imw/archives/tar.rb +0 -19
  37. data/lib/imw/archives/tarbz2.rb +0 -73
  38. data/lib/imw/archives/targz.rb +0 -73
  39. data/lib/imw/archives/zip.rb +0 -51
  40. data/lib/imw/boot.rb +0 -87
  41. data/lib/imw/compressed_files.rb +0 -94
  42. data/lib/imw/compressed_files/bz2.rb +0 -16
  43. data/lib/imw/compressed_files/compressible.rb +0 -75
  44. data/lib/imw/compressed_files/gz.rb +0 -16
  45. data/lib/imw/dataset.rb +0 -125
  46. data/lib/imw/dataset/paths.rb +0 -29
  47. data/lib/imw/dataset/workflow.rb +0 -195
  48. data/lib/imw/formats.rb +0 -33
  49. data/lib/imw/formats/delimited.rb +0 -170
  50. data/lib/imw/formats/excel.rb +0 -100
  51. data/lib/imw/formats/json.rb +0 -41
  52. data/lib/imw/formats/pdf.rb +0 -71
  53. data/lib/imw/formats/sgml.rb +0 -69
  54. data/lib/imw/formats/yaml.rb +0 -41
  55. data/lib/imw/metadata.rb +0 -83
  56. data/lib/imw/metadata/contains_metadata.rb +0 -54
  57. data/lib/imw/metadata/dsl.rb +0 -111
  58. data/lib/imw/metadata/field.rb +0 -37
  59. data/lib/imw/metadata/has_metadata.rb +0 -98
  60. data/lib/imw/metadata/has_summary.rb +0 -57
  61. data/lib/imw/metadata/schema.rb +0 -17
  62. data/lib/imw/parsers.rb +0 -8
  63. data/lib/imw/parsers/flat.rb +0 -44
  64. data/lib/imw/parsers/html_parser.rb +0 -387
  65. data/lib/imw/parsers/html_parser/matchers.rb +0 -289
  66. data/lib/imw/parsers/line_parser.rb +0 -87
  67. data/lib/imw/parsers/regexp_parser.rb +0 -72
  68. data/lib/imw/repository.rb +0 -12
  69. data/lib/imw/runner.rb +0 -118
  70. data/lib/imw/schemes.rb +0 -23
  71. data/lib/imw/schemes/ftp.rb +0 -142
  72. data/lib/imw/schemes/hdfs.rb +0 -251
  73. data/lib/imw/schemes/http.rb +0 -165
  74. data/lib/imw/schemes/local.rb +0 -409
  75. data/lib/imw/schemes/remote.rb +0 -119
  76. data/lib/imw/schemes/s3.rb +0 -143
  77. data/lib/imw/schemes/sql.rb +0 -129
  78. data/lib/imw/tools.rb +0 -12
  79. data/lib/imw/tools/aggregator.rb +0 -148
  80. data/lib/imw/tools/archiver.rb +0 -220
  81. data/lib/imw/tools/downloader.rb +0 -63
  82. data/lib/imw/tools/extension_analyzer.rb +0 -114
  83. data/lib/imw/tools/summarizer.rb +0 -83
  84. data/lib/imw/tools/transferer.rb +0 -167
  85. data/lib/imw/utils.rb +0 -74
  86. data/lib/imw/utils/dynamically_extendable.rb +0 -137
  87. data/lib/imw/utils/error.rb +0 -59
  88. data/lib/imw/utils/extensions/hpricot.rb +0 -34
  89. data/lib/imw/utils/has_uri.rb +0 -131
  90. data/lib/imw/utils/log.rb +0 -92
  91. data/lib/imw/utils/misc.rb +0 -57
  92. data/lib/imw/utils/paths.rb +0 -146
  93. data/lib/imw/utils/uri.rb +0 -59
  94. data/lib/imw/utils/uuid.rb +0 -33
  95. data/lib/imw/utils/validate.rb +0 -38
  96. data/lib/imw/utils/version.rb +0 -11
  97. data/spec/data/formats/delimited/sample.csv +0 -131
  98. data/spec/data/formats/delimited/sample.tsv +0 -131
  99. data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
  100. data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
  101. data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
  102. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
  103. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
  104. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
  105. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
  106. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
  107. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
  108. data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
  109. data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
  110. data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
  111. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
  112. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
  113. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
  114. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
  115. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
  116. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
  117. data/spec/data/formats/excel/sample.xls +0 -0
  118. data/spec/data/formats/json/sample.json +0 -1
  119. data/spec/data/formats/none/sample +0 -650
  120. data/spec/data/formats/sgml/sample.xml +0 -617
  121. data/spec/data/formats/text/sample.txt +0 -650
  122. data/spec/data/formats/yaml/sample.yaml +0 -410
  123. data/spec/data/schema-tabular.yaml +0 -11
  124. data/spec/imw/archives/rar_spec.rb +0 -16
  125. data/spec/imw/archives/tar_spec.rb +0 -16
  126. data/spec/imw/archives/tarbz2_spec.rb +0 -24
  127. data/spec/imw/archives/targz_spec.rb +0 -21
  128. data/spec/imw/archives/zip_spec.rb +0 -16
  129. data/spec/imw/archives_spec.rb +0 -77
  130. data/spec/imw/compressed_files/bz2_spec.rb +0 -15
  131. data/spec/imw/compressed_files/compressible_spec.rb +0 -36
  132. data/spec/imw/compressed_files/gz_spec.rb +0 -15
  133. data/spec/imw/compressed_files_spec.rb +0 -47
  134. data/spec/imw/dataset/paths_spec.rb +0 -32
  135. data/spec/imw/dataset/workflow_spec.rb +0 -41
  136. data/spec/imw/formats/delimited_spec.rb +0 -44
  137. data/spec/imw/formats/excel_spec.rb +0 -55
  138. data/spec/imw/formats/json_spec.rb +0 -18
  139. data/spec/imw/formats/sgml_spec.rb +0 -24
  140. data/spec/imw/formats/yaml_spec.rb +0 -19
  141. data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
  142. data/spec/imw/metadata/field_spec.rb +0 -25
  143. data/spec/imw/metadata/has_metadata_spec.rb +0 -58
  144. data/spec/imw/metadata/has_summary_spec.rb +0 -32
  145. data/spec/imw/metadata/schema_spec.rb +0 -24
  146. data/spec/imw/metadata_spec.rb +0 -86
  147. data/spec/imw/parsers/line_parser_spec.rb +0 -96
  148. data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
  149. data/spec/imw/resource_spec.rb +0 -32
  150. data/spec/imw/schemes/hdfs_spec.rb +0 -67
  151. data/spec/imw/schemes/http_spec.rb +0 -19
  152. data/spec/imw/schemes/local_spec.rb +0 -165
  153. data/spec/imw/schemes/remote_spec.rb +0 -38
  154. data/spec/imw/schemes/s3_spec.rb +0 -31
  155. data/spec/imw/schemes/sql_spec.rb +0 -3
  156. data/spec/imw/tools/aggregator_spec.rb +0 -71
  157. data/spec/imw/tools/archiver_spec.rb +0 -120
  158. data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
  159. data/spec/imw/tools/summarizer_spec.rb +0 -8
  160. data/spec/imw/tools/transferer_spec.rb +0 -195
  161. data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
  162. data/spec/imw/utils/has_uri_spec.rb +0 -61
  163. data/spec/imw/utils/paths_spec.rb +0 -10
  164. data/spec/imw/utils/shared_paths_spec.rb +0 -29
  165. data/spec/imw_spec.rb +0 -14
  166. data/spec/rcov.opts +0 -1
  167. data/spec/spec_helper.rb +0 -31
  168. data/spec/support/custom_matchers.rb +0 -28
  169. data/spec/support/file_contents_matcher.rb +0 -30
  170. data/spec/support/paths_matcher.rb +0 -66
  171. data/spec/support/random.rb +0 -213
  172. data/spec/support/without_regard_to_order_matcher.rb +0 -41
@@ -1,24 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../../spec_helper')
2
-
3
- describe IMW::Formats::Xml do
4
- # just spec Xml now as the others are identical
5
-
6
- before do
7
- @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/sgml/sample.xml'))
8
- end
9
-
10
- it "should be able to load the XML" do
11
- ((@sample.load/"genus").first/"name").first.inner_text.should == 'Mandrillus'
12
- end
13
-
14
- it "should yield the XML when load is given a block" do
15
- @sample.load do |xml|
16
- ((xml/"genus").first/"name").first.inner_text.should == 'Mandrillus'
17
- end
18
- end
19
-
20
- it "should parse the XML" do
21
- @sample.parse(:species => ['species[@id]'])[:species].size.should == 130
22
- end
23
- end
24
-
@@ -1,19 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../../spec_helper')
2
-
3
- describe IMW::Formats::Yaml do
4
-
5
- before do
6
- @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/yaml/sample.yaml')).load
7
- end
8
-
9
- it "should be able to parse the YAML" do
10
- @sample['Lophocebus'].first[:id].should == 94
11
- end
12
-
13
- it "should be able to write YAML" do
14
- data = { 'foobar' => 3, 'bazbooz' => 4 }
15
- IMW.open!('test.yaml') { |f| f.emit(data) }
16
- IMW.open('test.yaml').load['foobar'].should == 3
17
- end
18
-
19
- end
@@ -1,56 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Metadata::ContainsMetadata do
4
-
5
- before do
6
- class Foo
7
- attr_accessor :contents
8
- def path ; IMWTest::TMP_DIR ; end
9
- def basename ; File.basename(IMWTest::TMP_DIR) ; end
10
- include IMW::Metadata::ContainsMetadata
11
- end
12
- @foo = Foo.new
13
- @foo.contents = []
14
- end
15
-
16
- describe 'finding the default metadata URI' do
17
- it "should return the default metadata URI when 'contents' is empty" do
18
- @foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, File.basename(IMWTest::TMP_DIR) + ".icss.yaml")
19
- end
20
-
21
- it "should return the default metadata URI when 'contents' doesn't contain any metadata files" do
22
- @foo.contents.concat ['bar.txt', 'crazy_file.yaml', 'foo.json'].map { |p| File.join(IMWTest::TMP_DIR, p) }
23
- @foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, File.basename(IMWTest::TMP_DIR) + ".icss.yaml")
24
- end
25
-
26
- %w[my-projects.icss.yaml stupid-crazy-fool-of-a-dataset-icss.json foobar-25.metadata.buzz.yml].each do |basename|
27
- it "should return the metadata URI when 'contents' contains a URI matching '#{basename}'" do
28
- @foo.contents.concat ['bar.txt', 'crazy_file.yaml', 'foo.json', basename].map { |p| File.join(IMWTest::TMP_DIR, p) }
29
- @foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, basename)
30
- end
31
- end
32
- end
33
-
34
-
35
- describe 'returning its metadata' do
36
- it "should return 'nil' when no metadata exists on disk" do
37
- @foo.metadata.should be_nil
38
- end
39
-
40
- it "should return Metadata when metadata exists on disk" do
41
- IMW.open!(@foo.default_metadata_uri) do |f|
42
- f.write <<YAML
43
- ---
44
- foo:
45
- description: bar
46
- fields: baz
47
- YAML
48
- end
49
- @foo.metadata.class.should == IMW::Metadata
50
- @foo.metadata['foo']['description'].should == 'bar'
51
- end
52
- end
53
-
54
-
55
-
56
- end
@@ -1,25 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Metadata::Field do
4
-
5
- describe "initializing" do
6
- it "should parse a string into a hash" do
7
- IMW::Metadata::Field.new('foobar').should == { "name" => 'foobar' }
8
- end
9
-
10
- it "should raise an error on a Hash without a :name key" do
11
- lambda { IMW::Metadata::Field.new('foo' => 'bar') }.should raise_error(IMW::ArgumentError)
12
- end
13
-
14
- it "should accept a Hash with a :name key" do
15
- data = { 'name' => :foobar, 'title' => "Bazbooz", 'unit' => "m" }
16
- IMW::Metadata::Field.new(data).should == data
17
- end
18
-
19
- it "should dup a field if given one" do
20
- orig_field = IMW::Metadata::Field.new('foobar')
21
- IMW::Metadata::Field.new(orig_field).should == orig_field
22
- end
23
- end
24
-
25
- end
@@ -1,58 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Metadata::HasMetadata do
4
-
5
- before do
6
- class Foo
7
- def uri ; File.join(IMWTest::TMP_DIR, 'test', 'subdir', 'foobar.csv') ; end
8
- def basename ; File.basename(uri) ; end
9
- def extension ; 'csv' ; end
10
- def dir ; IMW.open(File.join(IMWTest::TMP_DIR, 'test', 'subdir')) ; end
11
- include IMW::Metadata::HasMetadata
12
- end
13
- @foo = Foo.new
14
- end
15
-
16
- it "should be able to build a schema" do
17
- @foo.schema.should include(:type, :namespace, :name, :doc, :fields, :non_avro)
18
- end
19
-
20
- describe "finding its metadata" do
21
-
22
- before do
23
- FileUtils.mkdir_p(@foo.dir.path)
24
- IMWTest::Random.file(File.join(@foo.dir.path, 'foobar.csv'))
25
- end
26
-
27
- it "should return 'nil' when it can't find any metadata" do
28
- @foo.metadata.should be_nil
29
- end
30
-
31
- it "should return 'nil' when a metadata file is found that doesn't describe it" do
32
- IMW.open!("has_metadata_test.icss.yaml") do |f|
33
- f.write <<YAML
34
- ---
35
- foobar.csv:
36
- description: bar
37
- fields: ["baz", "booz"]
38
- YAML
39
- end
40
- @foo.metadata.should be_nil
41
- end
42
-
43
- # it "should return the metadata when a metadata file is found that does describe it" do
44
- # IMW.open!("has_metadata_test.icss.yaml") do |f|
45
- # f.write <<YAML
46
- # ---
47
- # #{IMWTest::TMP_DIR}/test/subdir/foobar.csv:
48
- # description: bar
49
- # fields: ["baz", "booz"]
50
- # YAML
51
- # end
52
- # @foo.metadata.class.should == IMW::Metadata
53
- # @foo.metadata[@foo]['description'].should == 'bar'
54
- # end
55
-
56
- end
57
-
58
- end
@@ -1,32 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Metadata::HasSummary do
4
-
5
- before do
6
- class Foo
7
- def initialize(*args) ; @args = args ; end
8
- def uri ; File.join(IMWTest::TMP_DIR, *@args) ; end
9
- def basename ; File.basename(uri) ; end
10
- def extension ; File.extname(@args.last || '').gsub(/^\./,'') ; end
11
- include IMW::Metadata::HasSummary
12
- end
13
- @foo = Foo.new('foo', 'bar.csv')
14
- end
15
-
16
- it "should build a summary from an external summary" do
17
- @foo.summary.should include(:uri, :basename, :extension)
18
- end
19
-
20
- it "should build a summary from an external summary and a schema when possible" do
21
- @foo.stub!(:schema).and_return({:foo => 'bar'})
22
- @foo.summary[:schema].should == {:foo => 'bar'}
23
- end
24
-
25
- it "should be able to build an external summary describing how it's situated in the world" do
26
- @foo.summary[:uri].should == File.join(IMWTest::TMP_DIR, 'foo', 'bar.csv')
27
- @foo.summary[:basename].should == 'bar.csv'
28
- @foo.summary[:extension].should == 'csv'
29
- end
30
-
31
- end
32
-
@@ -1,24 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Metadata::Schema do
4
-
5
- describe "initializing" do
6
- it "should merge with a Hash" do
7
- IMW::Metadata::Schema.new({:foo => 'foobar'}).should == { :foo => 'foobar' }
8
- end
9
-
10
- it "should merge with a Schema" do
11
- IMW::Metadata::Schema.new(IMW::Metadata::Schema.new({:foo => 'foobar'})).should == { :foo => 'foobar' }
12
- end
13
-
14
- it "should ignore anything else" do
15
- IMW::Metadata::Schema.new('foobar').should == {}
16
- end
17
-
18
- it "should accept empty args" do
19
- IMW::Metadata::Schema.new.should == {}
20
- end
21
-
22
- end
23
-
24
- end
@@ -1,86 +0,0 @@
1
- require File.dirname(__FILE__) + "/../spec_helper"
2
-
3
- describe IMW::Metadata do
4
-
5
- before do
6
- @metadata = IMW::Metadata.new({'foobar' => {'description' => 'buzz', 'fields' => ['a','b', 'c']}, 'http://www.google.com' => { 'description' => 'google', 'fields' => ['d', 'e', 'f'] }})
7
- end
8
-
9
- describe "matching URLs without a base" do
10
-
11
- it "should be able to look up a relative URI literally" do
12
- @metadata.describes?('foobar').should be_true
13
- end
14
-
15
- it "should be able to look up a relative URI when passed an IMW::Resource" do
16
- @metadata.describes?(IMW.open('foobar')).should be_true
17
- end
18
-
19
- it "should be able to look up an absolute URI literally" do
20
- @metadata.describes?('http://www.google.com').should be_true
21
- end
22
-
23
- it "should rightly fail to literally look up a URI it doesn't know about" do
24
- @metadata.describes?('bungler').should be_false
25
- end
26
-
27
- end
28
-
29
- describe "setting URLs" do
30
- describe "without a base URL" do
31
- it "should set 'foobar' to 'foobar'" do
32
- @metadata['foobar'] = {'description' => 'bhaarg', 'fields' => ['a','b','c']}
33
- @metadata.keys.should include('foobar')
34
- end
35
-
36
- it "should set '/a/b/c/foobar' to '/a/b/c/foobar'" do
37
- @metadata['/a/b/c/foobar'] = {'description' => 'bhaarg', 'fields' => ['a','b','c']}
38
- @metadata.keys.should include('/a/b/c/foobar')
39
- end
40
-
41
- end
42
-
43
- describe "with a base URL" do
44
- before do
45
- FileUtils.mkdir_p('chimpo')
46
- @metadata.base = File.join(IMWTest::TMP_DIR, 'chimpo')
47
- end
48
-
49
- it "should set 'foobar' to '$base/foobar'" do
50
- @metadata['foobar'] = {'description' => 'bhaarg', 'fields' => ['a','b','c']}
51
- @metadata.keys.should include(File.join(IMWTest::TMP_DIR, 'chimpo', 'foobar'))
52
- end
53
-
54
- it "should set '/a/b/c/foobar' to '/a/b/c/foobar'" do
55
- @metadata['/a/b/c/foobar'] = {'description' => 'bhaarg', 'fields' => ['a','b','c']}
56
- @metadata.keys.should include('/a/b/c/foobar')
57
- end
58
-
59
- end
60
- end
61
-
62
- describe "matching URLs with a base" do
63
-
64
- it "should raise an error when trying to use a base URI that doesn't exist" do
65
- lambda { @metadata.base = 'chimpo' }.should raise_error(IMW::PathError)
66
- end
67
-
68
- it "should raise an error when trying to use a base URI that isn't a directory" do
69
- IMW.open!('chimpo') { |f| f.write('a file') }
70
- lambda { @metadata.base = 'chimpo' }.should raise_error(IMW::PathError)
71
- end
72
-
73
- it "should be able to look up a URI relative to its base" do
74
- FileUtils.mkdir_p('chimpo')
75
- @metadata.base = File.join(IMWTest::TMP_DIR, 'chimpo')
76
- @metadata['foobar'] = {'description' => 'buzz', 'fields' => ['a','b', 'c']}
77
- @metadata.describe?('foobar').should be_true
78
- @metadata.describe?(IMW.open('foobar')).should be_true
79
- end
80
-
81
- it "should continue to be able to look up an absolute URI literally" do
82
- @metadata.describes?('http://www.google.com').should be_true
83
- end
84
-
85
- end
86
- end
@@ -1,96 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
- require 'ostruct'
3
-
4
- describe IMW::Parsers::LineParser do
5
-
6
- before do
7
- @path = File.join(IMWTest::DATA_DIR, 'formats/delimited/sample.csv')
8
- @file = File.new(@path)
9
- @fields = [:id, :name, :genus, :species]
10
- end
11
-
12
- describe "without an implemented parsing method" do
13
-
14
- before do
15
- @parser = IMW::Parsers::LineParser.new
16
- end
17
-
18
- it "should raise an error when attempting to parse a line" do
19
- lambda { @parser.parse_line "wahtever" }.should raise_error(IMW::NotImplementedError)
20
- end
21
-
22
- end
23
-
24
- describe "with an implemented parsing method" do
25
-
26
- before do
27
-
28
- @parser_class = Class.new(IMW::Parsers::LineParser)
29
- @parser_class.class_eval do
30
- def parse_line line
31
- id, name, genus, species = line.chomp.split(',')
32
- { :id => id, :name => name, :genus => genus, :species => species }
33
- end
34
- end
35
-
36
- @parser = @parser_class.new
37
- end
38
-
39
- it "should skip lines as needed" do
40
- @parser.skip_first = 1
41
- results = @parser.parse!(@file)
42
- results.length.should == 130
43
- end
44
-
45
- it "should read as many lines as it's asked" do
46
- results = @parser.parse!(@file, :lines => 10)
47
- results.length.should == 10
48
- end
49
-
50
- describe "when parsing into hashes" do
51
-
52
- it "should return an array of hashes when called without a block" do
53
- results = @parser.parse!(@file)
54
- results.length.should == 131
55
- results.first.should == { :id => "ID", :name => "Name", :genus => "Genus", :species => "Species" }
56
- end
57
-
58
- it "should pass each hash to a block when given one" do
59
- results = [].tap do |array|
60
- @parser.parse!(@file) do |hsh|
61
- hsh.delete(:id)
62
- array << hsh
63
- end
64
- end
65
- results.length.should == 131
66
- results.first.should == { :name => "Name", :genus => "Genus", :species => "Species" }
67
- end
68
- end
69
-
70
- describe "when parsing into objects" do
71
- before { @parser.klass = OpenStruct }
72
-
73
- it "should return an array of objects when defined with a class" do
74
- results = @parser.parse!(@file)
75
- results.length.should == 131
76
- results.first.class.should == OpenStruct
77
- end
78
-
79
- it "should pass each object to a block when given one and defined with a class" do
80
- @parser.klass = OpenStruct
81
- results = [].tap do |array|
82
- @parser.parse!(@file) do |obj|
83
- obj.genus = nil
84
- array << obj
85
- end
86
- end
87
- results.length.should == 131
88
- results.first.class.should == OpenStruct
89
- results.first.genus.should be_blank
90
- end
91
- end
92
- end
93
- end
94
-
95
-
96
-
@@ -1,42 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
- require 'ostruct'
3
-
4
- describe IMW::Parsers::RegexpParser do
5
-
6
- before do
7
- @path = "foobar.dat"
8
- @text = <<EOF
9
- 151.199.53.145 14-Oct-2007:13:34:34-0500 GET /phpmyadmin/main.php HTTP/1.0
10
- 81.227.179.120 14-Oct-2007:13:34:34-0500 GET /phpmyadmin/libraries/select_lang.lib.php HTTP/1.0
11
- 81.3.107.173 14-Oct-2007:13:54:26-0500 GET / HTTP/1.1
12
- EOF
13
- File.open(@path, 'w') { |f| f.write(@text) }
14
- @file = File.new(@path)
15
-
16
- @regexp = %r{^([\d\.]+) (\d{2}-\w{3}-\d{4}:\d{2}:\d{2}:\d{2}-\d{4}) (\w+) ([^\s]+) HTTP/([\d.]{3})$}
17
- @fields = [:ip, :timestamp, :verb, :url, :version]
18
-
19
- @parser = IMW::Parsers::RegexpParser.new :by_regexp => @regexp, :into_fields => @fields
20
- end
21
-
22
- describe "parsing a line which matches its regexp" do
23
- it "should return an appropriate hash" do
24
- @parser.parse_line(@file.readline).should == {:ip => '151.199.53.145', :timestamp => '14-Oct-2007:13:34:34-0500', :verb => 'GET', :url => '/phpmyadmin/main.php', :version => "1.0"}
25
- end
26
- end
27
-
28
- describe "parsing a line which doesn't match its regexp" do
29
- before { @parser.regexp = /foobar/ }
30
-
31
- it "return an empty hash if not parsing strictly" do
32
- @parser.parse_line(@file.readline).should == {}
33
- end
34
-
35
- it "should raise an error if parsing strictly" do
36
- @parser.strict = true
37
- lambda { @parser.parse_line(@file.readline) }.should raise_error IMW::ParseError
38
- end
39
- end
40
- end
41
-
42
-