imw 0.2.18 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (172) hide show
  1. data/Gemfile +7 -26
  2. data/Gemfile.lock +13 -38
  3. data/{LICENSE → LICENSE.txt} +1 -1
  4. data/README.textile +35 -0
  5. data/Rakefile +45 -22
  6. data/VERSION +1 -1
  7. data/examples/foo.rb +19 -0
  8. data/examples/html_selector.rb +22 -0
  9. data/examples/nes_game_list.csv +625 -0
  10. data/examples/nes_gamespot.csv +1371 -0
  11. data/examples/nes_nintendo.csv +624 -0
  12. data/examples/nes_unlicensed.csv +89 -0
  13. data/examples/nes_wikipedia.csv +710 -0
  14. data/examples/nibbler_test.rb +24 -0
  15. data/examples/script.rb +19 -0
  16. data/lib/imw.rb +28 -140
  17. data/lib/imw/error.rb +9 -0
  18. data/lib/imw/recordizer.rb +8 -0
  19. data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
  20. data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
  21. data/lib/imw/resource.rb +3 -119
  22. data/lib/imw/serializer.rb +7 -0
  23. data/lib/imw/serializer/json_serializer.rb +17 -0
  24. data/lib/imw/uri.rb +41 -0
  25. data/spec/resource_spec.rb +78 -0
  26. data/spec/uri_spec.rb +55 -0
  27. metadata +81 -232
  28. data/README.rdoc +0 -371
  29. data/bin/imw +0 -5
  30. data/bin/tsv_to_json.rb +0 -29
  31. data/etc/imwrc.rb +0 -26
  32. data/examples/dataset.rb +0 -12
  33. data/examples/metadata.yml +0 -10
  34. data/lib/imw/archives.rb +0 -120
  35. data/lib/imw/archives/rar.rb +0 -19
  36. data/lib/imw/archives/tar.rb +0 -19
  37. data/lib/imw/archives/tarbz2.rb +0 -73
  38. data/lib/imw/archives/targz.rb +0 -73
  39. data/lib/imw/archives/zip.rb +0 -51
  40. data/lib/imw/boot.rb +0 -87
  41. data/lib/imw/compressed_files.rb +0 -94
  42. data/lib/imw/compressed_files/bz2.rb +0 -16
  43. data/lib/imw/compressed_files/compressible.rb +0 -75
  44. data/lib/imw/compressed_files/gz.rb +0 -16
  45. data/lib/imw/dataset.rb +0 -125
  46. data/lib/imw/dataset/paths.rb +0 -29
  47. data/lib/imw/dataset/workflow.rb +0 -195
  48. data/lib/imw/formats.rb +0 -33
  49. data/lib/imw/formats/delimited.rb +0 -170
  50. data/lib/imw/formats/excel.rb +0 -100
  51. data/lib/imw/formats/json.rb +0 -41
  52. data/lib/imw/formats/pdf.rb +0 -71
  53. data/lib/imw/formats/sgml.rb +0 -69
  54. data/lib/imw/formats/yaml.rb +0 -41
  55. data/lib/imw/metadata.rb +0 -83
  56. data/lib/imw/metadata/contains_metadata.rb +0 -54
  57. data/lib/imw/metadata/dsl.rb +0 -111
  58. data/lib/imw/metadata/field.rb +0 -37
  59. data/lib/imw/metadata/has_metadata.rb +0 -98
  60. data/lib/imw/metadata/has_summary.rb +0 -57
  61. data/lib/imw/metadata/schema.rb +0 -17
  62. data/lib/imw/parsers.rb +0 -8
  63. data/lib/imw/parsers/flat.rb +0 -44
  64. data/lib/imw/parsers/html_parser.rb +0 -387
  65. data/lib/imw/parsers/html_parser/matchers.rb +0 -289
  66. data/lib/imw/parsers/line_parser.rb +0 -87
  67. data/lib/imw/parsers/regexp_parser.rb +0 -72
  68. data/lib/imw/repository.rb +0 -12
  69. data/lib/imw/runner.rb +0 -118
  70. data/lib/imw/schemes.rb +0 -23
  71. data/lib/imw/schemes/ftp.rb +0 -142
  72. data/lib/imw/schemes/hdfs.rb +0 -251
  73. data/lib/imw/schemes/http.rb +0 -165
  74. data/lib/imw/schemes/local.rb +0 -409
  75. data/lib/imw/schemes/remote.rb +0 -119
  76. data/lib/imw/schemes/s3.rb +0 -143
  77. data/lib/imw/schemes/sql.rb +0 -129
  78. data/lib/imw/tools.rb +0 -12
  79. data/lib/imw/tools/aggregator.rb +0 -148
  80. data/lib/imw/tools/archiver.rb +0 -220
  81. data/lib/imw/tools/downloader.rb +0 -63
  82. data/lib/imw/tools/extension_analyzer.rb +0 -114
  83. data/lib/imw/tools/summarizer.rb +0 -83
  84. data/lib/imw/tools/transferer.rb +0 -167
  85. data/lib/imw/utils.rb +0 -74
  86. data/lib/imw/utils/dynamically_extendable.rb +0 -137
  87. data/lib/imw/utils/error.rb +0 -59
  88. data/lib/imw/utils/extensions/hpricot.rb +0 -34
  89. data/lib/imw/utils/has_uri.rb +0 -131
  90. data/lib/imw/utils/log.rb +0 -92
  91. data/lib/imw/utils/misc.rb +0 -57
  92. data/lib/imw/utils/paths.rb +0 -146
  93. data/lib/imw/utils/uri.rb +0 -59
  94. data/lib/imw/utils/uuid.rb +0 -33
  95. data/lib/imw/utils/validate.rb +0 -38
  96. data/lib/imw/utils/version.rb +0 -11
  97. data/spec/data/formats/delimited/sample.csv +0 -131
  98. data/spec/data/formats/delimited/sample.tsv +0 -131
  99. data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
  100. data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
  101. data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
  102. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
  103. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
  104. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
  105. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
  106. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
  107. data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
  108. data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
  109. data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
  110. data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
  111. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
  112. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
  113. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
  114. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
  115. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
  116. data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
  117. data/spec/data/formats/excel/sample.xls +0 -0
  118. data/spec/data/formats/json/sample.json +0 -1
  119. data/spec/data/formats/none/sample +0 -650
  120. data/spec/data/formats/sgml/sample.xml +0 -617
  121. data/spec/data/formats/text/sample.txt +0 -650
  122. data/spec/data/formats/yaml/sample.yaml +0 -410
  123. data/spec/data/schema-tabular.yaml +0 -11
  124. data/spec/imw/archives/rar_spec.rb +0 -16
  125. data/spec/imw/archives/tar_spec.rb +0 -16
  126. data/spec/imw/archives/tarbz2_spec.rb +0 -24
  127. data/spec/imw/archives/targz_spec.rb +0 -21
  128. data/spec/imw/archives/zip_spec.rb +0 -16
  129. data/spec/imw/archives_spec.rb +0 -77
  130. data/spec/imw/compressed_files/bz2_spec.rb +0 -15
  131. data/spec/imw/compressed_files/compressible_spec.rb +0 -36
  132. data/spec/imw/compressed_files/gz_spec.rb +0 -15
  133. data/spec/imw/compressed_files_spec.rb +0 -47
  134. data/spec/imw/dataset/paths_spec.rb +0 -32
  135. data/spec/imw/dataset/workflow_spec.rb +0 -41
  136. data/spec/imw/formats/delimited_spec.rb +0 -44
  137. data/spec/imw/formats/excel_spec.rb +0 -55
  138. data/spec/imw/formats/json_spec.rb +0 -18
  139. data/spec/imw/formats/sgml_spec.rb +0 -24
  140. data/spec/imw/formats/yaml_spec.rb +0 -19
  141. data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
  142. data/spec/imw/metadata/field_spec.rb +0 -25
  143. data/spec/imw/metadata/has_metadata_spec.rb +0 -58
  144. data/spec/imw/metadata/has_summary_spec.rb +0 -32
  145. data/spec/imw/metadata/schema_spec.rb +0 -24
  146. data/spec/imw/metadata_spec.rb +0 -86
  147. data/spec/imw/parsers/line_parser_spec.rb +0 -96
  148. data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
  149. data/spec/imw/resource_spec.rb +0 -32
  150. data/spec/imw/schemes/hdfs_spec.rb +0 -67
  151. data/spec/imw/schemes/http_spec.rb +0 -19
  152. data/spec/imw/schemes/local_spec.rb +0 -165
  153. data/spec/imw/schemes/remote_spec.rb +0 -38
  154. data/spec/imw/schemes/s3_spec.rb +0 -31
  155. data/spec/imw/schemes/sql_spec.rb +0 -3
  156. data/spec/imw/tools/aggregator_spec.rb +0 -71
  157. data/spec/imw/tools/archiver_spec.rb +0 -120
  158. data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
  159. data/spec/imw/tools/summarizer_spec.rb +0 -8
  160. data/spec/imw/tools/transferer_spec.rb +0 -195
  161. data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
  162. data/spec/imw/utils/has_uri_spec.rb +0 -61
  163. data/spec/imw/utils/paths_spec.rb +0 -10
  164. data/spec/imw/utils/shared_paths_spec.rb +0 -29
  165. data/spec/imw_spec.rb +0 -14
  166. data/spec/rcov.opts +0 -1
  167. data/spec/spec_helper.rb +0 -31
  168. data/spec/support/custom_matchers.rb +0 -28
  169. data/spec/support/file_contents_matcher.rb +0 -30
  170. data/spec/support/paths_matcher.rb +0 -66
  171. data/spec/support/random.rb +0 -213
  172. data/spec/support/without_regard_to_order_matcher.rb +0 -41
@@ -1,24 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../../spec_helper')
2
-
3
- describe IMW::Formats::Xml do
4
- # just spec Xml now as the others are identical
5
-
6
- before do
7
- @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/sgml/sample.xml'))
8
- end
9
-
10
- it "should be able to load the XML" do
11
- ((@sample.load/"genus").first/"name").first.inner_text.should == 'Mandrillus'
12
- end
13
-
14
- it "should yield the XML when load is given a block" do
15
- @sample.load do |xml|
16
- ((xml/"genus").first/"name").first.inner_text.should == 'Mandrillus'
17
- end
18
- end
19
-
20
- it "should parse the XML" do
21
- @sample.parse(:species => ['species[@id]'])[:species].size.should == 130
22
- end
23
- end
24
-
@@ -1,19 +0,0 @@
1
- require File.join(File.dirname(__FILE__),'../../spec_helper')
2
-
3
- describe IMW::Formats::Yaml do
4
-
5
- before do
6
- @sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/yaml/sample.yaml')).load
7
- end
8
-
9
- it "should be able to parse the YAML" do
10
- @sample['Lophocebus'].first[:id].should == 94
11
- end
12
-
13
- it "should be able to write YAML" do
14
- data = { 'foobar' => 3, 'bazbooz' => 4 }
15
- IMW.open!('test.yaml') { |f| f.emit(data) }
16
- IMW.open('test.yaml').load['foobar'].should == 3
17
- end
18
-
19
- end
@@ -1,56 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Metadata::ContainsMetadata do
4
-
5
- before do
6
- class Foo
7
- attr_accessor :contents
8
- def path ; IMWTest::TMP_DIR ; end
9
- def basename ; File.basename(IMWTest::TMP_DIR) ; end
10
- include IMW::Metadata::ContainsMetadata
11
- end
12
- @foo = Foo.new
13
- @foo.contents = []
14
- end
15
-
16
- describe 'finding the default metadata URI' do
17
- it "should return the default metadata URI when 'contents' is empty" do
18
- @foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, File.basename(IMWTest::TMP_DIR) + ".icss.yaml")
19
- end
20
-
21
- it "should return the default metadata URI when 'contents' doesn't contain any metadata files" do
22
- @foo.contents.concat ['bar.txt', 'crazy_file.yaml', 'foo.json'].map { |p| File.join(IMWTest::TMP_DIR, p) }
23
- @foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, File.basename(IMWTest::TMP_DIR) + ".icss.yaml")
24
- end
25
-
26
- %w[my-projects.icss.yaml stupid-crazy-fool-of-a-dataset-icss.json foobar-25.metadata.buzz.yml].each do |basename|
27
- it "should return the metadata URI when 'contents' contains a URI matching '#{basename}'" do
28
- @foo.contents.concat ['bar.txt', 'crazy_file.yaml', 'foo.json', basename].map { |p| File.join(IMWTest::TMP_DIR, p) }
29
- @foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, basename)
30
- end
31
- end
32
- end
33
-
34
-
35
- describe 'returning its metadata' do
36
- it "should return 'nil' when no metadata exists on disk" do
37
- @foo.metadata.should be_nil
38
- end
39
-
40
- it "should return Metadata when metadata exists on disk" do
41
- IMW.open!(@foo.default_metadata_uri) do |f|
42
- f.write <<YAML
43
- ---
44
- foo:
45
- description: bar
46
- fields: baz
47
- YAML
48
- end
49
- @foo.metadata.class.should == IMW::Metadata
50
- @foo.metadata['foo']['description'].should == 'bar'
51
- end
52
- end
53
-
54
-
55
-
56
- end
@@ -1,25 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Metadata::Field do
4
-
5
- describe "initializing" do
6
- it "should parse a string into a hash" do
7
- IMW::Metadata::Field.new('foobar').should == { "name" => 'foobar' }
8
- end
9
-
10
- it "should raise an error on a Hash without a :name key" do
11
- lambda { IMW::Metadata::Field.new('foo' => 'bar') }.should raise_error(IMW::ArgumentError)
12
- end
13
-
14
- it "should accept a Hash with a :name key" do
15
- data = { 'name' => :foobar, 'title' => "Bazbooz", 'unit' => "m" }
16
- IMW::Metadata::Field.new(data).should == data
17
- end
18
-
19
- it "should dup a field if given one" do
20
- orig_field = IMW::Metadata::Field.new('foobar')
21
- IMW::Metadata::Field.new(orig_field).should == orig_field
22
- end
23
- end
24
-
25
- end
@@ -1,58 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Metadata::HasMetadata do
4
-
5
- before do
6
- class Foo
7
- def uri ; File.join(IMWTest::TMP_DIR, 'test', 'subdir', 'foobar.csv') ; end
8
- def basename ; File.basename(uri) ; end
9
- def extension ; 'csv' ; end
10
- def dir ; IMW.open(File.join(IMWTest::TMP_DIR, 'test', 'subdir')) ; end
11
- include IMW::Metadata::HasMetadata
12
- end
13
- @foo = Foo.new
14
- end
15
-
16
- it "should be able to build a schema" do
17
- @foo.schema.should include(:type, :namespace, :name, :doc, :fields, :non_avro)
18
- end
19
-
20
- describe "finding its metadata" do
21
-
22
- before do
23
- FileUtils.mkdir_p(@foo.dir.path)
24
- IMWTest::Random.file(File.join(@foo.dir.path, 'foobar.csv'))
25
- end
26
-
27
- it "should return 'nil' when it can't find any metadata" do
28
- @foo.metadata.should be_nil
29
- end
30
-
31
- it "should return 'nil' when a metadata file is found that doesn't describe it" do
32
- IMW.open!("has_metadata_test.icss.yaml") do |f|
33
- f.write <<YAML
34
- ---
35
- foobar.csv:
36
- description: bar
37
- fields: ["baz", "booz"]
38
- YAML
39
- end
40
- @foo.metadata.should be_nil
41
- end
42
-
43
- # it "should return the metadata when a metadata file is found that does describe it" do
44
- # IMW.open!("has_metadata_test.icss.yaml") do |f|
45
- # f.write <<YAML
46
- # ---
47
- # #{IMWTest::TMP_DIR}/test/subdir/foobar.csv:
48
- # description: bar
49
- # fields: ["baz", "booz"]
50
- # YAML
51
- # end
52
- # @foo.metadata.class.should == IMW::Metadata
53
- # @foo.metadata[@foo]['description'].should == 'bar'
54
- # end
55
-
56
- end
57
-
58
- end
@@ -1,32 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Metadata::HasSummary do
4
-
5
- before do
6
- class Foo
7
- def initialize(*args) ; @args = args ; end
8
- def uri ; File.join(IMWTest::TMP_DIR, *@args) ; end
9
- def basename ; File.basename(uri) ; end
10
- def extension ; File.extname(@args.last || '').gsub(/^\./,'') ; end
11
- include IMW::Metadata::HasSummary
12
- end
13
- @foo = Foo.new('foo', 'bar.csv')
14
- end
15
-
16
- it "should build a summary from an external summary" do
17
- @foo.summary.should include(:uri, :basename, :extension)
18
- end
19
-
20
- it "should build a summary from an external summary and a schema when possible" do
21
- @foo.stub!(:schema).and_return({:foo => 'bar'})
22
- @foo.summary[:schema].should == {:foo => 'bar'}
23
- end
24
-
25
- it "should be able to build an external summary describing how it's situated in the world" do
26
- @foo.summary[:uri].should == File.join(IMWTest::TMP_DIR, 'foo', 'bar.csv')
27
- @foo.summary[:basename].should == 'bar.csv'
28
- @foo.summary[:extension].should == 'csv'
29
- end
30
-
31
- end
32
-
@@ -1,24 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Metadata::Schema do
4
-
5
- describe "initializing" do
6
- it "should merge with a Hash" do
7
- IMW::Metadata::Schema.new({:foo => 'foobar'}).should == { :foo => 'foobar' }
8
- end
9
-
10
- it "should merge with a Schema" do
11
- IMW::Metadata::Schema.new(IMW::Metadata::Schema.new({:foo => 'foobar'})).should == { :foo => 'foobar' }
12
- end
13
-
14
- it "should ignore anything else" do
15
- IMW::Metadata::Schema.new('foobar').should == {}
16
- end
17
-
18
- it "should accept empty args" do
19
- IMW::Metadata::Schema.new.should == {}
20
- end
21
-
22
- end
23
-
24
- end
@@ -1,86 +0,0 @@
1
- require File.dirname(__FILE__) + "/../spec_helper"
2
-
3
- describe IMW::Metadata do
4
-
5
- before do
6
- @metadata = IMW::Metadata.new({'foobar' => {'description' => 'buzz', 'fields' => ['a','b', 'c']}, 'http://www.google.com' => { 'description' => 'google', 'fields' => ['d', 'e', 'f'] }})
7
- end
8
-
9
- describe "matching URLs without a base" do
10
-
11
- it "should be able to look up a relative URI literally" do
12
- @metadata.describes?('foobar').should be_true
13
- end
14
-
15
- it "should be able to look up a relative URI when passed an IMW::Resource" do
16
- @metadata.describes?(IMW.open('foobar')).should be_true
17
- end
18
-
19
- it "should be able to look up an absolute URI literally" do
20
- @metadata.describes?('http://www.google.com').should be_true
21
- end
22
-
23
- it "should rightly fail to literally look up a URI it doesn't know about" do
24
- @metadata.describes?('bungler').should be_false
25
- end
26
-
27
- end
28
-
29
- describe "setting URLs" do
30
- describe "without a base URL" do
31
- it "should set 'foobar' to 'foobar'" do
32
- @metadata['foobar'] = {'description' => 'bhaarg', 'fields' => ['a','b','c']}
33
- @metadata.keys.should include('foobar')
34
- end
35
-
36
- it "should set '/a/b/c/foobar' to '/a/b/c/foobar'" do
37
- @metadata['/a/b/c/foobar'] = {'description' => 'bhaarg', 'fields' => ['a','b','c']}
38
- @metadata.keys.should include('/a/b/c/foobar')
39
- end
40
-
41
- end
42
-
43
- describe "with a base URL" do
44
- before do
45
- FileUtils.mkdir_p('chimpo')
46
- @metadata.base = File.join(IMWTest::TMP_DIR, 'chimpo')
47
- end
48
-
49
- it "should set 'foobar' to '$base/foobar'" do
50
- @metadata['foobar'] = {'description' => 'bhaarg', 'fields' => ['a','b','c']}
51
- @metadata.keys.should include(File.join(IMWTest::TMP_DIR, 'chimpo', 'foobar'))
52
- end
53
-
54
- it "should set '/a/b/c/foobar' to '/a/b/c/foobar'" do
55
- @metadata['/a/b/c/foobar'] = {'description' => 'bhaarg', 'fields' => ['a','b','c']}
56
- @metadata.keys.should include('/a/b/c/foobar')
57
- end
58
-
59
- end
60
- end
61
-
62
- describe "matching URLs with a base" do
63
-
64
- it "should raise an error when trying to use a base URI that doesn't exist" do
65
- lambda { @metadata.base = 'chimpo' }.should raise_error(IMW::PathError)
66
- end
67
-
68
- it "should raise an error when trying to use a base URI that isn't a directory" do
69
- IMW.open!('chimpo') { |f| f.write('a file') }
70
- lambda { @metadata.base = 'chimpo' }.should raise_error(IMW::PathError)
71
- end
72
-
73
- it "should be able to look up a URI relative to its base" do
74
- FileUtils.mkdir_p('chimpo')
75
- @metadata.base = File.join(IMWTest::TMP_DIR, 'chimpo')
76
- @metadata['foobar'] = {'description' => 'buzz', 'fields' => ['a','b', 'c']}
77
- @metadata.describe?('foobar').should be_true
78
- @metadata.describe?(IMW.open('foobar')).should be_true
79
- end
80
-
81
- it "should continue to be able to look up an absolute URI literally" do
82
- @metadata.describes?('http://www.google.com').should be_true
83
- end
84
-
85
- end
86
- end
@@ -1,96 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
- require 'ostruct'
3
-
4
- describe IMW::Parsers::LineParser do
5
-
6
- before do
7
- @path = File.join(IMWTest::DATA_DIR, 'formats/delimited/sample.csv')
8
- @file = File.new(@path)
9
- @fields = [:id, :name, :genus, :species]
10
- end
11
-
12
- describe "without an implemented parsing method" do
13
-
14
- before do
15
- @parser = IMW::Parsers::LineParser.new
16
- end
17
-
18
- it "should raise an error when attempting to parse a line" do
19
- lambda { @parser.parse_line "wahtever" }.should raise_error(IMW::NotImplementedError)
20
- end
21
-
22
- end
23
-
24
- describe "with an implemented parsing method" do
25
-
26
- before do
27
-
28
- @parser_class = Class.new(IMW::Parsers::LineParser)
29
- @parser_class.class_eval do
30
- def parse_line line
31
- id, name, genus, species = line.chomp.split(',')
32
- { :id => id, :name => name, :genus => genus, :species => species }
33
- end
34
- end
35
-
36
- @parser = @parser_class.new
37
- end
38
-
39
- it "should skip lines as needed" do
40
- @parser.skip_first = 1
41
- results = @parser.parse!(@file)
42
- results.length.should == 130
43
- end
44
-
45
- it "should read as many lines as it's asked" do
46
- results = @parser.parse!(@file, :lines => 10)
47
- results.length.should == 10
48
- end
49
-
50
- describe "when parsing into hashes" do
51
-
52
- it "should return an array of hashes when called without a block" do
53
- results = @parser.parse!(@file)
54
- results.length.should == 131
55
- results.first.should == { :id => "ID", :name => "Name", :genus => "Genus", :species => "Species" }
56
- end
57
-
58
- it "should pass each hash to a block when given one" do
59
- results = [].tap do |array|
60
- @parser.parse!(@file) do |hsh|
61
- hsh.delete(:id)
62
- array << hsh
63
- end
64
- end
65
- results.length.should == 131
66
- results.first.should == { :name => "Name", :genus => "Genus", :species => "Species" }
67
- end
68
- end
69
-
70
- describe "when parsing into objects" do
71
- before { @parser.klass = OpenStruct }
72
-
73
- it "should return an array of objects when defined with a class" do
74
- results = @parser.parse!(@file)
75
- results.length.should == 131
76
- results.first.class.should == OpenStruct
77
- end
78
-
79
- it "should pass each object to a block when given one and defined with a class" do
80
- @parser.klass = OpenStruct
81
- results = [].tap do |array|
82
- @parser.parse!(@file) do |obj|
83
- obj.genus = nil
84
- array << obj
85
- end
86
- end
87
- results.length.should == 131
88
- results.first.class.should == OpenStruct
89
- results.first.genus.should be_blank
90
- end
91
- end
92
- end
93
- end
94
-
95
-
96
-
@@ -1,42 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
- require 'ostruct'
3
-
4
- describe IMW::Parsers::RegexpParser do
5
-
6
- before do
7
- @path = "foobar.dat"
8
- @text = <<EOF
9
- 151.199.53.145 14-Oct-2007:13:34:34-0500 GET /phpmyadmin/main.php HTTP/1.0
10
- 81.227.179.120 14-Oct-2007:13:34:34-0500 GET /phpmyadmin/libraries/select_lang.lib.php HTTP/1.0
11
- 81.3.107.173 14-Oct-2007:13:54:26-0500 GET / HTTP/1.1
12
- EOF
13
- File.open(@path, 'w') { |f| f.write(@text) }
14
- @file = File.new(@path)
15
-
16
- @regexp = %r{^([\d\.]+) (\d{2}-\w{3}-\d{4}:\d{2}:\d{2}:\d{2}-\d{4}) (\w+) ([^\s]+) HTTP/([\d.]{3})$}
17
- @fields = [:ip, :timestamp, :verb, :url, :version]
18
-
19
- @parser = IMW::Parsers::RegexpParser.new :by_regexp => @regexp, :into_fields => @fields
20
- end
21
-
22
- describe "parsing a line which matches its regexp" do
23
- it "should return an appropriate hash" do
24
- @parser.parse_line(@file.readline).should == {:ip => '151.199.53.145', :timestamp => '14-Oct-2007:13:34:34-0500', :verb => 'GET', :url => '/phpmyadmin/main.php', :version => "1.0"}
25
- end
26
- end
27
-
28
- describe "parsing a line which doesn't match its regexp" do
29
- before { @parser.regexp = /foobar/ }
30
-
31
- it "return an empty hash if not parsing strictly" do
32
- @parser.parse_line(@file.readline).should == {}
33
- end
34
-
35
- it "should raise an error if parsing strictly" do
36
- @parser.strict = true
37
- lambda { @parser.parse_line(@file.readline) }.should raise_error IMW::ParseError
38
- end
39
- end
40
- end
41
-
42
-