imw 0.2.18 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +7 -26
- data/Gemfile.lock +13 -38
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.textile +35 -0
- data/Rakefile +45 -22
- data/VERSION +1 -1
- data/examples/foo.rb +19 -0
- data/examples/html_selector.rb +22 -0
- data/examples/nes_game_list.csv +625 -0
- data/examples/nes_gamespot.csv +1371 -0
- data/examples/nes_nintendo.csv +624 -0
- data/examples/nes_unlicensed.csv +89 -0
- data/examples/nes_wikipedia.csv +710 -0
- data/examples/nibbler_test.rb +24 -0
- data/examples/script.rb +19 -0
- data/lib/imw.rb +28 -140
- data/lib/imw/error.rb +9 -0
- data/lib/imw/recordizer.rb +8 -0
- data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
- data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
- data/lib/imw/resource.rb +3 -119
- data/lib/imw/serializer.rb +7 -0
- data/lib/imw/serializer/json_serializer.rb +17 -0
- data/lib/imw/uri.rb +41 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/uri_spec.rb +55 -0
- metadata +81 -232
- data/README.rdoc +0 -371
- data/bin/imw +0 -5
- data/bin/tsv_to_json.rb +0 -29
- data/etc/imwrc.rb +0 -26
- data/examples/dataset.rb +0 -12
- data/examples/metadata.yml +0 -10
- data/lib/imw/archives.rb +0 -120
- data/lib/imw/archives/rar.rb +0 -19
- data/lib/imw/archives/tar.rb +0 -19
- data/lib/imw/archives/tarbz2.rb +0 -73
- data/lib/imw/archives/targz.rb +0 -73
- data/lib/imw/archives/zip.rb +0 -51
- data/lib/imw/boot.rb +0 -87
- data/lib/imw/compressed_files.rb +0 -94
- data/lib/imw/compressed_files/bz2.rb +0 -16
- data/lib/imw/compressed_files/compressible.rb +0 -75
- data/lib/imw/compressed_files/gz.rb +0 -16
- data/lib/imw/dataset.rb +0 -125
- data/lib/imw/dataset/paths.rb +0 -29
- data/lib/imw/dataset/workflow.rb +0 -195
- data/lib/imw/formats.rb +0 -33
- data/lib/imw/formats/delimited.rb +0 -170
- data/lib/imw/formats/excel.rb +0 -100
- data/lib/imw/formats/json.rb +0 -41
- data/lib/imw/formats/pdf.rb +0 -71
- data/lib/imw/formats/sgml.rb +0 -69
- data/lib/imw/formats/yaml.rb +0 -41
- data/lib/imw/metadata.rb +0 -83
- data/lib/imw/metadata/contains_metadata.rb +0 -54
- data/lib/imw/metadata/dsl.rb +0 -111
- data/lib/imw/metadata/field.rb +0 -37
- data/lib/imw/metadata/has_metadata.rb +0 -98
- data/lib/imw/metadata/has_summary.rb +0 -57
- data/lib/imw/metadata/schema.rb +0 -17
- data/lib/imw/parsers.rb +0 -8
- data/lib/imw/parsers/flat.rb +0 -44
- data/lib/imw/parsers/html_parser.rb +0 -387
- data/lib/imw/parsers/html_parser/matchers.rb +0 -289
- data/lib/imw/parsers/line_parser.rb +0 -87
- data/lib/imw/parsers/regexp_parser.rb +0 -72
- data/lib/imw/repository.rb +0 -12
- data/lib/imw/runner.rb +0 -118
- data/lib/imw/schemes.rb +0 -23
- data/lib/imw/schemes/ftp.rb +0 -142
- data/lib/imw/schemes/hdfs.rb +0 -251
- data/lib/imw/schemes/http.rb +0 -165
- data/lib/imw/schemes/local.rb +0 -409
- data/lib/imw/schemes/remote.rb +0 -119
- data/lib/imw/schemes/s3.rb +0 -143
- data/lib/imw/schemes/sql.rb +0 -129
- data/lib/imw/tools.rb +0 -12
- data/lib/imw/tools/aggregator.rb +0 -148
- data/lib/imw/tools/archiver.rb +0 -220
- data/lib/imw/tools/downloader.rb +0 -63
- data/lib/imw/tools/extension_analyzer.rb +0 -114
- data/lib/imw/tools/summarizer.rb +0 -83
- data/lib/imw/tools/transferer.rb +0 -167
- data/lib/imw/utils.rb +0 -74
- data/lib/imw/utils/dynamically_extendable.rb +0 -137
- data/lib/imw/utils/error.rb +0 -59
- data/lib/imw/utils/extensions/hpricot.rb +0 -34
- data/lib/imw/utils/has_uri.rb +0 -131
- data/lib/imw/utils/log.rb +0 -92
- data/lib/imw/utils/misc.rb +0 -57
- data/lib/imw/utils/paths.rb +0 -146
- data/lib/imw/utils/uri.rb +0 -59
- data/lib/imw/utils/uuid.rb +0 -33
- data/lib/imw/utils/validate.rb +0 -38
- data/lib/imw/utils/version.rb +0 -11
- data/spec/data/formats/delimited/sample.csv +0 -131
- data/spec/data/formats/delimited/sample.tsv +0 -131
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +0 -1
- data/spec/data/formats/none/sample +0 -650
- data/spec/data/formats/sgml/sample.xml +0 -617
- data/spec/data/formats/text/sample.txt +0 -650
- data/spec/data/formats/yaml/sample.yaml +0 -410
- data/spec/data/schema-tabular.yaml +0 -11
- data/spec/imw/archives/rar_spec.rb +0 -16
- data/spec/imw/archives/tar_spec.rb +0 -16
- data/spec/imw/archives/tarbz2_spec.rb +0 -24
- data/spec/imw/archives/targz_spec.rb +0 -21
- data/spec/imw/archives/zip_spec.rb +0 -16
- data/spec/imw/archives_spec.rb +0 -77
- data/spec/imw/compressed_files/bz2_spec.rb +0 -15
- data/spec/imw/compressed_files/compressible_spec.rb +0 -36
- data/spec/imw/compressed_files/gz_spec.rb +0 -15
- data/spec/imw/compressed_files_spec.rb +0 -47
- data/spec/imw/dataset/paths_spec.rb +0 -32
- data/spec/imw/dataset/workflow_spec.rb +0 -41
- data/spec/imw/formats/delimited_spec.rb +0 -44
- data/spec/imw/formats/excel_spec.rb +0 -55
- data/spec/imw/formats/json_spec.rb +0 -18
- data/spec/imw/formats/sgml_spec.rb +0 -24
- data/spec/imw/formats/yaml_spec.rb +0 -19
- data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
- data/spec/imw/metadata/field_spec.rb +0 -25
- data/spec/imw/metadata/has_metadata_spec.rb +0 -58
- data/spec/imw/metadata/has_summary_spec.rb +0 -32
- data/spec/imw/metadata/schema_spec.rb +0 -24
- data/spec/imw/metadata_spec.rb +0 -86
- data/spec/imw/parsers/line_parser_spec.rb +0 -96
- data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
- data/spec/imw/resource_spec.rb +0 -32
- data/spec/imw/schemes/hdfs_spec.rb +0 -67
- data/spec/imw/schemes/http_spec.rb +0 -19
- data/spec/imw/schemes/local_spec.rb +0 -165
- data/spec/imw/schemes/remote_spec.rb +0 -38
- data/spec/imw/schemes/s3_spec.rb +0 -31
- data/spec/imw/schemes/sql_spec.rb +0 -3
- data/spec/imw/tools/aggregator_spec.rb +0 -71
- data/spec/imw/tools/archiver_spec.rb +0 -120
- data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
- data/spec/imw/tools/summarizer_spec.rb +0 -8
- data/spec/imw/tools/transferer_spec.rb +0 -195
- data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
- data/spec/imw/utils/has_uri_spec.rb +0 -61
- data/spec/imw/utils/paths_spec.rb +0 -10
- data/spec/imw/utils/shared_paths_spec.rb +0 -29
- data/spec/imw_spec.rb +0 -14
- data/spec/rcov.opts +0 -1
- data/spec/spec_helper.rb +0 -31
- data/spec/support/custom_matchers.rb +0 -28
- data/spec/support/file_contents_matcher.rb +0 -30
- data/spec/support/paths_matcher.rb +0 -66
- data/spec/support/random.rb +0 -213
- data/spec/support/without_regard_to_order_matcher.rb +0 -41
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
|
|
3
|
-
describe IMW::Formats::Xml do
|
|
4
|
-
# just spec Xml now as the others are identical
|
|
5
|
-
|
|
6
|
-
before do
|
|
7
|
-
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/sgml/sample.xml'))
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it "should be able to load the XML" do
|
|
11
|
-
((@sample.load/"genus").first/"name").first.inner_text.should == 'Mandrillus'
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
it "should yield the XML when load is given a block" do
|
|
15
|
-
@sample.load do |xml|
|
|
16
|
-
((xml/"genus").first/"name").first.inner_text.should == 'Mandrillus'
|
|
17
|
-
end
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
it "should parse the XML" do
|
|
21
|
-
@sample.parse(:species => ['species[@id]'])[:species].size.should == 130
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
|
|
3
|
-
describe IMW::Formats::Yaml do
|
|
4
|
-
|
|
5
|
-
before do
|
|
6
|
-
@sample = IMW.open(File.join(IMWTest::DATA_DIR, 'formats/yaml/sample.yaml')).load
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
it "should be able to parse the YAML" do
|
|
10
|
-
@sample['Lophocebus'].first[:id].should == 94
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
it "should be able to write YAML" do
|
|
14
|
-
data = { 'foobar' => 3, 'bazbooz' => 4 }
|
|
15
|
-
IMW.open!('test.yaml') { |f| f.emit(data) }
|
|
16
|
-
IMW.open('test.yaml').load['foobar'].should == 3
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
end
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe IMW::Metadata::ContainsMetadata do
|
|
4
|
-
|
|
5
|
-
before do
|
|
6
|
-
class Foo
|
|
7
|
-
attr_accessor :contents
|
|
8
|
-
def path ; IMWTest::TMP_DIR ; end
|
|
9
|
-
def basename ; File.basename(IMWTest::TMP_DIR) ; end
|
|
10
|
-
include IMW::Metadata::ContainsMetadata
|
|
11
|
-
end
|
|
12
|
-
@foo = Foo.new
|
|
13
|
-
@foo.contents = []
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
describe 'finding the default metadata URI' do
|
|
17
|
-
it "should return the default metadata URI when 'contents' is empty" do
|
|
18
|
-
@foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, File.basename(IMWTest::TMP_DIR) + ".icss.yaml")
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
it "should return the default metadata URI when 'contents' doesn't contain any metadata files" do
|
|
22
|
-
@foo.contents.concat ['bar.txt', 'crazy_file.yaml', 'foo.json'].map { |p| File.join(IMWTest::TMP_DIR, p) }
|
|
23
|
-
@foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, File.basename(IMWTest::TMP_DIR) + ".icss.yaml")
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
%w[my-projects.icss.yaml stupid-crazy-fool-of-a-dataset-icss.json foobar-25.metadata.buzz.yml].each do |basename|
|
|
27
|
-
it "should return the metadata URI when 'contents' contains a URI matching '#{basename}'" do
|
|
28
|
-
@foo.contents.concat ['bar.txt', 'crazy_file.yaml', 'foo.json', basename].map { |p| File.join(IMWTest::TMP_DIR, p) }
|
|
29
|
-
@foo.default_metadata_uri.should == File.join(IMWTest::TMP_DIR, basename)
|
|
30
|
-
end
|
|
31
|
-
end
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
describe 'returning its metadata' do
|
|
36
|
-
it "should return 'nil' when no metadata exists on disk" do
|
|
37
|
-
@foo.metadata.should be_nil
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
it "should return Metadata when metadata exists on disk" do
|
|
41
|
-
IMW.open!(@foo.default_metadata_uri) do |f|
|
|
42
|
-
f.write <<YAML
|
|
43
|
-
---
|
|
44
|
-
foo:
|
|
45
|
-
description: bar
|
|
46
|
-
fields: baz
|
|
47
|
-
YAML
|
|
48
|
-
end
|
|
49
|
-
@foo.metadata.class.should == IMW::Metadata
|
|
50
|
-
@foo.metadata['foo']['description'].should == 'bar'
|
|
51
|
-
end
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
end
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe IMW::Metadata::Field do
|
|
4
|
-
|
|
5
|
-
describe "initializing" do
|
|
6
|
-
it "should parse a string into a hash" do
|
|
7
|
-
IMW::Metadata::Field.new('foobar').should == { "name" => 'foobar' }
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it "should raise an error on a Hash without a :name key" do
|
|
11
|
-
lambda { IMW::Metadata::Field.new('foo' => 'bar') }.should raise_error(IMW::ArgumentError)
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
it "should accept a Hash with a :name key" do
|
|
15
|
-
data = { 'name' => :foobar, 'title' => "Bazbooz", 'unit' => "m" }
|
|
16
|
-
IMW::Metadata::Field.new(data).should == data
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
it "should dup a field if given one" do
|
|
20
|
-
orig_field = IMW::Metadata::Field.new('foobar')
|
|
21
|
-
IMW::Metadata::Field.new(orig_field).should == orig_field
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
end
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe IMW::Metadata::HasMetadata do
|
|
4
|
-
|
|
5
|
-
before do
|
|
6
|
-
class Foo
|
|
7
|
-
def uri ; File.join(IMWTest::TMP_DIR, 'test', 'subdir', 'foobar.csv') ; end
|
|
8
|
-
def basename ; File.basename(uri) ; end
|
|
9
|
-
def extension ; 'csv' ; end
|
|
10
|
-
def dir ; IMW.open(File.join(IMWTest::TMP_DIR, 'test', 'subdir')) ; end
|
|
11
|
-
include IMW::Metadata::HasMetadata
|
|
12
|
-
end
|
|
13
|
-
@foo = Foo.new
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
it "should be able to build a schema" do
|
|
17
|
-
@foo.schema.should include(:type, :namespace, :name, :doc, :fields, :non_avro)
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
describe "finding its metadata" do
|
|
21
|
-
|
|
22
|
-
before do
|
|
23
|
-
FileUtils.mkdir_p(@foo.dir.path)
|
|
24
|
-
IMWTest::Random.file(File.join(@foo.dir.path, 'foobar.csv'))
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
it "should return 'nil' when it can't find any metadata" do
|
|
28
|
-
@foo.metadata.should be_nil
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
it "should return 'nil' when a metadata file is found that doesn't describe it" do
|
|
32
|
-
IMW.open!("has_metadata_test.icss.yaml") do |f|
|
|
33
|
-
f.write <<YAML
|
|
34
|
-
---
|
|
35
|
-
foobar.csv:
|
|
36
|
-
description: bar
|
|
37
|
-
fields: ["baz", "booz"]
|
|
38
|
-
YAML
|
|
39
|
-
end
|
|
40
|
-
@foo.metadata.should be_nil
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
# it "should return the metadata when a metadata file is found that does describe it" do
|
|
44
|
-
# IMW.open!("has_metadata_test.icss.yaml") do |f|
|
|
45
|
-
# f.write <<YAML
|
|
46
|
-
# ---
|
|
47
|
-
# #{IMWTest::TMP_DIR}/test/subdir/foobar.csv:
|
|
48
|
-
# description: bar
|
|
49
|
-
# fields: ["baz", "booz"]
|
|
50
|
-
# YAML
|
|
51
|
-
# end
|
|
52
|
-
# @foo.metadata.class.should == IMW::Metadata
|
|
53
|
-
# @foo.metadata[@foo]['description'].should == 'bar'
|
|
54
|
-
# end
|
|
55
|
-
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
end
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe IMW::Metadata::HasSummary do
|
|
4
|
-
|
|
5
|
-
before do
|
|
6
|
-
class Foo
|
|
7
|
-
def initialize(*args) ; @args = args ; end
|
|
8
|
-
def uri ; File.join(IMWTest::TMP_DIR, *@args) ; end
|
|
9
|
-
def basename ; File.basename(uri) ; end
|
|
10
|
-
def extension ; File.extname(@args.last || '').gsub(/^\./,'') ; end
|
|
11
|
-
include IMW::Metadata::HasSummary
|
|
12
|
-
end
|
|
13
|
-
@foo = Foo.new('foo', 'bar.csv')
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
it "should build a summary from an external summary" do
|
|
17
|
-
@foo.summary.should include(:uri, :basename, :extension)
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
it "should build a summary from an external summary and a schema when possible" do
|
|
21
|
-
@foo.stub!(:schema).and_return({:foo => 'bar'})
|
|
22
|
-
@foo.summary[:schema].should == {:foo => 'bar'}
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
it "should be able to build an external summary describing how it's situated in the world" do
|
|
26
|
-
@foo.summary[:uri].should == File.join(IMWTest::TMP_DIR, 'foo', 'bar.csv')
|
|
27
|
-
@foo.summary[:basename].should == 'bar.csv'
|
|
28
|
-
@foo.summary[:extension].should == 'csv'
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
end
|
|
32
|
-
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe IMW::Metadata::Schema do
|
|
4
|
-
|
|
5
|
-
describe "initializing" do
|
|
6
|
-
it "should merge with a Hash" do
|
|
7
|
-
IMW::Metadata::Schema.new({:foo => 'foobar'}).should == { :foo => 'foobar' }
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it "should merge with a Schema" do
|
|
11
|
-
IMW::Metadata::Schema.new(IMW::Metadata::Schema.new({:foo => 'foobar'})).should == { :foo => 'foobar' }
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
it "should ignore anything else" do
|
|
15
|
-
IMW::Metadata::Schema.new('foobar').should == {}
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
it "should accept empty args" do
|
|
19
|
-
IMW::Metadata::Schema.new.should == {}
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
end
|
data/spec/imw/metadata_spec.rb
DELETED
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe IMW::Metadata do
|
|
4
|
-
|
|
5
|
-
before do
|
|
6
|
-
@metadata = IMW::Metadata.new({'foobar' => {'description' => 'buzz', 'fields' => ['a','b', 'c']}, 'http://www.google.com' => { 'description' => 'google', 'fields' => ['d', 'e', 'f'] }})
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
describe "matching URLs without a base" do
|
|
10
|
-
|
|
11
|
-
it "should be able to look up a relative URI literally" do
|
|
12
|
-
@metadata.describes?('foobar').should be_true
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
it "should be able to look up a relative URI when passed an IMW::Resource" do
|
|
16
|
-
@metadata.describes?(IMW.open('foobar')).should be_true
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
it "should be able to look up an absolute URI literally" do
|
|
20
|
-
@metadata.describes?('http://www.google.com').should be_true
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
it "should rightly fail to literally look up a URI it doesn't know about" do
|
|
24
|
-
@metadata.describes?('bungler').should be_false
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
describe "setting URLs" do
|
|
30
|
-
describe "without a base URL" do
|
|
31
|
-
it "should set 'foobar' to 'foobar'" do
|
|
32
|
-
@metadata['foobar'] = {'description' => 'bhaarg', 'fields' => ['a','b','c']}
|
|
33
|
-
@metadata.keys.should include('foobar')
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
it "should set '/a/b/c/foobar' to '/a/b/c/foobar'" do
|
|
37
|
-
@metadata['/a/b/c/foobar'] = {'description' => 'bhaarg', 'fields' => ['a','b','c']}
|
|
38
|
-
@metadata.keys.should include('/a/b/c/foobar')
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
describe "with a base URL" do
|
|
44
|
-
before do
|
|
45
|
-
FileUtils.mkdir_p('chimpo')
|
|
46
|
-
@metadata.base = File.join(IMWTest::TMP_DIR, 'chimpo')
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
it "should set 'foobar' to '$base/foobar'" do
|
|
50
|
-
@metadata['foobar'] = {'description' => 'bhaarg', 'fields' => ['a','b','c']}
|
|
51
|
-
@metadata.keys.should include(File.join(IMWTest::TMP_DIR, 'chimpo', 'foobar'))
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
it "should set '/a/b/c/foobar' to '/a/b/c/foobar'" do
|
|
55
|
-
@metadata['/a/b/c/foobar'] = {'description' => 'bhaarg', 'fields' => ['a','b','c']}
|
|
56
|
-
@metadata.keys.should include('/a/b/c/foobar')
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
describe "matching URLs with a base" do
|
|
63
|
-
|
|
64
|
-
it "should raise an error when trying to use a base URI that doesn't exist" do
|
|
65
|
-
lambda { @metadata.base = 'chimpo' }.should raise_error(IMW::PathError)
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
it "should raise an error when trying to use a base URI that isn't a directory" do
|
|
69
|
-
IMW.open!('chimpo') { |f| f.write('a file') }
|
|
70
|
-
lambda { @metadata.base = 'chimpo' }.should raise_error(IMW::PathError)
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
it "should be able to look up a URI relative to its base" do
|
|
74
|
-
FileUtils.mkdir_p('chimpo')
|
|
75
|
-
@metadata.base = File.join(IMWTest::TMP_DIR, 'chimpo')
|
|
76
|
-
@metadata['foobar'] = {'description' => 'buzz', 'fields' => ['a','b', 'c']}
|
|
77
|
-
@metadata.describe?('foobar').should be_true
|
|
78
|
-
@metadata.describe?(IMW.open('foobar')).should be_true
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
it "should continue to be able to look up an absolute URI literally" do
|
|
82
|
-
@metadata.describes?('http://www.google.com').should be_true
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
end
|
|
86
|
-
end
|
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
require 'ostruct'
|
|
3
|
-
|
|
4
|
-
describe IMW::Parsers::LineParser do
|
|
5
|
-
|
|
6
|
-
before do
|
|
7
|
-
@path = File.join(IMWTest::DATA_DIR, 'formats/delimited/sample.csv')
|
|
8
|
-
@file = File.new(@path)
|
|
9
|
-
@fields = [:id, :name, :genus, :species]
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
describe "without an implemented parsing method" do
|
|
13
|
-
|
|
14
|
-
before do
|
|
15
|
-
@parser = IMW::Parsers::LineParser.new
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
it "should raise an error when attempting to parse a line" do
|
|
19
|
-
lambda { @parser.parse_line "wahtever" }.should raise_error(IMW::NotImplementedError)
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
describe "with an implemented parsing method" do
|
|
25
|
-
|
|
26
|
-
before do
|
|
27
|
-
|
|
28
|
-
@parser_class = Class.new(IMW::Parsers::LineParser)
|
|
29
|
-
@parser_class.class_eval do
|
|
30
|
-
def parse_line line
|
|
31
|
-
id, name, genus, species = line.chomp.split(',')
|
|
32
|
-
{ :id => id, :name => name, :genus => genus, :species => species }
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
@parser = @parser_class.new
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
it "should skip lines as needed" do
|
|
40
|
-
@parser.skip_first = 1
|
|
41
|
-
results = @parser.parse!(@file)
|
|
42
|
-
results.length.should == 130
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
it "should read as many lines as it's asked" do
|
|
46
|
-
results = @parser.parse!(@file, :lines => 10)
|
|
47
|
-
results.length.should == 10
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
describe "when parsing into hashes" do
|
|
51
|
-
|
|
52
|
-
it "should return an array of hashes when called without a block" do
|
|
53
|
-
results = @parser.parse!(@file)
|
|
54
|
-
results.length.should == 131
|
|
55
|
-
results.first.should == { :id => "ID", :name => "Name", :genus => "Genus", :species => "Species" }
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
it "should pass each hash to a block when given one" do
|
|
59
|
-
results = [].tap do |array|
|
|
60
|
-
@parser.parse!(@file) do |hsh|
|
|
61
|
-
hsh.delete(:id)
|
|
62
|
-
array << hsh
|
|
63
|
-
end
|
|
64
|
-
end
|
|
65
|
-
results.length.should == 131
|
|
66
|
-
results.first.should == { :name => "Name", :genus => "Genus", :species => "Species" }
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
describe "when parsing into objects" do
|
|
71
|
-
before { @parser.klass = OpenStruct }
|
|
72
|
-
|
|
73
|
-
it "should return an array of objects when defined with a class" do
|
|
74
|
-
results = @parser.parse!(@file)
|
|
75
|
-
results.length.should == 131
|
|
76
|
-
results.first.class.should == OpenStruct
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
it "should pass each object to a block when given one and defined with a class" do
|
|
80
|
-
@parser.klass = OpenStruct
|
|
81
|
-
results = [].tap do |array|
|
|
82
|
-
@parser.parse!(@file) do |obj|
|
|
83
|
-
obj.genus = nil
|
|
84
|
-
array << obj
|
|
85
|
-
end
|
|
86
|
-
end
|
|
87
|
-
results.length.should == 131
|
|
88
|
-
results.first.class.should == OpenStruct
|
|
89
|
-
results.first.genus.should be_blank
|
|
90
|
-
end
|
|
91
|
-
end
|
|
92
|
-
end
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
require 'ostruct'
|
|
3
|
-
|
|
4
|
-
describe IMW::Parsers::RegexpParser do
|
|
5
|
-
|
|
6
|
-
before do
|
|
7
|
-
@path = "foobar.dat"
|
|
8
|
-
@text = <<EOF
|
|
9
|
-
151.199.53.145 14-Oct-2007:13:34:34-0500 GET /phpmyadmin/main.php HTTP/1.0
|
|
10
|
-
81.227.179.120 14-Oct-2007:13:34:34-0500 GET /phpmyadmin/libraries/select_lang.lib.php HTTP/1.0
|
|
11
|
-
81.3.107.173 14-Oct-2007:13:54:26-0500 GET / HTTP/1.1
|
|
12
|
-
EOF
|
|
13
|
-
File.open(@path, 'w') { |f| f.write(@text) }
|
|
14
|
-
@file = File.new(@path)
|
|
15
|
-
|
|
16
|
-
@regexp = %r{^([\d\.]+) (\d{2}-\w{3}-\d{4}:\d{2}:\d{2}:\d{2}-\d{4}) (\w+) ([^\s]+) HTTP/([\d.]{3})$}
|
|
17
|
-
@fields = [:ip, :timestamp, :verb, :url, :version]
|
|
18
|
-
|
|
19
|
-
@parser = IMW::Parsers::RegexpParser.new :by_regexp => @regexp, :into_fields => @fields
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
describe "parsing a line which matches its regexp" do
|
|
23
|
-
it "should return an appropriate hash" do
|
|
24
|
-
@parser.parse_line(@file.readline).should == {:ip => '151.199.53.145', :timestamp => '14-Oct-2007:13:34:34-0500', :verb => 'GET', :url => '/phpmyadmin/main.php', :version => "1.0"}
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
describe "parsing a line which doesn't match its regexp" do
|
|
29
|
-
before { @parser.regexp = /foobar/ }
|
|
30
|
-
|
|
31
|
-
it "return an empty hash if not parsing strictly" do
|
|
32
|
-
@parser.parse_line(@file.readline).should == {}
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
it "should raise an error if parsing strictly" do
|
|
36
|
-
@parser.strict = true
|
|
37
|
-
lambda { @parser.parse_line(@file.readline) }.should raise_error IMW::ParseError
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
|