datacatalog-importer 0.1.17 → 0.1.18
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/datacatalog-importer.gemspec +2 -2
- data/lib/utility.rb +8 -7
- data/spec/utility_spec.rb +80 -3
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.17
|
1
|
+
0.1.18
|
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{datacatalog-importer}
|
8
|
-
s.version = "0.1.17"
|
8
|
+
s.version = "0.1.18"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["David James"]
|
12
|
-
s.date = %q{2010-05-
|
12
|
+
s.date = %q{2010-05-06}
|
13
13
|
s.description = %q{This framework makes it easier to write importers for the National Data Catalog.}
|
14
14
|
s.email = %q{djames@sunlightfoundation.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/utility.rb
CHANGED
@@ -8,8 +8,8 @@ module DataCatalog
|
|
8
8
|
|
9
9
|
# == URLs ==
|
10
10
|
|
11
|
-
def self.absolute_url(
|
12
|
-
plain_string(URI.parse(
|
11
|
+
def self.absolute_url(base_url, url)
|
12
|
+
plain_string(URI.parse(base_url).merge(url).to_s)
|
13
13
|
end
|
14
14
|
|
15
15
|
def self.normalize_url(url)
|
@@ -50,7 +50,7 @@ module DataCatalog
|
|
50
50
|
|
51
51
|
def self.headers
|
52
52
|
{
|
53
|
-
"UserAgent" => "National Data Catalog Importer/0.1.17",
|
53
|
+
"UserAgent" => "National Data Catalog Importer/0.1.18",
|
54
54
|
}
|
55
55
|
end
|
56
56
|
|
@@ -62,17 +62,18 @@ module DataCatalog
|
|
62
62
|
def self.fetch(uri, options={})
|
63
63
|
max_attempts = options[:max_attempts] || 3
|
64
64
|
retry_delay = options[:retry_delay] || 5
|
65
|
+
quiet = options[:quiet] || false
|
65
66
|
attempts = 0
|
66
67
|
loop do
|
67
68
|
begin
|
68
|
-
puts "Fetching #{uri}..."
|
69
|
+
puts "Fetching #{uri}..." unless quiet
|
69
70
|
io = open(uri, headers)
|
70
71
|
return io.read
|
71
72
|
rescue Timeout::Error, StandardError => e
|
72
73
|
attempts += 1
|
73
|
-
puts " Attempt ##{attempts} failed."
|
74
|
-
puts " Error: #{e.inspect}"
|
75
|
-
|
74
|
+
puts " Attempt ##{attempts} failed." unless quiet
|
75
|
+
puts " Error: #{e.inspect}" unless quiet
|
76
|
+
return nil if attempts >= max_attempts
|
76
77
|
sleep(retry_delay)
|
77
78
|
end
|
78
79
|
end
|
data/spec/utility_spec.rb
CHANGED
@@ -6,16 +6,93 @@ describe "Utility" do
|
|
6
6
|
|
7
7
|
describe "normalize_url" do
|
8
8
|
it "add trailing slash if missing" do
|
9
|
-
U.normalize_url("sunlightlabs.com").should ==
|
9
|
+
U.normalize_url("sunlightlabs.com").should ==
|
10
|
+
"http://sunlightlabs.com/"
|
10
11
|
end
|
11
12
|
|
12
13
|
it "lowercases" do
|
13
|
-
U.normalize_url("http://SunlightLabs.com/").should ==
|
14
|
+
U.normalize_url("http://SunlightLabs.com/").should ==
|
15
|
+
"http://sunlightlabs.com/"
|
14
16
|
end
|
15
17
|
|
16
18
|
it "adds http if missing" do
|
17
|
-
U.normalize_url("sunlightlabs.com/").should ==
|
19
|
+
U.normalize_url("sunlightlabs.com/").should ==
|
20
|
+
"http://sunlightlabs.com/"
|
18
21
|
end
|
19
22
|
end
|
20
23
|
|
24
|
+
describe "absolute_url" do
|
25
|
+
it "should work" do
|
26
|
+
U.absolute_url("http://sunlightlabs.com", "/contact").should ==
|
27
|
+
"http://sunlightlabs.com/contact"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe "single_line_clean" do
|
32
|
+
it "should clean up leading and trailing whitespace" do
|
33
|
+
U.single_line_clean("\t \ttext\t\t ").should == "text"
|
34
|
+
end
|
35
|
+
|
36
|
+
it "should clean up leading and trailing newlines" do
|
37
|
+
U.single_line_clean("\n\ntext\n\n").should == "text"
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should clean up all newlines" do
|
41
|
+
U.single_line_clean("sunlight\nlabs").should == "sunlight labs"
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe "multi_line_clean" do
|
46
|
+
it "should remove leading and trailing newlines" do
|
47
|
+
input = "\nline 1\nline 2\nline 3\n"
|
48
|
+
U.multi_line_clean(input).should == "line 1\nline 2\nline 3"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "fetch" do
|
53
|
+
before do
|
54
|
+
@readable = Object.new
|
55
|
+
@readable.stub(:read).and_return("result")
|
56
|
+
|
57
|
+
@sleep_count = 0
|
58
|
+
U.stub(:sleep).and_return {
|
59
|
+
@sleep_count += 1
|
60
|
+
}
|
61
|
+
end
|
62
|
+
|
63
|
+
it "should work" do
|
64
|
+
U.stub(:open).and_return(@readable)
|
65
|
+
U.fetch("fake", :quiet => true).should == "result"
|
66
|
+
end
|
67
|
+
|
68
|
+
it "bad fetches below retry limit are ok" do
|
69
|
+
@count = 0
|
70
|
+
U.stub(:open).and_return {
|
71
|
+
@count += 1
|
72
|
+
if @count <= 2
|
73
|
+
raise StandardError
|
74
|
+
else
|
75
|
+
@readable
|
76
|
+
end
|
77
|
+
}
|
78
|
+
U.fetch("fake", :max_attempts => 3, :quiet => true).should == "result"
|
79
|
+
@sleep_count.should == 2
|
80
|
+
end
|
81
|
+
|
82
|
+
it "bad fetches above retry limit give nil" do
|
83
|
+
@count = 0
|
84
|
+
U.stub(:open).and_return {
|
85
|
+
@count += 1
|
86
|
+
if @count <= 2
|
87
|
+
raise StandardError
|
88
|
+
else
|
89
|
+
@readable
|
90
|
+
end
|
91
|
+
}
|
92
|
+
U.fetch("fake", :max_attempts => 2, :quiet => true).should == nil
|
93
|
+
@sleep_count.should == 1
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
97
|
+
|
21
98
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 17
|
9
|
-
version: 0.1.17
|
8
|
+
- 18
|
9
|
+
version: 0.1.18
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- David James
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-05-
|
17
|
+
date: 2010-05-06 00:00:00 -04:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|