earl 0.3.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +4 -15
- data/.rspec +1 -0
- data/.travis.yml +11 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +60 -0
- data/Guardfile +10 -0
- data/LICENSE +2 -4
- data/README.rdoc +145 -0
- data/Rakefile +35 -2
- data/earl.gemspec +13 -7
- data/lib/earl.rb +7 -22
- data/lib/earl/earl.rb +158 -0
- data/lib/earl/scraper.rb +93 -0
- data/lib/earl/version.rb +2 -2
- data/script/console +10 -0
- data/spec/fixtures/bicycles.html +490 -0
- data/spec/fixtures/bicycles_without_description.html +489 -0
- data/spec/fixtures/bicycles_without_images.html +457 -0
- data/spec/fixtures/page_as_atom.html +161 -0
- data/spec/fixtures/page_as_rss.html +151 -0
- data/spec/fixtures/page_with_atom_feed.html +39 -0
- data/spec/fixtures/page_with_rss_and_atom_feeds.html +40 -0
- data/spec/fixtures/page_with_rss_feed.html +39 -0
- data/spec/fixtures/page_without_feeds.html +36 -0
- data/spec/fixtures/youtube.html +1839 -0
- data/spec/integration/feed_spec.rb +78 -0
- data/spec/integration/oembed_spec.rb +40 -0
- data/spec/spec_helper.rb +18 -28
- data/spec/support/fixtures.rb +10 -0
- data/spec/unit/earl/earl_spec.rb +16 -0
- data/spec/unit/earl/feed_spec.rb +59 -0
- data/spec/unit/earl/oembed_spec.rb +49 -0
- data/spec/unit/earl/scraper_spec.rb +48 -0
- data/spec/unit/earl_spec.rb +65 -0
- metadata +123 -46
- data/.rvmrc +0 -48
- data/README.md +0 -41
- data/lib/earl/email_assembler.rb +0 -11
- data/lib/earl/email_entity.rb +0 -27
- data/lib/earl/email_parser.tt +0 -58
- data/lib/earl/entity_base.rb +0 -37
- data/lib/earl/hash_inquirer.rb +0 -16
- data/lib/earl/string_inquirer.rb +0 -11
- data/lib/earl/url_assembler.rb +0 -15
- data/lib/earl/url_entity.rb +0 -23
- data/lib/earl/url_parser.tt +0 -163
- data/spec/earl/earl_spec.rb +0 -17
- data/spec/earl/email_entity_spec.rb +0 -31
- data/spec/earl/email_parser_spec.rb +0 -29
- data/spec/earl/entity_base_spec.rb +0 -39
- data/spec/earl/hash_inquirer_spec.rb +0 -24
- data/spec/earl/string_inquirer_spec.rb +0 -9
- data/spec/earl/url_entity_spec.rb +0 -45
- data/spec/earl/url_parser_spec.rb +0 -189
data/spec/earl/earl_spec.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl do
|
4
|
-
subject { Earl }
|
5
|
-
|
6
|
-
it { should respond_to( :URL ) }
|
7
|
-
describe '#URL' do
|
8
|
-
subject { Earl::URL( 'http://foo.com' ) }
|
9
|
-
it { should be_kind_of( Earl::URLEntity ) }
|
10
|
-
end
|
11
|
-
|
12
|
-
it { should respond_to( :Email ) }
|
13
|
-
describe '#Email' do
|
14
|
-
subject { Earl::Email( 'foo@bar.com' ) }
|
15
|
-
it { should be_kind_of( Earl::EmailEntity ) }
|
16
|
-
end
|
17
|
-
end
|
@@ -1,31 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl::EmailEntity do
|
4
|
-
subject { Earl::EmailEntity }
|
5
|
-
|
6
|
-
# username
|
7
|
-
|
8
|
-
it { should produce( 'foo@bar.com' ).from( 'baz@bar.com' ).when_given( :username => 'foo' ) }
|
9
|
-
it 'must have a username when parsing an email' do
|
10
|
-
expect { Earl::Email( '@bar.com' ) }.to raise_error( Earl::InvalidURLError )
|
11
|
-
end
|
12
|
-
it 'wont let you set the username to nil' do
|
13
|
-
expect { Earl::Email( 'foo@bar.com' ).username = nil }.to raise_error( Earl::InvalidURLError )
|
14
|
-
end
|
15
|
-
|
16
|
-
# domain
|
17
|
-
|
18
|
-
it { should produce( 'foo@bar.com' ).from( 'foo@baz.com' ).when_given( :domain => 'bar.com' ) }
|
19
|
-
it 'must have a domain when parsing an email' do
|
20
|
-
expect { Earl::Email( 'foo@' ) }.to raise_error( Earl::InvalidURLError )
|
21
|
-
end
|
22
|
-
it 'wont let you set the domain to nil' do
|
23
|
-
expect { Earl::Email( 'foo@bar.com' ).domain = nil }.to raise_error( Earl::InvalidURLError )
|
24
|
-
end
|
25
|
-
|
26
|
-
# contact
|
27
|
-
|
28
|
-
it { should produce( 'foo@bar.com <Woot!>' ).from( 'foo@bar.com <Foo Bar>' ).when_given( :contact => 'Woot!' ) }
|
29
|
-
it { should produce( 'foo@bar.com <Woot!>' ).from( 'foo@bar.com' ).when_given( :contact => 'Woot!' ) }
|
30
|
-
it { should produce( 'foo@bar.com' ).from( 'foo@bar.com <Woot!>' ).when_given( :contact => nil ) }
|
31
|
-
end
|
@@ -1,29 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl do
|
4
|
-
let( :parser ){ Earl::EmailParser.new }
|
5
|
-
let( :assembler ){ Earl::EmailAssembler.new }
|
6
|
-
|
7
|
-
[
|
8
|
-
[ 'foo@bar.com', {
|
9
|
-
:username => 'foo',
|
10
|
-
:domain => 'bar.com'
|
11
|
-
} ],
|
12
|
-
[ 'foo.bar@baz.com', {
|
13
|
-
:username => 'foo.bar',
|
14
|
-
:domain => 'baz.com'
|
15
|
-
} ],
|
16
|
-
[ 'foo.bar@baz.com <Foo Bar>', {
|
17
|
-
:username => 'foo.bar',
|
18
|
-
:domain => 'baz.com',
|
19
|
-
:contact => 'Foo Bar'
|
20
|
-
} ]
|
21
|
-
].each do |string, parts|
|
22
|
-
it "should correctly parse the email parts for #{string}" do
|
23
|
-
parser.parse( string ).resolve.should eql( parts )
|
24
|
-
end
|
25
|
-
it "should correctly assemble the email parts to #{string}" do
|
26
|
-
assembler.assemble( parts ).should eql( string )
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
@@ -1,39 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl::EntityBase do
|
4
|
-
|
5
|
-
describe 'the entity dsl' do
|
6
|
-
subject { Class.new( Earl::EntityBase ) do
|
7
|
-
def initialize; end # so we don't invoke our parser
|
8
|
-
|
9
|
-
part_accessor :foo, :bar
|
10
|
-
|
11
|
-
part_accessor :baz do |value|
|
12
|
-
raise EarlError if value == 'woot!'
|
13
|
-
end
|
14
|
-
end.new }
|
15
|
-
|
16
|
-
it { should be_kind_of( Earl::HashInquirer ) }
|
17
|
-
|
18
|
-
[ :foo, :foo=, :bar, :bar=, :baz, :baz= ].each do |method|
|
19
|
-
it { should respond_to( method ) }
|
20
|
-
end
|
21
|
-
|
22
|
-
it 'should define setters/getters' do
|
23
|
-
subject.foo = 'foo!'
|
24
|
-
subject.foo.should eq( 'foo!' )
|
25
|
-
subject[ :foo ].should eq( 'foo!' )
|
26
|
-
subject.foo?.should eq( true )
|
27
|
-
end
|
28
|
-
|
29
|
-
it 'should call the block if given' do
|
30
|
-
expect { subject.baz = 'baz!' }.not_to raise_error
|
31
|
-
expect { subject.baz = 'woot!' }.to raise_error
|
32
|
-
end
|
33
|
-
|
34
|
-
it 'should return a string inquirer for string attributes' do
|
35
|
-
subject.foo = 'sup'
|
36
|
-
subject.foo.kind_of?( Earl::StringInquirer ).should == true
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
@@ -1,24 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl::HashInquirer do
|
4
|
-
subject { Earl::HashInquirer.new :foo => 'bar', :baz => 123, :woo => false }
|
5
|
-
|
6
|
-
it { should be_a( Hash ) }
|
7
|
-
it { should eql( :foo => 'bar', :baz => 123, :woo => false ) }
|
8
|
-
|
9
|
-
describe 'string keys' do
|
10
|
-
its( :foo? ){ should be_true }
|
11
|
-
end
|
12
|
-
|
13
|
-
describe 'numeric keys' do
|
14
|
-
its( :baz? ){ should be_true }
|
15
|
-
end
|
16
|
-
|
17
|
-
describe 'boolean keys' do
|
18
|
-
its( :woo? ){ should be_true }
|
19
|
-
end
|
20
|
-
|
21
|
-
describe 'nonexistant keys' do
|
22
|
-
its( :sup? ){ should be_false }
|
23
|
-
end
|
24
|
-
end
|
@@ -1,45 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl::URLEntity do
|
4
|
-
subject { Earl::URLEntity }
|
5
|
-
|
6
|
-
# scheme
|
7
|
-
|
8
|
-
it { should produce( 'https://foo.com' ).from( 'http://foo.com' ).when_given( :scheme => 'https' ) }
|
9
|
-
it { should produce( 'http://foo.com' ).from( 'foo.com' ).when_given( :scheme => 'http' ) }
|
10
|
-
it { should produce( 'foo.com' ).from( 'http://foo.com' ).when_given( :scheme => nil ) }
|
11
|
-
|
12
|
-
# subdomain
|
13
|
-
|
14
|
-
it { should produce( 'baz.bar.com' ).from( 'foo.bar.com' ).when_given( :subdomain => 'baz' ) }
|
15
|
-
it { should produce( 'bar.foo.com' ).from( 'foo.com' ).when_given( :subdomain => 'bar' ) }
|
16
|
-
it { should produce( 'bar.com' ).from( 'foo.bar.com' ).when_given( :subdomain => nil ) }
|
17
|
-
|
18
|
-
# port
|
19
|
-
|
20
|
-
it { should produce( 'foo.com:4567' ).from( 'foo.com:80' ).when_given( :port => 4567 ) }
|
21
|
-
it { should produce( 'foo.com:4567' ).from( 'foo.com' ).when_given( :port => 4567 ) }
|
22
|
-
it { should produce( 'foo.com' ).from( 'foo.com:4567' ).when_given( :port => nil ) }
|
23
|
-
|
24
|
-
# path
|
25
|
-
|
26
|
-
it { should produce( 'foo.com/bar' ).from( 'foo.com/baz' ).when_given( :path => 'bar' ) }
|
27
|
-
it { should produce( 'foo.com/bar' ).from( 'foo.com' ).when_given( :path => 'bar' ) }
|
28
|
-
it { should produce( 'foo.com' ).from( 'foo.com/bar' ).when_given( :path => nil ) }
|
29
|
-
|
30
|
-
# search
|
31
|
-
|
32
|
-
it { should produce( 'foo.com?bar=asdf' ).from( 'foo.com?bar=baz' ).when_given( :search => 'bar=asdf' ) }
|
33
|
-
it { should produce( 'foo.com?bar=asdf' ).from( 'foo.com' ).when_given( :search => 'bar=asdf' ) }
|
34
|
-
it { should produce( 'foo.com' ).from( 'foo.com?bar=asdf' ).when_given( :search => nil ) }
|
35
|
-
|
36
|
-
# host
|
37
|
-
|
38
|
-
it { should produce( 'www.foo.edu' ).from( 'www.foo.com' ).when_given( :host => 'foo.edu' ) }
|
39
|
-
it 'must have a host when parsing a url' do
|
40
|
-
expect { Earl::URL( 'http://' ) }.to raise_error( Earl::InvalidURLError )
|
41
|
-
end
|
42
|
-
it 'wont let you set the host to nil' do
|
43
|
-
expect { Earl::URL( 'www.foo.com' ).host = nil }.to raise_error( Earl::InvalidURLError )
|
44
|
-
end
|
45
|
-
end
|
@@ -1,189 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl do
|
4
|
-
let( :parser ){ Earl::URLParser.new }
|
5
|
-
let( :assembler ){ Earl::URLAssembler.new }
|
6
|
-
|
7
|
-
[
|
8
|
-
[ 'localhost', {
|
9
|
-
:host => 'localhost'
|
10
|
-
} ],
|
11
|
-
[ 'foo.com', {
|
12
|
-
:host => 'foo.com'
|
13
|
-
} ],
|
14
|
-
|
15
|
-
[ 'foo.edu', {
|
16
|
-
:host => 'foo.edu'
|
17
|
-
} ],
|
18
|
-
|
19
|
-
[ 'foo2bar.biz', {
|
20
|
-
:host => 'foo2bar.biz'
|
21
|
-
} ],
|
22
|
-
|
23
|
-
[ 'www.foo.com', {
|
24
|
-
:host => 'foo.com',
|
25
|
-
:subdomain => 'www'
|
26
|
-
} ],
|
27
|
-
|
28
|
-
[ 'http://localhost', {
|
29
|
-
:scheme => 'http',
|
30
|
-
:host => 'localhost'
|
31
|
-
} ],
|
32
|
-
|
33
|
-
[ 'http://foo.com', {
|
34
|
-
:scheme => 'http',
|
35
|
-
:host => 'foo.com'
|
36
|
-
} ],
|
37
|
-
|
38
|
-
[ 'http://www.foo.com', {
|
39
|
-
:scheme => 'http',
|
40
|
-
:host => 'foo.com',
|
41
|
-
:subdomain => 'www'
|
42
|
-
} ],
|
43
|
-
|
44
|
-
[ 'localhost:3000', {
|
45
|
-
:host => 'localhost',
|
46
|
-
:port => '3000'
|
47
|
-
} ],
|
48
|
-
|
49
|
-
[ 'http://localhost:3000', {
|
50
|
-
:scheme => 'http',
|
51
|
-
:host => 'localhost',
|
52
|
-
:port => '3000'
|
53
|
-
} ],
|
54
|
-
|
55
|
-
[ 'www.foo.com:8080', {
|
56
|
-
:subdomain => 'www',
|
57
|
-
:host => 'foo.com',
|
58
|
-
:port => '8080'
|
59
|
-
} ],
|
60
|
-
|
61
|
-
[ 'http://www.foo.com:8080', {
|
62
|
-
:scheme => 'http',
|
63
|
-
:subdomain => 'www',
|
64
|
-
:host => 'foo.com',
|
65
|
-
:port => '8080'
|
66
|
-
} ],
|
67
|
-
|
68
|
-
[ 'localhost/bar', {
|
69
|
-
:host => 'localhost',
|
70
|
-
:path => 'bar'
|
71
|
-
} ],
|
72
|
-
|
73
|
-
[ 'foo.com/bar', {
|
74
|
-
:host => 'foo.com',
|
75
|
-
:path => 'bar'
|
76
|
-
} ],
|
77
|
-
|
78
|
-
[ 'www.foo.com/bar', {
|
79
|
-
:subdomain => 'www',
|
80
|
-
:host => 'foo.com',
|
81
|
-
:path => 'bar'
|
82
|
-
} ],
|
83
|
-
|
84
|
-
[ 'http://localhost/bar', {
|
85
|
-
:scheme => 'http',
|
86
|
-
:host => 'localhost',
|
87
|
-
:path => 'bar'
|
88
|
-
} ],
|
89
|
-
|
90
|
-
[ 'http://foo.com/bar', {
|
91
|
-
:scheme => 'http',
|
92
|
-
:host => 'foo.com',
|
93
|
-
:path => 'bar'
|
94
|
-
} ],
|
95
|
-
|
96
|
-
[ 'http://www.foo.com/bar', {
|
97
|
-
:scheme => 'http',
|
98
|
-
:subdomain => 'www',
|
99
|
-
:host => 'foo.com',
|
100
|
-
:path => 'bar'
|
101
|
-
} ],
|
102
|
-
|
103
|
-
[ 'localhost?baz=woo', {
|
104
|
-
:host => 'localhost',
|
105
|
-
:search => 'baz=woo'
|
106
|
-
} ],
|
107
|
-
|
108
|
-
[ 'localhost:3000?baz=woo', {
|
109
|
-
:host => 'localhost',
|
110
|
-
:port => '3000',
|
111
|
-
:search => 'baz=woo'
|
112
|
-
} ],
|
113
|
-
|
114
|
-
[ 'localhost:3000/bar?baz=woo', {
|
115
|
-
:host => 'localhost',
|
116
|
-
:port => '3000',
|
117
|
-
:path => 'bar',
|
118
|
-
:search => 'baz=woo'
|
119
|
-
} ],
|
120
|
-
|
121
|
-
[ 'foo.com?baz=woo', {
|
122
|
-
:host => 'foo.com',
|
123
|
-
:search => 'baz=woo'
|
124
|
-
} ],
|
125
|
-
|
126
|
-
[ 'www.foo.com?baz=woo', {
|
127
|
-
:subdomain => 'www',
|
128
|
-
:host => 'foo.com',
|
129
|
-
:search => 'baz=woo'
|
130
|
-
} ],
|
131
|
-
|
132
|
-
[ 'http://foo.com?baz=woo', {
|
133
|
-
:scheme => 'http',
|
134
|
-
:host => 'foo.com',
|
135
|
-
:search => 'baz=woo'
|
136
|
-
} ],
|
137
|
-
|
138
|
-
[ 'http://www.foo.com?baz=woo', {
|
139
|
-
:scheme => 'http',
|
140
|
-
:subdomain => 'www',
|
141
|
-
:host => 'foo.com',
|
142
|
-
:search => 'baz=woo'
|
143
|
-
} ],
|
144
|
-
|
145
|
-
[ 'http://foo.com/bar?baz=woo', {
|
146
|
-
:scheme => 'http',
|
147
|
-
:host => 'foo.com',
|
148
|
-
:path => 'bar',
|
149
|
-
:search => 'baz=woo'
|
150
|
-
} ],
|
151
|
-
|
152
|
-
[ 'http://www.foo.com/bar?baz=woot', {
|
153
|
-
:scheme => 'http',
|
154
|
-
:subdomain => 'www',
|
155
|
-
:host => 'foo.com',
|
156
|
-
:path => 'bar',
|
157
|
-
:search => 'baz=woot'
|
158
|
-
} ],
|
159
|
-
|
160
|
-
[ 'http://localhost:3000?baz=woo', {
|
161
|
-
:scheme => 'http',
|
162
|
-
:host => 'localhost',
|
163
|
-
:port => '3000',
|
164
|
-
:search => 'baz=woo'
|
165
|
-
} ],
|
166
|
-
|
167
|
-
[ 'http://foo.com:8080?baz=woooo', {
|
168
|
-
:scheme => 'http',
|
169
|
-
:host => 'foo.com',
|
170
|
-
:port => '8080',
|
171
|
-
:search => 'baz=woooo'
|
172
|
-
} ],
|
173
|
-
|
174
|
-
[ 'http://foo.com:8080/bar?baz=woo', {
|
175
|
-
:scheme => 'http',
|
176
|
-
:host => 'foo.com',
|
177
|
-
:port => '8080',
|
178
|
-
:path => 'bar',
|
179
|
-
:search => 'baz=woo'
|
180
|
-
} ]
|
181
|
-
].each do |string, parts|
|
182
|
-
it "should correctly parse the url parts for #{string}" do
|
183
|
-
parser.parse( string ).resolve.should eql( parts )
|
184
|
-
end
|
185
|
-
it "should correctly assemble the url parts to #{string}" do
|
186
|
-
assembler.assemble( parts ).should eql( string )
|
187
|
-
end
|
188
|
-
end
|
189
|
-
end
|