earl 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +4 -15
- data/.rspec +1 -0
- data/.travis.yml +11 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +60 -0
- data/Guardfile +10 -0
- data/LICENSE +2 -4
- data/README.rdoc +145 -0
- data/Rakefile +35 -2
- data/earl.gemspec +13 -7
- data/lib/earl.rb +7 -22
- data/lib/earl/earl.rb +158 -0
- data/lib/earl/scraper.rb +93 -0
- data/lib/earl/version.rb +2 -2
- data/script/console +10 -0
- data/spec/fixtures/bicycles.html +490 -0
- data/spec/fixtures/bicycles_without_description.html +489 -0
- data/spec/fixtures/bicycles_without_images.html +457 -0
- data/spec/fixtures/page_as_atom.html +161 -0
- data/spec/fixtures/page_as_rss.html +151 -0
- data/spec/fixtures/page_with_atom_feed.html +39 -0
- data/spec/fixtures/page_with_rss_and_atom_feeds.html +40 -0
- data/spec/fixtures/page_with_rss_feed.html +39 -0
- data/spec/fixtures/page_without_feeds.html +36 -0
- data/spec/fixtures/youtube.html +1839 -0
- data/spec/integration/feed_spec.rb +78 -0
- data/spec/integration/oembed_spec.rb +40 -0
- data/spec/spec_helper.rb +18 -28
- data/spec/support/fixtures.rb +10 -0
- data/spec/unit/earl/earl_spec.rb +16 -0
- data/spec/unit/earl/feed_spec.rb +59 -0
- data/spec/unit/earl/oembed_spec.rb +49 -0
- data/spec/unit/earl/scraper_spec.rb +48 -0
- data/spec/unit/earl_spec.rb +65 -0
- metadata +123 -46
- data/.rvmrc +0 -48
- data/README.md +0 -41
- data/lib/earl/email_assembler.rb +0 -11
- data/lib/earl/email_entity.rb +0 -27
- data/lib/earl/email_parser.tt +0 -58
- data/lib/earl/entity_base.rb +0 -37
- data/lib/earl/hash_inquirer.rb +0 -16
- data/lib/earl/string_inquirer.rb +0 -11
- data/lib/earl/url_assembler.rb +0 -15
- data/lib/earl/url_entity.rb +0 -23
- data/lib/earl/url_parser.tt +0 -163
- data/spec/earl/earl_spec.rb +0 -17
- data/spec/earl/email_entity_spec.rb +0 -31
- data/spec/earl/email_parser_spec.rb +0 -29
- data/spec/earl/entity_base_spec.rb +0 -39
- data/spec/earl/hash_inquirer_spec.rb +0 -24
- data/spec/earl/string_inquirer_spec.rb +0 -9
- data/spec/earl/url_entity_spec.rb +0 -45
- data/spec/earl/url_parser_spec.rb +0 -189
data/spec/earl/earl_spec.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl do
|
4
|
-
subject { Earl }
|
5
|
-
|
6
|
-
it { should respond_to( :URL ) }
|
7
|
-
describe '#URL' do
|
8
|
-
subject { Earl::URL( 'http://foo.com' ) }
|
9
|
-
it { should be_kind_of( Earl::URLEntity ) }
|
10
|
-
end
|
11
|
-
|
12
|
-
it { should respond_to( :Email ) }
|
13
|
-
describe '#Email' do
|
14
|
-
subject { Earl::Email( 'foo@bar.com' ) }
|
15
|
-
it { should be_kind_of( Earl::EmailEntity ) }
|
16
|
-
end
|
17
|
-
end
|
data/spec/earl/email_entity_spec.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl::EmailEntity do
|
4
|
-
subject { Earl::EmailEntity }
|
5
|
-
|
6
|
-
# username
|
7
|
-
|
8
|
-
it { should produce( 'foo@bar.com' ).from( 'baz@bar.com' ).when_given( :username => 'foo' ) }
|
9
|
-
it 'must have a username when parsing an email' do
|
10
|
-
expect { Earl::Email( '@bar.com' ) }.to raise_error( Earl::InvalidURLError )
|
11
|
-
end
|
12
|
-
it 'wont let you set the username to nil' do
|
13
|
-
expect { Earl::Email( 'foo@bar.com' ).username = nil }.to raise_error( Earl::InvalidURLError )
|
14
|
-
end
|
15
|
-
|
16
|
-
# domain
|
17
|
-
|
18
|
-
it { should produce( 'foo@bar.com' ).from( 'foo@baz.com' ).when_given( :domain => 'bar.com' ) }
|
19
|
-
it 'must have a domain when parsing an email' do
|
20
|
-
expect { Earl::Email( 'foo@' ) }.to raise_error( Earl::InvalidURLError )
|
21
|
-
end
|
22
|
-
it 'wont let you set the domain to nil' do
|
23
|
-
expect { Earl::Email( 'foo@bar.com' ).domain = nil }.to raise_error( Earl::InvalidURLError )
|
24
|
-
end
|
25
|
-
|
26
|
-
# contact
|
27
|
-
|
28
|
-
it { should produce( 'foo@bar.com <Woot!>' ).from( 'foo@bar.com <Foo Bar>' ).when_given( :contact => 'Woot!' ) }
|
29
|
-
it { should produce( 'foo@bar.com <Woot!>' ).from( 'foo@bar.com' ).when_given( :contact => 'Woot!' ) }
|
30
|
-
it { should produce( 'foo@bar.com' ).from( 'foo@bar.com <Woot!>' ).when_given( :contact => nil ) }
|
31
|
-
end
|
data/spec/earl/email_parser_spec.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl do
|
4
|
-
let( :parser ){ Earl::EmailParser.new }
|
5
|
-
let( :assembler ){ Earl::EmailAssembler.new }
|
6
|
-
|
7
|
-
[
|
8
|
-
[ 'foo@bar.com', {
|
9
|
-
:username => 'foo',
|
10
|
-
:domain => 'bar.com'
|
11
|
-
} ],
|
12
|
-
[ 'foo.bar@baz.com', {
|
13
|
-
:username => 'foo.bar',
|
14
|
-
:domain => 'baz.com'
|
15
|
-
} ],
|
16
|
-
[ 'foo.bar@baz.com <Foo Bar>', {
|
17
|
-
:username => 'foo.bar',
|
18
|
-
:domain => 'baz.com',
|
19
|
-
:contact => 'Foo Bar'
|
20
|
-
} ]
|
21
|
-
].each do |string, parts|
|
22
|
-
it "should correctly parse the email parts for #{string}" do
|
23
|
-
parser.parse( string ).resolve.should eql( parts )
|
24
|
-
end
|
25
|
-
it "should correctly assemble the email parts to #{string}" do
|
26
|
-
assembler.assemble( parts ).should eql( string )
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
data/spec/earl/entity_base_spec.rb
DELETED
@@ -1,39 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl::EntityBase do
|
4
|
-
|
5
|
-
describe 'the entity dsl' do
|
6
|
-
subject { Class.new( Earl::EntityBase ) do
|
7
|
-
def initialize; end # so we don't invoke our parser
|
8
|
-
|
9
|
-
part_accessor :foo, :bar
|
10
|
-
|
11
|
-
part_accessor :baz do |value|
|
12
|
-
raise EarlError if value == 'woot!'
|
13
|
-
end
|
14
|
-
end.new }
|
15
|
-
|
16
|
-
it { should be_kind_of( Earl::HashInquirer ) }
|
17
|
-
|
18
|
-
[ :foo, :foo=, :bar, :bar=, :baz, :baz= ].each do |method|
|
19
|
-
it { should respond_to( method ) }
|
20
|
-
end
|
21
|
-
|
22
|
-
it 'should define setters/getters' do
|
23
|
-
subject.foo = 'foo!'
|
24
|
-
subject.foo.should eq( 'foo!' )
|
25
|
-
subject[ :foo ].should eq( 'foo!' )
|
26
|
-
subject.foo?.should eq( true )
|
27
|
-
end
|
28
|
-
|
29
|
-
it 'should call the block if given' do
|
30
|
-
expect { subject.baz = 'baz!' }.not_to raise_error
|
31
|
-
expect { subject.baz = 'woot!' }.to raise_error
|
32
|
-
end
|
33
|
-
|
34
|
-
it 'should return a string inquirer for string attributes' do
|
35
|
-
subject.foo = 'sup'
|
36
|
-
subject.foo.kind_of?( Earl::StringInquirer ).should == true
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
data/spec/earl/hash_inquirer_spec.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl::HashInquirer do
|
4
|
-
subject { Earl::HashInquirer.new :foo => 'bar', :baz => 123, :woo => false }
|
5
|
-
|
6
|
-
it { should be_a( Hash ) }
|
7
|
-
it { should eql( :foo => 'bar', :baz => 123, :woo => false ) }
|
8
|
-
|
9
|
-
describe 'string keys' do
|
10
|
-
its( :foo? ){ should be_true }
|
11
|
-
end
|
12
|
-
|
13
|
-
describe 'numeric keys' do
|
14
|
-
its( :baz? ){ should be_true }
|
15
|
-
end
|
16
|
-
|
17
|
-
describe 'boolean keys' do
|
18
|
-
its( :woo? ){ should be_true }
|
19
|
-
end
|
20
|
-
|
21
|
-
describe 'nonexistant keys' do
|
22
|
-
its( :sup? ){ should be_false }
|
23
|
-
end
|
24
|
-
end
|
data/spec/earl/url_entity_spec.rb
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl::URLEntity do
|
4
|
-
subject { Earl::URLEntity }
|
5
|
-
|
6
|
-
# scheme
|
7
|
-
|
8
|
-
it { should produce( 'https://foo.com' ).from( 'http://foo.com' ).when_given( :scheme => 'https' ) }
|
9
|
-
it { should produce( 'http://foo.com' ).from( 'foo.com' ).when_given( :scheme => 'http' ) }
|
10
|
-
it { should produce( 'foo.com' ).from( 'http://foo.com' ).when_given( :scheme => nil ) }
|
11
|
-
|
12
|
-
# subdomain
|
13
|
-
|
14
|
-
it { should produce( 'baz.bar.com' ).from( 'foo.bar.com' ).when_given( :subdomain => 'baz' ) }
|
15
|
-
it { should produce( 'bar.foo.com' ).from( 'foo.com' ).when_given( :subdomain => 'bar' ) }
|
16
|
-
it { should produce( 'bar.com' ).from( 'foo.bar.com' ).when_given( :subdomain => nil ) }
|
17
|
-
|
18
|
-
# port
|
19
|
-
|
20
|
-
it { should produce( 'foo.com:4567' ).from( 'foo.com:80' ).when_given( :port => 4567 ) }
|
21
|
-
it { should produce( 'foo.com:4567' ).from( 'foo.com' ).when_given( :port => 4567 ) }
|
22
|
-
it { should produce( 'foo.com' ).from( 'foo.com:4567' ).when_given( :port => nil ) }
|
23
|
-
|
24
|
-
# path
|
25
|
-
|
26
|
-
it { should produce( 'foo.com/bar' ).from( 'foo.com/baz' ).when_given( :path => 'bar' ) }
|
27
|
-
it { should produce( 'foo.com/bar' ).from( 'foo.com' ).when_given( :path => 'bar' ) }
|
28
|
-
it { should produce( 'foo.com' ).from( 'foo.com/bar' ).when_given( :path => nil ) }
|
29
|
-
|
30
|
-
# search
|
31
|
-
|
32
|
-
it { should produce( 'foo.com?bar=asdf' ).from( 'foo.com?bar=baz' ).when_given( :search => 'bar=asdf' ) }
|
33
|
-
it { should produce( 'foo.com?bar=asdf' ).from( 'foo.com' ).when_given( :search => 'bar=asdf' ) }
|
34
|
-
it { should produce( 'foo.com' ).from( 'foo.com?bar=asdf' ).when_given( :search => nil ) }
|
35
|
-
|
36
|
-
# host
|
37
|
-
|
38
|
-
it { should produce( 'www.foo.edu' ).from( 'www.foo.com' ).when_given( :host => 'foo.edu' ) }
|
39
|
-
it 'must have a host when parsing a url' do
|
40
|
-
expect { Earl::URL( 'http://' ) }.to raise_error( Earl::InvalidURLError )
|
41
|
-
end
|
42
|
-
it 'wont let you set the host to nil' do
|
43
|
-
expect { Earl::URL( 'www.foo.com' ).host = nil }.to raise_error( Earl::InvalidURLError )
|
44
|
-
end
|
45
|
-
end
|
data/spec/earl/url_parser_spec.rb
DELETED
@@ -1,189 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Earl do
|
4
|
-
let( :parser ){ Earl::URLParser.new }
|
5
|
-
let( :assembler ){ Earl::URLAssembler.new }
|
6
|
-
|
7
|
-
[
|
8
|
-
[ 'localhost', {
|
9
|
-
:host => 'localhost'
|
10
|
-
} ],
|
11
|
-
[ 'foo.com', {
|
12
|
-
:host => 'foo.com'
|
13
|
-
} ],
|
14
|
-
|
15
|
-
[ 'foo.edu', {
|
16
|
-
:host => 'foo.edu'
|
17
|
-
} ],
|
18
|
-
|
19
|
-
[ 'foo2bar.biz', {
|
20
|
-
:host => 'foo2bar.biz'
|
21
|
-
} ],
|
22
|
-
|
23
|
-
[ 'www.foo.com', {
|
24
|
-
:host => 'foo.com',
|
25
|
-
:subdomain => 'www'
|
26
|
-
} ],
|
27
|
-
|
28
|
-
[ 'http://localhost', {
|
29
|
-
:scheme => 'http',
|
30
|
-
:host => 'localhost'
|
31
|
-
} ],
|
32
|
-
|
33
|
-
[ 'http://foo.com', {
|
34
|
-
:scheme => 'http',
|
35
|
-
:host => 'foo.com'
|
36
|
-
} ],
|
37
|
-
|
38
|
-
[ 'http://www.foo.com', {
|
39
|
-
:scheme => 'http',
|
40
|
-
:host => 'foo.com',
|
41
|
-
:subdomain => 'www'
|
42
|
-
} ],
|
43
|
-
|
44
|
-
[ 'localhost:3000', {
|
45
|
-
:host => 'localhost',
|
46
|
-
:port => '3000'
|
47
|
-
} ],
|
48
|
-
|
49
|
-
[ 'http://localhost:3000', {
|
50
|
-
:scheme => 'http',
|
51
|
-
:host => 'localhost',
|
52
|
-
:port => '3000'
|
53
|
-
} ],
|
54
|
-
|
55
|
-
[ 'www.foo.com:8080', {
|
56
|
-
:subdomain => 'www',
|
57
|
-
:host => 'foo.com',
|
58
|
-
:port => '8080'
|
59
|
-
} ],
|
60
|
-
|
61
|
-
[ 'http://www.foo.com:8080', {
|
62
|
-
:scheme => 'http',
|
63
|
-
:subdomain => 'www',
|
64
|
-
:host => 'foo.com',
|
65
|
-
:port => '8080'
|
66
|
-
} ],
|
67
|
-
|
68
|
-
[ 'localhost/bar', {
|
69
|
-
:host => 'localhost',
|
70
|
-
:path => 'bar'
|
71
|
-
} ],
|
72
|
-
|
73
|
-
[ 'foo.com/bar', {
|
74
|
-
:host => 'foo.com',
|
75
|
-
:path => 'bar'
|
76
|
-
} ],
|
77
|
-
|
78
|
-
[ 'www.foo.com/bar', {
|
79
|
-
:subdomain => 'www',
|
80
|
-
:host => 'foo.com',
|
81
|
-
:path => 'bar'
|
82
|
-
} ],
|
83
|
-
|
84
|
-
[ 'http://localhost/bar', {
|
85
|
-
:scheme => 'http',
|
86
|
-
:host => 'localhost',
|
87
|
-
:path => 'bar'
|
88
|
-
} ],
|
89
|
-
|
90
|
-
[ 'http://foo.com/bar', {
|
91
|
-
:scheme => 'http',
|
92
|
-
:host => 'foo.com',
|
93
|
-
:path => 'bar'
|
94
|
-
} ],
|
95
|
-
|
96
|
-
[ 'http://www.foo.com/bar', {
|
97
|
-
:scheme => 'http',
|
98
|
-
:subdomain => 'www',
|
99
|
-
:host => 'foo.com',
|
100
|
-
:path => 'bar'
|
101
|
-
} ],
|
102
|
-
|
103
|
-
[ 'localhost?baz=woo', {
|
104
|
-
:host => 'localhost',
|
105
|
-
:search => 'baz=woo'
|
106
|
-
} ],
|
107
|
-
|
108
|
-
[ 'localhost:3000?baz=woo', {
|
109
|
-
:host => 'localhost',
|
110
|
-
:port => '3000',
|
111
|
-
:search => 'baz=woo'
|
112
|
-
} ],
|
113
|
-
|
114
|
-
[ 'localhost:3000/bar?baz=woo', {
|
115
|
-
:host => 'localhost',
|
116
|
-
:port => '3000',
|
117
|
-
:path => 'bar',
|
118
|
-
:search => 'baz=woo'
|
119
|
-
} ],
|
120
|
-
|
121
|
-
[ 'foo.com?baz=woo', {
|
122
|
-
:host => 'foo.com',
|
123
|
-
:search => 'baz=woo'
|
124
|
-
} ],
|
125
|
-
|
126
|
-
[ 'www.foo.com?baz=woo', {
|
127
|
-
:subdomain => 'www',
|
128
|
-
:host => 'foo.com',
|
129
|
-
:search => 'baz=woo'
|
130
|
-
} ],
|
131
|
-
|
132
|
-
[ 'http://foo.com?baz=woo', {
|
133
|
-
:scheme => 'http',
|
134
|
-
:host => 'foo.com',
|
135
|
-
:search => 'baz=woo'
|
136
|
-
} ],
|
137
|
-
|
138
|
-
[ 'http://www.foo.com?baz=woo', {
|
139
|
-
:scheme => 'http',
|
140
|
-
:subdomain => 'www',
|
141
|
-
:host => 'foo.com',
|
142
|
-
:search => 'baz=woo'
|
143
|
-
} ],
|
144
|
-
|
145
|
-
[ 'http://foo.com/bar?baz=woo', {
|
146
|
-
:scheme => 'http',
|
147
|
-
:host => 'foo.com',
|
148
|
-
:path => 'bar',
|
149
|
-
:search => 'baz=woo'
|
150
|
-
} ],
|
151
|
-
|
152
|
-
[ 'http://www.foo.com/bar?baz=woot', {
|
153
|
-
:scheme => 'http',
|
154
|
-
:subdomain => 'www',
|
155
|
-
:host => 'foo.com',
|
156
|
-
:path => 'bar',
|
157
|
-
:search => 'baz=woot'
|
158
|
-
} ],
|
159
|
-
|
160
|
-
[ 'http://localhost:3000?baz=woo', {
|
161
|
-
:scheme => 'http',
|
162
|
-
:host => 'localhost',
|
163
|
-
:port => '3000',
|
164
|
-
:search => 'baz=woo'
|
165
|
-
} ],
|
166
|
-
|
167
|
-
[ 'http://foo.com:8080?baz=woooo', {
|
168
|
-
:scheme => 'http',
|
169
|
-
:host => 'foo.com',
|
170
|
-
:port => '8080',
|
171
|
-
:search => 'baz=woooo'
|
172
|
-
} ],
|
173
|
-
|
174
|
-
[ 'http://foo.com:8080/bar?baz=woo', {
|
175
|
-
:scheme => 'http',
|
176
|
-
:host => 'foo.com',
|
177
|
-
:port => '8080',
|
178
|
-
:path => 'bar',
|
179
|
-
:search => 'baz=woo'
|
180
|
-
} ]
|
181
|
-
].each do |string, parts|
|
182
|
-
it "should correctly parse the url parts for #{string}" do
|
183
|
-
parser.parse( string ).resolve.should eql( parts )
|
184
|
-
end
|
185
|
-
it "should correctly assemble the url parts to #{string}" do
|
186
|
-
assembler.assemble( parts ).should eql( string )
|
187
|
-
end
|
188
|
-
end
|
189
|
-
end
|