earl 0.3.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/.document +5 -0
  2. data/.gitignore +4 -15
  3. data/.rspec +1 -0
  4. data/.travis.yml +11 -0
  5. data/Gemfile +2 -2
  6. data/Gemfile.lock +60 -0
  7. data/Guardfile +10 -0
  8. data/LICENSE +2 -4
  9. data/README.rdoc +145 -0
  10. data/Rakefile +35 -2
  11. data/earl.gemspec +13 -7
  12. data/lib/earl.rb +7 -22
  13. data/lib/earl/earl.rb +158 -0
  14. data/lib/earl/scraper.rb +93 -0
  15. data/lib/earl/version.rb +2 -2
  16. data/script/console +10 -0
  17. data/spec/fixtures/bicycles.html +490 -0
  18. data/spec/fixtures/bicycles_without_description.html +489 -0
  19. data/spec/fixtures/bicycles_without_images.html +457 -0
  20. data/spec/fixtures/page_as_atom.html +161 -0
  21. data/spec/fixtures/page_as_rss.html +151 -0
  22. data/spec/fixtures/page_with_atom_feed.html +39 -0
  23. data/spec/fixtures/page_with_rss_and_atom_feeds.html +40 -0
  24. data/spec/fixtures/page_with_rss_feed.html +39 -0
  25. data/spec/fixtures/page_without_feeds.html +36 -0
  26. data/spec/fixtures/youtube.html +1839 -0
  27. data/spec/integration/feed_spec.rb +78 -0
  28. data/spec/integration/oembed_spec.rb +40 -0
  29. data/spec/spec_helper.rb +18 -28
  30. data/spec/support/fixtures.rb +10 -0
  31. data/spec/unit/earl/earl_spec.rb +16 -0
  32. data/spec/unit/earl/feed_spec.rb +59 -0
  33. data/spec/unit/earl/oembed_spec.rb +49 -0
  34. data/spec/unit/earl/scraper_spec.rb +48 -0
  35. data/spec/unit/earl_spec.rb +65 -0
  36. metadata +123 -46
  37. data/.rvmrc +0 -48
  38. data/README.md +0 -41
  39. data/lib/earl/email_assembler.rb +0 -11
  40. data/lib/earl/email_entity.rb +0 -27
  41. data/lib/earl/email_parser.tt +0 -58
  42. data/lib/earl/entity_base.rb +0 -37
  43. data/lib/earl/hash_inquirer.rb +0 -16
  44. data/lib/earl/string_inquirer.rb +0 -11
  45. data/lib/earl/url_assembler.rb +0 -15
  46. data/lib/earl/url_entity.rb +0 -23
  47. data/lib/earl/url_parser.tt +0 -163
  48. data/spec/earl/earl_spec.rb +0 -17
  49. data/spec/earl/email_entity_spec.rb +0 -31
  50. data/spec/earl/email_parser_spec.rb +0 -29
  51. data/spec/earl/entity_base_spec.rb +0 -39
  52. data/spec/earl/hash_inquirer_spec.rb +0 -24
  53. data/spec/earl/string_inquirer_spec.rb +0 -9
  54. data/spec/earl/url_entity_spec.rb +0 -45
  55. data/spec/earl/url_parser_spec.rb +0 -189
@@ -1,17 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl do
4
- subject { Earl }
5
-
6
- it { should respond_to( :URL ) }
7
- describe '#URL' do
8
- subject { Earl::URL( 'http://foo.com' ) }
9
- it { should be_kind_of( Earl::URLEntity ) }
10
- end
11
-
12
- it { should respond_to( :Email ) }
13
- describe '#Email' do
14
- subject { Earl::Email( 'foo@bar.com' ) }
15
- it { should be_kind_of( Earl::EmailEntity ) }
16
- end
17
- end
@@ -1,31 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::EmailEntity do
4
- subject { Earl::EmailEntity }
5
-
6
- # username
7
-
8
- it { should produce( 'foo@bar.com' ).from( 'baz@bar.com' ).when_given( :username => 'foo' ) }
9
- it 'must have a username when parsing an email' do
10
- expect { Earl::Email( '@bar.com' ) }.to raise_error( Earl::InvalidURLError )
11
- end
12
- it 'wont let you set the username to nil' do
13
- expect { Earl::Email( 'foo@bar.com' ).username = nil }.to raise_error( Earl::InvalidURLError )
14
- end
15
-
16
- # domain
17
-
18
- it { should produce( 'foo@bar.com' ).from( 'foo@baz.com' ).when_given( :domain => 'bar.com' ) }
19
- it 'must have a domain when parsing an email' do
20
- expect { Earl::Email( 'foo@' ) }.to raise_error( Earl::InvalidURLError )
21
- end
22
- it 'wont let you set the domain to nil' do
23
- expect { Earl::Email( 'foo@bar.com' ).domain = nil }.to raise_error( Earl::InvalidURLError )
24
- end
25
-
26
- # contact
27
-
28
- it { should produce( 'foo@bar.com <Woot!>' ).from( 'foo@bar.com <Foo Bar>' ).when_given( :contact => 'Woot!' ) }
29
- it { should produce( 'foo@bar.com <Woot!>' ).from( 'foo@bar.com' ).when_given( :contact => 'Woot!' ) }
30
- it { should produce( 'foo@bar.com' ).from( 'foo@bar.com <Woot!>' ).when_given( :contact => nil ) }
31
- end
@@ -1,29 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl do
4
- let( :parser ){ Earl::EmailParser.new }
5
- let( :assembler ){ Earl::EmailAssembler.new }
6
-
7
- [
8
- [ 'foo@bar.com', {
9
- :username => 'foo',
10
- :domain => 'bar.com'
11
- } ],
12
- [ 'foo.bar@baz.com', {
13
- :username => 'foo.bar',
14
- :domain => 'baz.com'
15
- } ],
16
- [ 'foo.bar@baz.com <Foo Bar>', {
17
- :username => 'foo.bar',
18
- :domain => 'baz.com',
19
- :contact => 'Foo Bar'
20
- } ]
21
- ].each do |string, parts|
22
- it "should correctly parse the email parts for #{string}" do
23
- parser.parse( string ).resolve.should eql( parts )
24
- end
25
- it "should correctly assemble the email parts to #{string}" do
26
- assembler.assemble( parts ).should eql( string )
27
- end
28
- end
29
- end
@@ -1,39 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::EntityBase do
4
-
5
- describe 'the entity dsl' do
6
- subject { Class.new( Earl::EntityBase ) do
7
- def initialize; end # so we don't invoke our parser
8
-
9
- part_accessor :foo, :bar
10
-
11
- part_accessor :baz do |value|
12
- raise EarlError if value == 'woot!'
13
- end
14
- end.new }
15
-
16
- it { should be_kind_of( Earl::HashInquirer ) }
17
-
18
- [ :foo, :foo=, :bar, :bar=, :baz, :baz= ].each do |method|
19
- it { should respond_to( method ) }
20
- end
21
-
22
- it 'should define setters/getters' do
23
- subject.foo = 'foo!'
24
- subject.foo.should eq( 'foo!' )
25
- subject[ :foo ].should eq( 'foo!' )
26
- subject.foo?.should eq( true )
27
- end
28
-
29
- it 'should call the block if given' do
30
- expect { subject.baz = 'baz!' }.not_to raise_error
31
- expect { subject.baz = 'woot!' }.to raise_error
32
- end
33
-
34
- it 'should return a string inquirer for string attributes' do
35
- subject.foo = 'sup'
36
- subject.foo.kind_of?( Earl::StringInquirer ).should == true
37
- end
38
- end
39
- end
@@ -1,24 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::HashInquirer do
4
- subject { Earl::HashInquirer.new :foo => 'bar', :baz => 123, :woo => false }
5
-
6
- it { should be_a( Hash ) }
7
- it { should eql( :foo => 'bar', :baz => 123, :woo => false ) }
8
-
9
- describe 'string keys' do
10
- its( :foo? ){ should be_true }
11
- end
12
-
13
- describe 'numeric keys' do
14
- its( :baz? ){ should be_true }
15
- end
16
-
17
- describe 'boolean keys' do
18
- its( :woo? ){ should be_true }
19
- end
20
-
21
- describe 'nonexistant keys' do
22
- its( :sup? ){ should be_false }
23
- end
24
- end
@@ -1,9 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::StringInquirer do
4
- subject { Earl::StringInquirer.new 'foo' }
5
-
6
- it { should be_a( String ) }
7
- its( :foo? ){ should be_true }
8
- its( :bar? ){ should be_false }
9
- end
@@ -1,45 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::URLEntity do
4
- subject { Earl::URLEntity }
5
-
6
- # scheme
7
-
8
- it { should produce( 'https://foo.com' ).from( 'http://foo.com' ).when_given( :scheme => 'https' ) }
9
- it { should produce( 'http://foo.com' ).from( 'foo.com' ).when_given( :scheme => 'http' ) }
10
- it { should produce( 'foo.com' ).from( 'http://foo.com' ).when_given( :scheme => nil ) }
11
-
12
- # subdomain
13
-
14
- it { should produce( 'baz.bar.com' ).from( 'foo.bar.com' ).when_given( :subdomain => 'baz' ) }
15
- it { should produce( 'bar.foo.com' ).from( 'foo.com' ).when_given( :subdomain => 'bar' ) }
16
- it { should produce( 'bar.com' ).from( 'foo.bar.com' ).when_given( :subdomain => nil ) }
17
-
18
- # port
19
-
20
- it { should produce( 'foo.com:4567' ).from( 'foo.com:80' ).when_given( :port => 4567 ) }
21
- it { should produce( 'foo.com:4567' ).from( 'foo.com' ).when_given( :port => 4567 ) }
22
- it { should produce( 'foo.com' ).from( 'foo.com:4567' ).when_given( :port => nil ) }
23
-
24
- # path
25
-
26
- it { should produce( 'foo.com/bar' ).from( 'foo.com/baz' ).when_given( :path => 'bar' ) }
27
- it { should produce( 'foo.com/bar' ).from( 'foo.com' ).when_given( :path => 'bar' ) }
28
- it { should produce( 'foo.com' ).from( 'foo.com/bar' ).when_given( :path => nil ) }
29
-
30
- # search
31
-
32
- it { should produce( 'foo.com?bar=asdf' ).from( 'foo.com?bar=baz' ).when_given( :search => 'bar=asdf' ) }
33
- it { should produce( 'foo.com?bar=asdf' ).from( 'foo.com' ).when_given( :search => 'bar=asdf' ) }
34
- it { should produce( 'foo.com' ).from( 'foo.com?bar=asdf' ).when_given( :search => nil ) }
35
-
36
- # host
37
-
38
- it { should produce( 'www.foo.edu' ).from( 'www.foo.com' ).when_given( :host => 'foo.edu' ) }
39
- it 'must have a host when parsing a url' do
40
- expect { Earl::URL( 'http://' ) }.to raise_error( Earl::InvalidURLError )
41
- end
42
- it 'wont let you set the host to nil' do
43
- expect { Earl::URL( 'www.foo.com' ).host = nil }.to raise_error( Earl::InvalidURLError )
44
- end
45
- end
@@ -1,189 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl do
4
- let( :parser ){ Earl::URLParser.new }
5
- let( :assembler ){ Earl::URLAssembler.new }
6
-
7
- [
8
- [ 'localhost', {
9
- :host => 'localhost'
10
- } ],
11
- [ 'foo.com', {
12
- :host => 'foo.com'
13
- } ],
14
-
15
- [ 'foo.edu', {
16
- :host => 'foo.edu'
17
- } ],
18
-
19
- [ 'foo2bar.biz', {
20
- :host => 'foo2bar.biz'
21
- } ],
22
-
23
- [ 'www.foo.com', {
24
- :host => 'foo.com',
25
- :subdomain => 'www'
26
- } ],
27
-
28
- [ 'http://localhost', {
29
- :scheme => 'http',
30
- :host => 'localhost'
31
- } ],
32
-
33
- [ 'http://foo.com', {
34
- :scheme => 'http',
35
- :host => 'foo.com'
36
- } ],
37
-
38
- [ 'http://www.foo.com', {
39
- :scheme => 'http',
40
- :host => 'foo.com',
41
- :subdomain => 'www'
42
- } ],
43
-
44
- [ 'localhost:3000', {
45
- :host => 'localhost',
46
- :port => '3000'
47
- } ],
48
-
49
- [ 'http://localhost:3000', {
50
- :scheme => 'http',
51
- :host => 'localhost',
52
- :port => '3000'
53
- } ],
54
-
55
- [ 'www.foo.com:8080', {
56
- :subdomain => 'www',
57
- :host => 'foo.com',
58
- :port => '8080'
59
- } ],
60
-
61
- [ 'http://www.foo.com:8080', {
62
- :scheme => 'http',
63
- :subdomain => 'www',
64
- :host => 'foo.com',
65
- :port => '8080'
66
- } ],
67
-
68
- [ 'localhost/bar', {
69
- :host => 'localhost',
70
- :path => 'bar'
71
- } ],
72
-
73
- [ 'foo.com/bar', {
74
- :host => 'foo.com',
75
- :path => 'bar'
76
- } ],
77
-
78
- [ 'www.foo.com/bar', {
79
- :subdomain => 'www',
80
- :host => 'foo.com',
81
- :path => 'bar'
82
- } ],
83
-
84
- [ 'http://localhost/bar', {
85
- :scheme => 'http',
86
- :host => 'localhost',
87
- :path => 'bar'
88
- } ],
89
-
90
- [ 'http://foo.com/bar', {
91
- :scheme => 'http',
92
- :host => 'foo.com',
93
- :path => 'bar'
94
- } ],
95
-
96
- [ 'http://www.foo.com/bar', {
97
- :scheme => 'http',
98
- :subdomain => 'www',
99
- :host => 'foo.com',
100
- :path => 'bar'
101
- } ],
102
-
103
- [ 'localhost?baz=woo', {
104
- :host => 'localhost',
105
- :search => 'baz=woo'
106
- } ],
107
-
108
- [ 'localhost:3000?baz=woo', {
109
- :host => 'localhost',
110
- :port => '3000',
111
- :search => 'baz=woo'
112
- } ],
113
-
114
- [ 'localhost:3000/bar?baz=woo', {
115
- :host => 'localhost',
116
- :port => '3000',
117
- :path => 'bar',
118
- :search => 'baz=woo'
119
- } ],
120
-
121
- [ 'foo.com?baz=woo', {
122
- :host => 'foo.com',
123
- :search => 'baz=woo'
124
- } ],
125
-
126
- [ 'www.foo.com?baz=woo', {
127
- :subdomain => 'www',
128
- :host => 'foo.com',
129
- :search => 'baz=woo'
130
- } ],
131
-
132
- [ 'http://foo.com?baz=woo', {
133
- :scheme => 'http',
134
- :host => 'foo.com',
135
- :search => 'baz=woo'
136
- } ],
137
-
138
- [ 'http://www.foo.com?baz=woo', {
139
- :scheme => 'http',
140
- :subdomain => 'www',
141
- :host => 'foo.com',
142
- :search => 'baz=woo'
143
- } ],
144
-
145
- [ 'http://foo.com/bar?baz=woo', {
146
- :scheme => 'http',
147
- :host => 'foo.com',
148
- :path => 'bar',
149
- :search => 'baz=woo'
150
- } ],
151
-
152
- [ 'http://www.foo.com/bar?baz=woot', {
153
- :scheme => 'http',
154
- :subdomain => 'www',
155
- :host => 'foo.com',
156
- :path => 'bar',
157
- :search => 'baz=woot'
158
- } ],
159
-
160
- [ 'http://localhost:3000?baz=woo', {
161
- :scheme => 'http',
162
- :host => 'localhost',
163
- :port => '3000',
164
- :search => 'baz=woo'
165
- } ],
166
-
167
- [ 'http://foo.com:8080?baz=woooo', {
168
- :scheme => 'http',
169
- :host => 'foo.com',
170
- :port => '8080',
171
- :search => 'baz=woooo'
172
- } ],
173
-
174
- [ 'http://foo.com:8080/bar?baz=woo', {
175
- :scheme => 'http',
176
- :host => 'foo.com',
177
- :port => '8080',
178
- :path => 'bar',
179
- :search => 'baz=woo'
180
- } ]
181
- ].each do |string, parts|
182
- it "should correctly parse the url parts for #{string}" do
183
- parser.parse( string ).resolve.should eql( parts )
184
- end
185
- it "should correctly assemble the url parts to #{string}" do
186
- assembler.assemble( parts ).should eql( string )
187
- end
188
- end
189
- end