earl 0.3.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. data/.document +5 -0
  2. data/.gitignore +4 -15
  3. data/.rspec +1 -0
  4. data/.travis.yml +11 -0
  5. data/Gemfile +2 -2
  6. data/Gemfile.lock +60 -0
  7. data/Guardfile +10 -0
  8. data/LICENSE +2 -4
  9. data/README.rdoc +145 -0
  10. data/Rakefile +35 -2
  11. data/earl.gemspec +13 -7
  12. data/lib/earl.rb +7 -22
  13. data/lib/earl/earl.rb +158 -0
  14. data/lib/earl/scraper.rb +93 -0
  15. data/lib/earl/version.rb +2 -2
  16. data/script/console +10 -0
  17. data/spec/fixtures/bicycles.html +490 -0
  18. data/spec/fixtures/bicycles_without_description.html +489 -0
  19. data/spec/fixtures/bicycles_without_images.html +457 -0
  20. data/spec/fixtures/page_as_atom.html +161 -0
  21. data/spec/fixtures/page_as_rss.html +151 -0
  22. data/spec/fixtures/page_with_atom_feed.html +39 -0
  23. data/spec/fixtures/page_with_rss_and_atom_feeds.html +40 -0
  24. data/spec/fixtures/page_with_rss_feed.html +39 -0
  25. data/spec/fixtures/page_without_feeds.html +36 -0
  26. data/spec/fixtures/youtube.html +1839 -0
  27. data/spec/integration/feed_spec.rb +78 -0
  28. data/spec/integration/oembed_spec.rb +40 -0
  29. data/spec/spec_helper.rb +18 -28
  30. data/spec/support/fixtures.rb +10 -0
  31. data/spec/unit/earl/earl_spec.rb +16 -0
  32. data/spec/unit/earl/feed_spec.rb +59 -0
  33. data/spec/unit/earl/oembed_spec.rb +49 -0
  34. data/spec/unit/earl/scraper_spec.rb +48 -0
  35. data/spec/unit/earl_spec.rb +65 -0
  36. metadata +123 -46
  37. data/.rvmrc +0 -48
  38. data/README.md +0 -41
  39. data/lib/earl/email_assembler.rb +0 -11
  40. data/lib/earl/email_entity.rb +0 -27
  41. data/lib/earl/email_parser.tt +0 -58
  42. data/lib/earl/entity_base.rb +0 -37
  43. data/lib/earl/hash_inquirer.rb +0 -16
  44. data/lib/earl/string_inquirer.rb +0 -11
  45. data/lib/earl/url_assembler.rb +0 -15
  46. data/lib/earl/url_entity.rb +0 -23
  47. data/lib/earl/url_parser.tt +0 -163
  48. data/spec/earl/earl_spec.rb +0 -17
  49. data/spec/earl/email_entity_spec.rb +0 -31
  50. data/spec/earl/email_parser_spec.rb +0 -29
  51. data/spec/earl/entity_base_spec.rb +0 -39
  52. data/spec/earl/hash_inquirer_spec.rb +0 -24
  53. data/spec/earl/string_inquirer_spec.rb +0 -9
  54. data/spec/earl/url_entity_spec.rb +0 -45
  55. data/spec/earl/url_parser_spec.rb +0 -189
@@ -1,17 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl do
4
- subject { Earl }
5
-
6
- it { should respond_to( :URL ) }
7
- describe '#URL' do
8
- subject { Earl::URL( 'http://foo.com' ) }
9
- it { should be_kind_of( Earl::URLEntity ) }
10
- end
11
-
12
- it { should respond_to( :Email ) }
13
- describe '#Email' do
14
- subject { Earl::Email( 'foo@bar.com' ) }
15
- it { should be_kind_of( Earl::EmailEntity ) }
16
- end
17
- end
@@ -1,31 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::EmailEntity do
4
- subject { Earl::EmailEntity }
5
-
6
- # username
7
-
8
- it { should produce( 'foo@bar.com' ).from( 'baz@bar.com' ).when_given( :username => 'foo' ) }
9
- it 'must have a username when parsing an email' do
10
- expect { Earl::Email( '@bar.com' ) }.to raise_error( Earl::InvalidURLError )
11
- end
12
- it 'wont let you set the username to nil' do
13
- expect { Earl::Email( 'foo@bar.com' ).username = nil }.to raise_error( Earl::InvalidURLError )
14
- end
15
-
16
- # domain
17
-
18
- it { should produce( 'foo@bar.com' ).from( 'foo@baz.com' ).when_given( :domain => 'bar.com' ) }
19
- it 'must have a domain when parsing an email' do
20
- expect { Earl::Email( 'foo@' ) }.to raise_error( Earl::InvalidURLError )
21
- end
22
- it 'wont let you set the domain to nil' do
23
- expect { Earl::Email( 'foo@bar.com' ).domain = nil }.to raise_error( Earl::InvalidURLError )
24
- end
25
-
26
- # contact
27
-
28
- it { should produce( 'foo@bar.com <Woot!>' ).from( 'foo@bar.com <Foo Bar>' ).when_given( :contact => 'Woot!' ) }
29
- it { should produce( 'foo@bar.com <Woot!>' ).from( 'foo@bar.com' ).when_given( :contact => 'Woot!' ) }
30
- it { should produce( 'foo@bar.com' ).from( 'foo@bar.com <Woot!>' ).when_given( :contact => nil ) }
31
- end
@@ -1,29 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl do
4
- let( :parser ){ Earl::EmailParser.new }
5
- let( :assembler ){ Earl::EmailAssembler.new }
6
-
7
- [
8
- [ 'foo@bar.com', {
9
- :username => 'foo',
10
- :domain => 'bar.com'
11
- } ],
12
- [ 'foo.bar@baz.com', {
13
- :username => 'foo.bar',
14
- :domain => 'baz.com'
15
- } ],
16
- [ 'foo.bar@baz.com <Foo Bar>', {
17
- :username => 'foo.bar',
18
- :domain => 'baz.com',
19
- :contact => 'Foo Bar'
20
- } ]
21
- ].each do |string, parts|
22
- it "should correctly parse the email parts for #{string}" do
23
- parser.parse( string ).resolve.should eql( parts )
24
- end
25
- it "should correctly assemble the email parts to #{string}" do
26
- assembler.assemble( parts ).should eql( string )
27
- end
28
- end
29
- end
@@ -1,39 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::EntityBase do
4
-
5
- describe 'the entity dsl' do
6
- subject { Class.new( Earl::EntityBase ) do
7
- def initialize; end # so we don't invoke our parser
8
-
9
- part_accessor :foo, :bar
10
-
11
- part_accessor :baz do |value|
12
- raise EarlError if value == 'woot!'
13
- end
14
- end.new }
15
-
16
- it { should be_kind_of( Earl::HashInquirer ) }
17
-
18
- [ :foo, :foo=, :bar, :bar=, :baz, :baz= ].each do |method|
19
- it { should respond_to( method ) }
20
- end
21
-
22
- it 'should define setters/getters' do
23
- subject.foo = 'foo!'
24
- subject.foo.should eq( 'foo!' )
25
- subject[ :foo ].should eq( 'foo!' )
26
- subject.foo?.should eq( true )
27
- end
28
-
29
- it 'should call the block if given' do
30
- expect { subject.baz = 'baz!' }.not_to raise_error
31
- expect { subject.baz = 'woot!' }.to raise_error
32
- end
33
-
34
- it 'should return a string inquirer for string attributes' do
35
- subject.foo = 'sup'
36
- subject.foo.kind_of?( Earl::StringInquirer ).should == true
37
- end
38
- end
39
- end
@@ -1,24 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::HashInquirer do
4
- subject { Earl::HashInquirer.new :foo => 'bar', :baz => 123, :woo => false }
5
-
6
- it { should be_a( Hash ) }
7
- it { should eql( :foo => 'bar', :baz => 123, :woo => false ) }
8
-
9
- describe 'string keys' do
10
- its( :foo? ){ should be_true }
11
- end
12
-
13
- describe 'numeric keys' do
14
- its( :baz? ){ should be_true }
15
- end
16
-
17
- describe 'boolean keys' do
18
- its( :woo? ){ should be_true }
19
- end
20
-
21
- describe 'nonexistant keys' do
22
- its( :sup? ){ should be_false }
23
- end
24
- end
@@ -1,9 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::StringInquirer do
4
- subject { Earl::StringInquirer.new 'foo' }
5
-
6
- it { should be_a( String ) }
7
- its( :foo? ){ should be_true }
8
- its( :bar? ){ should be_false }
9
- end
@@ -1,45 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::URLEntity do
4
- subject { Earl::URLEntity }
5
-
6
- # scheme
7
-
8
- it { should produce( 'https://foo.com' ).from( 'http://foo.com' ).when_given( :scheme => 'https' ) }
9
- it { should produce( 'http://foo.com' ).from( 'foo.com' ).when_given( :scheme => 'http' ) }
10
- it { should produce( 'foo.com' ).from( 'http://foo.com' ).when_given( :scheme => nil ) }
11
-
12
- # subdomain
13
-
14
- it { should produce( 'baz.bar.com' ).from( 'foo.bar.com' ).when_given( :subdomain => 'baz' ) }
15
- it { should produce( 'bar.foo.com' ).from( 'foo.com' ).when_given( :subdomain => 'bar' ) }
16
- it { should produce( 'bar.com' ).from( 'foo.bar.com' ).when_given( :subdomain => nil ) }
17
-
18
- # port
19
-
20
- it { should produce( 'foo.com:4567' ).from( 'foo.com:80' ).when_given( :port => 4567 ) }
21
- it { should produce( 'foo.com:4567' ).from( 'foo.com' ).when_given( :port => 4567 ) }
22
- it { should produce( 'foo.com' ).from( 'foo.com:4567' ).when_given( :port => nil ) }
23
-
24
- # path
25
-
26
- it { should produce( 'foo.com/bar' ).from( 'foo.com/baz' ).when_given( :path => 'bar' ) }
27
- it { should produce( 'foo.com/bar' ).from( 'foo.com' ).when_given( :path => 'bar' ) }
28
- it { should produce( 'foo.com' ).from( 'foo.com/bar' ).when_given( :path => nil ) }
29
-
30
- # search
31
-
32
- it { should produce( 'foo.com?bar=asdf' ).from( 'foo.com?bar=baz' ).when_given( :search => 'bar=asdf' ) }
33
- it { should produce( 'foo.com?bar=asdf' ).from( 'foo.com' ).when_given( :search => 'bar=asdf' ) }
34
- it { should produce( 'foo.com' ).from( 'foo.com?bar=asdf' ).when_given( :search => nil ) }
35
-
36
- # host
37
-
38
- it { should produce( 'www.foo.edu' ).from( 'www.foo.com' ).when_given( :host => 'foo.edu' ) }
39
- it 'must have a host when parsing a url' do
40
- expect { Earl::URL( 'http://' ) }.to raise_error( Earl::InvalidURLError )
41
- end
42
- it 'wont let you set the host to nil' do
43
- expect { Earl::URL( 'www.foo.com' ).host = nil }.to raise_error( Earl::InvalidURLError )
44
- end
45
- end
@@ -1,189 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl do
4
- let( :parser ){ Earl::URLParser.new }
5
- let( :assembler ){ Earl::URLAssembler.new }
6
-
7
- [
8
- [ 'localhost', {
9
- :host => 'localhost'
10
- } ],
11
- [ 'foo.com', {
12
- :host => 'foo.com'
13
- } ],
14
-
15
- [ 'foo.edu', {
16
- :host => 'foo.edu'
17
- } ],
18
-
19
- [ 'foo2bar.biz', {
20
- :host => 'foo2bar.biz'
21
- } ],
22
-
23
- [ 'www.foo.com', {
24
- :host => 'foo.com',
25
- :subdomain => 'www'
26
- } ],
27
-
28
- [ 'http://localhost', {
29
- :scheme => 'http',
30
- :host => 'localhost'
31
- } ],
32
-
33
- [ 'http://foo.com', {
34
- :scheme => 'http',
35
- :host => 'foo.com'
36
- } ],
37
-
38
- [ 'http://www.foo.com', {
39
- :scheme => 'http',
40
- :host => 'foo.com',
41
- :subdomain => 'www'
42
- } ],
43
-
44
- [ 'localhost:3000', {
45
- :host => 'localhost',
46
- :port => '3000'
47
- } ],
48
-
49
- [ 'http://localhost:3000', {
50
- :scheme => 'http',
51
- :host => 'localhost',
52
- :port => '3000'
53
- } ],
54
-
55
- [ 'www.foo.com:8080', {
56
- :subdomain => 'www',
57
- :host => 'foo.com',
58
- :port => '8080'
59
- } ],
60
-
61
- [ 'http://www.foo.com:8080', {
62
- :scheme => 'http',
63
- :subdomain => 'www',
64
- :host => 'foo.com',
65
- :port => '8080'
66
- } ],
67
-
68
- [ 'localhost/bar', {
69
- :host => 'localhost',
70
- :path => 'bar'
71
- } ],
72
-
73
- [ 'foo.com/bar', {
74
- :host => 'foo.com',
75
- :path => 'bar'
76
- } ],
77
-
78
- [ 'www.foo.com/bar', {
79
- :subdomain => 'www',
80
- :host => 'foo.com',
81
- :path => 'bar'
82
- } ],
83
-
84
- [ 'http://localhost/bar', {
85
- :scheme => 'http',
86
- :host => 'localhost',
87
- :path => 'bar'
88
- } ],
89
-
90
- [ 'http://foo.com/bar', {
91
- :scheme => 'http',
92
- :host => 'foo.com',
93
- :path => 'bar'
94
- } ],
95
-
96
- [ 'http://www.foo.com/bar', {
97
- :scheme => 'http',
98
- :subdomain => 'www',
99
- :host => 'foo.com',
100
- :path => 'bar'
101
- } ],
102
-
103
- [ 'localhost?baz=woo', {
104
- :host => 'localhost',
105
- :search => 'baz=woo'
106
- } ],
107
-
108
- [ 'localhost:3000?baz=woo', {
109
- :host => 'localhost',
110
- :port => '3000',
111
- :search => 'baz=woo'
112
- } ],
113
-
114
- [ 'localhost:3000/bar?baz=woo', {
115
- :host => 'localhost',
116
- :port => '3000',
117
- :path => 'bar',
118
- :search => 'baz=woo'
119
- } ],
120
-
121
- [ 'foo.com?baz=woo', {
122
- :host => 'foo.com',
123
- :search => 'baz=woo'
124
- } ],
125
-
126
- [ 'www.foo.com?baz=woo', {
127
- :subdomain => 'www',
128
- :host => 'foo.com',
129
- :search => 'baz=woo'
130
- } ],
131
-
132
- [ 'http://foo.com?baz=woo', {
133
- :scheme => 'http',
134
- :host => 'foo.com',
135
- :search => 'baz=woo'
136
- } ],
137
-
138
- [ 'http://www.foo.com?baz=woo', {
139
- :scheme => 'http',
140
- :subdomain => 'www',
141
- :host => 'foo.com',
142
- :search => 'baz=woo'
143
- } ],
144
-
145
- [ 'http://foo.com/bar?baz=woo', {
146
- :scheme => 'http',
147
- :host => 'foo.com',
148
- :path => 'bar',
149
- :search => 'baz=woo'
150
- } ],
151
-
152
- [ 'http://www.foo.com/bar?baz=woot', {
153
- :scheme => 'http',
154
- :subdomain => 'www',
155
- :host => 'foo.com',
156
- :path => 'bar',
157
- :search => 'baz=woot'
158
- } ],
159
-
160
- [ 'http://localhost:3000?baz=woo', {
161
- :scheme => 'http',
162
- :host => 'localhost',
163
- :port => '3000',
164
- :search => 'baz=woo'
165
- } ],
166
-
167
- [ 'http://foo.com:8080?baz=woooo', {
168
- :scheme => 'http',
169
- :host => 'foo.com',
170
- :port => '8080',
171
- :search => 'baz=woooo'
172
- } ],
173
-
174
- [ 'http://foo.com:8080/bar?baz=woo', {
175
- :scheme => 'http',
176
- :host => 'foo.com',
177
- :port => '8080',
178
- :path => 'bar',
179
- :search => 'baz=woo'
180
- } ]
181
- ].each do |string, parts|
182
- it "should correctly parse the url parts for #{string}" do
183
- parser.parse( string ).resolve.should eql( parts )
184
- end
185
- it "should correctly assemble the url parts to #{string}" do
186
- assembler.assemble( parts ).should eql( string )
187
- end
188
- end
189
- end