earl 0.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +7 -0
  2. data/.github/workflows/ruby-tests.yml +32 -0
  3. data/.gitignore +5 -0
  4. data/.rubocop.yml +35 -0
  5. data/.rubocop_todo.yml +22 -0
  6. data/.ruby-gemset +1 -0
  7. data/.ruby-version +1 -0
  8. data/Gemfile +13 -1
  9. data/Guardfile +15 -0
  10. data/LICENSE +2 -2
  11. data/README.md +127 -25
  12. data/Rakefile +10 -2
  13. data/earl.gemspec +19 -14
  14. data/lib/earl/earl.rb +172 -0
  15. data/lib/earl/scraper.rb +92 -0
  16. data/lib/earl/version.rb +4 -2
  17. data/lib/earl.rb +11 -20
  18. data/spec/fixtures/bicycles.html +490 -0
  19. data/spec/fixtures/bicycles_without_description.html +489 -0
  20. data/spec/fixtures/bicycles_without_images.html +457 -0
  21. data/spec/fixtures/cassettes/feed/is_atom_feed.yml +2298 -0
  22. data/spec/fixtures/cassettes/feed/is_rss_feed.yml +48 -0
  23. data/spec/fixtures/cassettes/feed/no_feed.yml +69 -0
  24. data/spec/fixtures/cassettes/feed/with_atom_and_rss_feed.yml +1471 -0
  25. data/spec/fixtures/cassettes/feed/with_rss_feed.yml +47 -0
  26. data/spec/fixtures/cassettes/oembed/no_oembed.yml +101 -0
  27. data/spec/fixtures/cassettes/oembed/youtube_oembed.yml +129 -0
  28. data/spec/fixtures/page_as_atom.html +161 -0
  29. data/spec/fixtures/page_as_rss.html +151 -0
  30. data/spec/fixtures/page_with_atom_feed.html +39 -0
  31. data/spec/fixtures/page_with_rss_and_atom_feeds.html +40 -0
  32. data/spec/fixtures/page_with_rss_feed.html +39 -0
  33. data/spec/fixtures/page_without_feeds.html +36 -0
  34. data/spec/fixtures/youtube.html +1839 -0
  35. data/spec/integration/feed_spec.rb +78 -0
  36. data/spec/integration/oembed_spec.rb +36 -0
  37. data/spec/spec_helper.rb +21 -29
  38. data/spec/support/fixtures.rb +15 -0
  39. data/spec/support/vcr.rb +9 -0
  40. data/spec/unit/earl/earl_spec.rb +15 -0
  41. data/spec/unit/earl/feed_spec.rb +62 -0
  42. data/spec/unit/earl/oembed_spec.rb +50 -0
  43. data/spec/unit/earl/scraper_spec.rb +49 -0
  44. data/spec/unit/earl_spec.rb +74 -0
  45. metadata +90 -62
  46. data/.rvmrc +0 -48
  47. data/lib/earl/email_assembler.rb +0 -11
  48. data/lib/earl/email_entity.rb +0 -27
  49. data/lib/earl/email_parser.tt +0 -58
  50. data/lib/earl/entity_base.rb +0 -37
  51. data/lib/earl/hash_inquirer.rb +0 -16
  52. data/lib/earl/string_inquirer.rb +0 -11
  53. data/lib/earl/url_assembler.rb +0 -15
  54. data/lib/earl/url_entity.rb +0 -23
  55. data/lib/earl/url_parser.tt +0 -163
  56. data/spec/earl/earl_spec.rb +0 -17
  57. data/spec/earl/email_entity_spec.rb +0 -31
  58. data/spec/earl/email_parser_spec.rb +0 -29
  59. data/spec/earl/entity_base_spec.rb +0 -39
  60. data/spec/earl/hash_inquirer_spec.rb +0 -24
  61. data/spec/earl/string_inquirer_spec.rb +0 -9
  62. data/spec/earl/url_entity_spec.rb +0 -45
  63. data/spec/earl/url_parser_spec.rb +0 -189
data/.rvmrc DELETED
@@ -1,48 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
- # development environment upon cd'ing into the directory
5
-
6
- # First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
7
- # Only full ruby name is supported here, for short names use:
8
- # echo "rvm use 1.9.3" > .rvmrc
9
- environment_id="ruby-1.9.3-p125@earl"
10
-
11
- # Uncomment the following lines if you want to verify rvm version per project
12
- # rvmrc_rvm_version="1.10.3" # 1.10.1 seams as a safe start
13
- # eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
14
- # echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
15
- # return 1
16
- # }
17
-
18
- # First we attempt to load the desired environment directly from the environment
19
- # file. This is very fast and efficient compared to running through the entire
20
- # CLI and selector. If you want feedback on which environment was used then
21
- # insert the word 'use' after --create as this triggers verbose mode.
22
- if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
23
- && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
24
- then
25
- \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
26
- [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
27
- \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
28
- else
29
- # If the environment file has not yet been created, use the RVM CLI to select.
30
- rvm --create "$environment_id" || {
31
- echo "Failed to create RVM environment '${environment_id}'."
32
- return 1
33
- }
34
- fi
35
-
36
- # If you use bundler, this might be useful to you:
37
- # if [[ -s Gemfile ]] && {
38
- # ! builtin command -v bundle >/dev/null ||
39
- # builtin command -v bundle | grep $rvm_path/bin/bundle >/dev/null
40
- # }
41
- # then
42
- # printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
43
- # gem install bundler
44
- # fi
45
- # if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
46
- # then
47
- # bundle install | grep -vE '^Using|Your bundle is complete'
48
- # fi
data/lib/earl/email_assembler.rb DELETED
@@ -1,11 +0,0 @@
1
- module Earl
2
- class EmailAssembler
3
-
4
- def assemble( parts={} )
5
- ''.tap do |email|
6
- email << [ parts[ :username ], parts[ :domain ] ].join( '@' )
7
- email << " <#{parts[ :contact ]}>" if parts[ :contact ]
8
- end
9
- end
10
- end
11
- end
data/lib/earl/email_entity.rb DELETED
@@ -1,27 +0,0 @@
1
- require 'treetop'
2
- require 'earl/email_parser'
3
-
4
- module Earl
5
- class EmailEntity < EntityBase
6
-
7
- part_accessor :contact
8
-
9
- part_accessor :username do |value|
10
- raise InvalidURLError if value.nil?
11
- end
12
-
13
- part_accessor :domain do |value|
14
- raise InvalidURLError if value.nil?
15
- end
16
-
17
- protected
18
-
19
- def parser
20
- @parser ||= EmailParser.new
21
- end
22
-
23
- def assembler
24
- @assembler ||= EmailAssembler.new
25
- end
26
- end
27
- end
data/lib/earl/email_parser.tt DELETED
@@ -1,58 +0,0 @@
1
- module Earl
2
- grammar Email
3
-
4
- rule program
5
- whitespace v:( email ) whitespace {
6
- def resolve
7
- { }.merge v.resolve
8
- end
9
- }
10
- end
11
-
12
- rule whitespace
13
- [\s]*
14
- end
15
-
16
- rule email
17
- username '@' domain whitespace contact {
18
- def resolve
19
- username.resolve.merge domain.resolve.merge contact.resolve
20
- end
21
- }
22
- /
23
- username '@' domain {
24
- def resolve
25
- username.resolve.merge domain.resolve
26
- end
27
- }
28
- end
29
-
30
- rule username
31
- [^@\s]+ {
32
- def resolve
33
- { :username => text_value }
34
- end
35
- }
36
- end
37
-
38
- rule domain
39
- characters '.' characters {
40
- def resolve
41
- { :domain => text_value }
42
- end
43
- }
44
- end
45
-
46
- rule contact
47
- '<' [^>]+ '>' {
48
- def resolve
49
- { :contact => elements[ 1 ].text_value }
50
- end
51
- }
52
- end
53
-
54
- rule characters
55
- [a-zA-Z0-9]+
56
- end
57
- end
58
- end
data/lib/earl/entity_base.rb DELETED
@@ -1,37 +0,0 @@
1
- module Earl
2
- class EntityBase < HashInquirer
3
- def initialize( source )
4
- super parser.parse( source ).resolve rescue raise InvalidURLError
5
- end
6
-
7
- def to_s
8
- assembler.assemble self
9
- end
10
-
11
- def self.part_accessor( *parts, &block )
12
- parts.each do |part|
13
- define_method part do
14
- if self[ part ].is_a? String
15
- StringInquirer.new self[ part ]
16
- else
17
- self[ part ]
18
- end
19
- end
20
- define_method :"#{part}=" do |value|
21
- self[ part ] = value
22
- yield value if block_given?
23
- end
24
- end
25
- end
26
-
27
- protected
28
-
29
- def parser
30
- raise SubclassError
31
- end
32
-
33
- def assembler
34
- raise SubclassError
35
- end
36
- end
37
- end
data/lib/earl/hash_inquirer.rb DELETED
@@ -1,16 +0,0 @@
1
- module Earl
2
- class HashInquirer < ::Hash
3
- def initialize( hash, &block )
4
- merge! hash
5
- super block
6
- end
7
-
8
- def method_missing( meth, *args, &block )
9
- if meth.to_s[ -1 ] == '?'
10
- self.has_key? meth.to_s[ 0..-2 ].to_sym
11
- else
12
- super
13
- end
14
- end
15
- end
16
- end
data/lib/earl/string_inquirer.rb DELETED
@@ -1,11 +0,0 @@
1
- module Earl
2
- class StringInquirer < ::String
3
- def method_missing( meth, *args, &block )
4
- if meth.to_s[ -1 ] == '?'
5
- meth.to_s[ 0..-2 ] == to_s
6
- else
7
- super
8
- end
9
- end
10
- end
11
- end
data/lib/earl/url_assembler.rb DELETED
@@ -1,15 +0,0 @@
1
- module Earl
2
- class URLAssembler
3
-
4
- def assemble( parts={} )
5
- ''.tap do |url|
6
- url << ( parts[ :scheme ] + '://' ) if parts[ :scheme ]
7
- url << ( parts[ :subdomain ] + '.' ) if parts[ :subdomain ]
8
- url << ( parts[ :host ] ) if parts[ :host ]
9
- url << ( ':' + parts[ :port ].to_s ) if parts[ :port ]
10
- url << ( '/' + parts[ :path ] ) if parts[ :path ]
11
- url << ( '?' + parts[ :search ] ) if parts[ :search ]
12
- end
13
- end
14
- end
15
- end
data/lib/earl/url_entity.rb DELETED
@@ -1,23 +0,0 @@
1
- require 'treetop'
2
- require 'earl/url_parser'
3
-
4
- module Earl
5
- class URLEntity < EntityBase
6
-
7
- part_accessor :scheme, :subdomain, :port, :path, :search
8
-
9
- part_accessor :host do |value|
10
- raise InvalidURLError if value.nil?
11
- end
12
-
13
- protected
14
-
15
- def parser
16
- @parser ||= URLParser.new
17
- end
18
-
19
- def assembler
20
- @assembler ||= URLAssembler.new
21
- end
22
- end
23
- end
data/lib/earl/url_parser.tt DELETED
@@ -1,163 +0,0 @@
1
- module Earl
2
- grammar URL
3
-
4
- rule program
5
- whitespace v:( url ) whitespace {
6
- def resolve
7
- { }.merge v.resolve
8
- end
9
- }
10
- end
11
-
12
- rule whitespace
13
- [\s]*
14
- end
15
-
16
- rule url
17
- scheme host port path search {
18
- def resolve
19
- scheme.resolve.merge port.resolve.merge host.resolve.merge path.resolve.merge search.resolve
20
- end
21
- }
22
- /
23
- scheme host port path {
24
- def resolve
25
- scheme.resolve.merge port.resolve.merge host.resolve.merge path.resolve
26
- end
27
- }
28
- /
29
- scheme host port search {
30
- def resolve
31
- scheme.resolve.merge port.resolve.merge host.resolve.merge search.resolve
32
- end
33
- }
34
- /
35
- scheme host port {
36
- def resolve
37
- scheme.resolve.merge port.resolve.merge host.resolve
38
- end
39
- }
40
- /
41
- scheme host path search {
42
- def resolve
43
- scheme.resolve.merge host.resolve.merge path.resolve.merge search.resolve
44
- end
45
- }
46
- /
47
- scheme host path {
48
- def resolve
49
- scheme.resolve.merge host.resolve.merge path.resolve
50
- end
51
- }
52
- /
53
- scheme host search {
54
- def resolve
55
- scheme.resolve.merge host.resolve.merge search.resolve
56
- end
57
- }
58
- /
59
- scheme host {
60
- def resolve
61
- scheme.resolve.merge host.resolve
62
- end
63
- }
64
- /
65
- host port path search {
66
- def resolve
67
- port.resolve.merge host.resolve.merge path.resolve.merge search.resolve
68
- end
69
- }
70
- /
71
- host port path {
72
- def resolve
73
- port.resolve.merge host.resolve.merge path.resolve
74
- end
75
- }
76
- /
77
- host port search {
78
- def resolve
79
- port.resolve.merge host.resolve.merge search.resolve
80
- end
81
- }
82
- /
83
- host port {
84
- def resolve
85
- port.resolve.merge host.resolve
86
- end
87
- }
88
- /
89
- host path {
90
- def resolve
91
- host.resolve.merge path.resolve
92
- end
93
- }
94
- /
95
- host search {
96
- def resolve
97
- host.resolve.merge search.resolve
98
- end
99
- }
100
- /
101
- host
102
- end
103
-
104
- rule scheme
105
- characters '://' {
106
- def resolve
107
- { :scheme => characters.text_value }
108
- end
109
- }
110
- end
111
-
112
- rule host
113
- subdomain:characters '.' domain:characters '.' tld:characters {
114
- def resolve
115
- {
116
- :subdomain => subdomain.text_value,
117
- :host => "#{domain.text_value}.#{tld.text_value}"
118
- }
119
- end
120
- }
121
- /
122
- domain:characters '.' tld:characters {
123
- def resolve
124
- { :host => text_value }
125
- end
126
- }
127
- /
128
- characters {
129
- def resolve
130
- { :host => text_value }
131
- end
132
- }
133
- end
134
-
135
- rule port
136
- ':' port:([0-9]1..4) {
137
- def resolve
138
- { :port => port.text_value }
139
- end
140
- }
141
- end
142
-
143
- rule path
144
- '/' characters {
145
- def resolve
146
- { :path => characters.text_value }
147
- end
148
- }
149
- end
150
-
151
- rule search
152
- '?' search:( characters '=' characters ) {
153
- def resolve
154
- { :search => search.text_value }
155
- end
156
- }
157
- end
158
-
159
- rule characters
160
- [a-zA-Z0-9]+
161
- end
162
- end
163
- end
data/spec/earl/earl_spec.rb DELETED
@@ -1,17 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl do
4
- subject { Earl }
5
-
6
- it { should respond_to( :URL ) }
7
- describe '#URL' do
8
- subject { Earl::URL( 'http://foo.com' ) }
9
- it { should be_kind_of( Earl::URLEntity ) }
10
- end
11
-
12
- it { should respond_to( :Email ) }
13
- describe '#Email' do
14
- subject { Earl::Email( 'foo@bar.com' ) }
15
- it { should be_kind_of( Earl::EmailEntity ) }
16
- end
17
- end
data/spec/earl/email_entity_spec.rb DELETED
@@ -1,31 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::EmailEntity do
4
- subject { Earl::EmailEntity }
5
-
6
- # username
7
-
8
- it { should produce( 'foo@bar.com' ).from( 'baz@bar.com' ).when_given( :username => 'foo' ) }
9
- it 'must have a username when parsing an email' do
10
- expect { Earl::Email( '@bar.com' ) }.to raise_error( Earl::InvalidURLError )
11
- end
12
- it 'wont let you set the username to nil' do
13
- expect { Earl::Email( 'foo@bar.com' ).username = nil }.to raise_error( Earl::InvalidURLError )
14
- end
15
-
16
- # domain
17
-
18
- it { should produce( 'foo@bar.com' ).from( 'foo@baz.com' ).when_given( :domain => 'bar.com' ) }
19
- it 'must have a domain when parsing an email' do
20
- expect { Earl::Email( 'foo@' ) }.to raise_error( Earl::InvalidURLError )
21
- end
22
- it 'wont let you set the domain to nil' do
23
- expect { Earl::Email( 'foo@bar.com' ).domain = nil }.to raise_error( Earl::InvalidURLError )
24
- end
25
-
26
- # contact
27
-
28
- it { should produce( 'foo@bar.com <Woot!>' ).from( 'foo@bar.com <Foo Bar>' ).when_given( :contact => 'Woot!' ) }
29
- it { should produce( 'foo@bar.com <Woot!>' ).from( 'foo@bar.com' ).when_given( :contact => 'Woot!' ) }
30
- it { should produce( 'foo@bar.com' ).from( 'foo@bar.com <Woot!>' ).when_given( :contact => nil ) }
31
- end
data/spec/earl/email_parser_spec.rb DELETED
@@ -1,29 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl do
4
- let( :parser ){ Earl::EmailParser.new }
5
- let( :assembler ){ Earl::EmailAssembler.new }
6
-
7
- [
8
- [ 'foo@bar.com', {
9
- :username => 'foo',
10
- :domain => 'bar.com'
11
- } ],
12
- [ 'foo.bar@baz.com', {
13
- :username => 'foo.bar',
14
- :domain => 'baz.com'
15
- } ],
16
- [ 'foo.bar@baz.com <Foo Bar>', {
17
- :username => 'foo.bar',
18
- :domain => 'baz.com',
19
- :contact => 'Foo Bar'
20
- } ]
21
- ].each do |string, parts|
22
- it "should correctly parse the email parts for #{string}" do
23
- parser.parse( string ).resolve.should eql( parts )
24
- end
25
- it "should correctly assemble the email parts to #{string}" do
26
- assembler.assemble( parts ).should eql( string )
27
- end
28
- end
29
- end
data/spec/earl/entity_base_spec.rb DELETED
@@ -1,39 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::EntityBase do
4
-
5
- describe 'the entity dsl' do
6
- subject { Class.new( Earl::EntityBase ) do
7
- def initialize; end # so we don't invoke our parser
8
-
9
- part_accessor :foo, :bar
10
-
11
- part_accessor :baz do |value|
12
- raise EarlError if value == 'woot!'
13
- end
14
- end.new }
15
-
16
- it { should be_kind_of( Earl::HashInquirer ) }
17
-
18
- [ :foo, :foo=, :bar, :bar=, :baz, :baz= ].each do |method|
19
- it { should respond_to( method ) }
20
- end
21
-
22
- it 'should define setters/getters' do
23
- subject.foo = 'foo!'
24
- subject.foo.should eq( 'foo!' )
25
- subject[ :foo ].should eq( 'foo!' )
26
- subject.foo?.should eq( true )
27
- end
28
-
29
- it 'should call the block if given' do
30
- expect { subject.baz = 'baz!' }.not_to raise_error
31
- expect { subject.baz = 'woot!' }.to raise_error
32
- end
33
-
34
- it 'should return a string inquirer for string attributes' do
35
- subject.foo = 'sup'
36
- subject.foo.kind_of?( Earl::StringInquirer ).should == true
37
- end
38
- end
39
- end
data/spec/earl/hash_inquirer_spec.rb DELETED
@@ -1,24 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::HashInquirer do
4
- subject { Earl::HashInquirer.new :foo => 'bar', :baz => 123, :woo => false }
5
-
6
- it { should be_a( Hash ) }
7
- it { should eql( :foo => 'bar', :baz => 123, :woo => false ) }
8
-
9
- describe 'string keys' do
10
- its( :foo? ){ should be_true }
11
- end
12
-
13
- describe 'numeric keys' do
14
- its( :baz? ){ should be_true }
15
- end
16
-
17
- describe 'boolean keys' do
18
- its( :woo? ){ should be_true }
19
- end
20
-
21
- describe 'nonexistant keys' do
22
- its( :sup? ){ should be_false }
23
- end
24
- end
data/spec/earl/string_inquirer_spec.rb DELETED
@@ -1,9 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::StringInquirer do
4
- subject { Earl::StringInquirer.new 'foo' }
5
-
6
- it { should be_a( String ) }
7
- its( :foo? ){ should be_true }
8
- its( :bar? ){ should be_false }
9
- end
data/spec/earl/url_entity_spec.rb DELETED
@@ -1,45 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Earl::URLEntity do
4
- subject { Earl::URLEntity }
5
-
6
- # scheme
7
-
8
- it { should produce( 'https://foo.com' ).from( 'http://foo.com' ).when_given( :scheme => 'https' ) }
9
- it { should produce( 'http://foo.com' ).from( 'foo.com' ).when_given( :scheme => 'http' ) }
10
- it { should produce( 'foo.com' ).from( 'http://foo.com' ).when_given( :scheme => nil ) }
11
-
12
- # subdomain
13
-
14
- it { should produce( 'baz.bar.com' ).from( 'foo.bar.com' ).when_given( :subdomain => 'baz' ) }
15
- it { should produce( 'bar.foo.com' ).from( 'foo.com' ).when_given( :subdomain => 'bar' ) }
16
- it { should produce( 'bar.com' ).from( 'foo.bar.com' ).when_given( :subdomain => nil ) }
17
-
18
- # port
19
-
20
- it { should produce( 'foo.com:4567' ).from( 'foo.com:80' ).when_given( :port => 4567 ) }
21
- it { should produce( 'foo.com:4567' ).from( 'foo.com' ).when_given( :port => 4567 ) }
22
- it { should produce( 'foo.com' ).from( 'foo.com:4567' ).when_given( :port => nil ) }
23
-
24
- # path
25
-
26
- it { should produce( 'foo.com/bar' ).from( 'foo.com/baz' ).when_given( :path => 'bar' ) }
27
- it { should produce( 'foo.com/bar' ).from( 'foo.com' ).when_given( :path => 'bar' ) }
28
- it { should produce( 'foo.com' ).from( 'foo.com/bar' ).when_given( :path => nil ) }
29
-
30
- # search
31
-
32
- it { should produce( 'foo.com?bar=asdf' ).from( 'foo.com?bar=baz' ).when_given( :search => 'bar=asdf' ) }
33
- it { should produce( 'foo.com?bar=asdf' ).from( 'foo.com' ).when_given( :search => 'bar=asdf' ) }
34
- it { should produce( 'foo.com' ).from( 'foo.com?bar=asdf' ).when_given( :search => nil ) }
35
-
36
- # host
37
-
38
- it { should produce( 'www.foo.edu' ).from( 'www.foo.com' ).when_given( :host => 'foo.edu' ) }
39
- it 'must have a host when parsing a url' do
40
- expect { Earl::URL( 'http://' ) }.to raise_error( Earl::InvalidURLError )
41
- end
42
- it 'wont let you set the host to nil' do
43
- expect { Earl::URL( 'www.foo.com' ).host = nil }.to raise_error( Earl::InvalidURLError )
44
- end
45
- end