earl 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +4 -15
- data/.rspec +1 -0
- data/.travis.yml +11 -0
- data/Gemfile +2 -2
- data/Gemfile.lock +60 -0
- data/Guardfile +10 -0
- data/LICENSE +2 -4
- data/README.rdoc +145 -0
- data/Rakefile +35 -2
- data/earl.gemspec +13 -7
- data/lib/earl.rb +7 -22
- data/lib/earl/earl.rb +158 -0
- data/lib/earl/scraper.rb +93 -0
- data/lib/earl/version.rb +2 -2
- data/script/console +10 -0
- data/spec/fixtures/bicycles.html +490 -0
- data/spec/fixtures/bicycles_without_description.html +489 -0
- data/spec/fixtures/bicycles_without_images.html +457 -0
- data/spec/fixtures/page_as_atom.html +161 -0
- data/spec/fixtures/page_as_rss.html +151 -0
- data/spec/fixtures/page_with_atom_feed.html +39 -0
- data/spec/fixtures/page_with_rss_and_atom_feeds.html +40 -0
- data/spec/fixtures/page_with_rss_feed.html +39 -0
- data/spec/fixtures/page_without_feeds.html +36 -0
- data/spec/fixtures/youtube.html +1839 -0
- data/spec/integration/feed_spec.rb +78 -0
- data/spec/integration/oembed_spec.rb +40 -0
- data/spec/spec_helper.rb +18 -28
- data/spec/support/fixtures.rb +10 -0
- data/spec/unit/earl/earl_spec.rb +16 -0
- data/spec/unit/earl/feed_spec.rb +59 -0
- data/spec/unit/earl/oembed_spec.rb +49 -0
- data/spec/unit/earl/scraper_spec.rb +48 -0
- data/spec/unit/earl_spec.rb +65 -0
- metadata +123 -46
- data/.rvmrc +0 -48
- data/README.md +0 -41
- data/lib/earl/email_assembler.rb +0 -11
- data/lib/earl/email_entity.rb +0 -27
- data/lib/earl/email_parser.tt +0 -58
- data/lib/earl/entity_base.rb +0 -37
- data/lib/earl/hash_inquirer.rb +0 -16
- data/lib/earl/string_inquirer.rb +0 -11
- data/lib/earl/url_assembler.rb +0 -15
- data/lib/earl/url_entity.rb +0 -23
- data/lib/earl/url_parser.tt +0 -163
- data/spec/earl/earl_spec.rb +0 -17
- data/spec/earl/email_entity_spec.rb +0 -31
- data/spec/earl/email_parser_spec.rb +0 -29
- data/spec/earl/entity_base_spec.rb +0 -39
- data/spec/earl/hash_inquirer_spec.rb +0 -24
- data/spec/earl/string_inquirer_spec.rb +0 -9
- data/spec/earl/url_entity_spec.rb +0 -45
- data/spec/earl/url_parser_spec.rb +0 -189
data/.rvmrc
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
|
3
|
-
# This is an RVM Project .rvmrc file, used to automatically load the ruby
|
4
|
-
# development environment upon cd'ing into the directory
|
5
|
-
|
6
|
-
# First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
|
7
|
-
# Only full ruby name is supported here, for short names use:
|
8
|
-
# echo "rvm use 1.9.3" > .rvmrc
|
9
|
-
environment_id="ruby-1.9.3-p125@earl"
|
10
|
-
|
11
|
-
# Uncomment the following lines if you want to verify rvm version per project
|
12
|
-
# rvmrc_rvm_version="1.10.3" # 1.10.1 seams as a safe start
|
13
|
-
# eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
|
14
|
-
# echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
|
15
|
-
# return 1
|
16
|
-
# }
|
17
|
-
|
18
|
-
# First we attempt to load the desired environment directly from the environment
|
19
|
-
# file. This is very fast and efficient compared to running through the entire
|
20
|
-
# CLI and selector. If you want feedback on which environment was used then
|
21
|
-
# insert the word 'use' after --create as this triggers verbose mode.
|
22
|
-
if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
|
23
|
-
&& -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
|
24
|
-
then
|
25
|
-
\. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
|
26
|
-
[[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
|
27
|
-
\. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
|
28
|
-
else
|
29
|
-
# If the environment file has not yet been created, use the RVM CLI to select.
|
30
|
-
rvm --create "$environment_id" || {
|
31
|
-
echo "Failed to create RVM environment '${environment_id}'."
|
32
|
-
return 1
|
33
|
-
}
|
34
|
-
fi
|
35
|
-
|
36
|
-
# If you use bundler, this might be useful to you:
|
37
|
-
# if [[ -s Gemfile ]] && {
|
38
|
-
# ! builtin command -v bundle >/dev/null ||
|
39
|
-
# builtin command -v bundle | grep $rvm_path/bin/bundle >/dev/null
|
40
|
-
# }
|
41
|
-
# then
|
42
|
-
# printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
|
43
|
-
# gem install bundler
|
44
|
-
# fi
|
45
|
-
# if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
|
46
|
-
# then
|
47
|
-
# bundle install | grep -vE '^Using|Your bundle is complete'
|
48
|
-
# fi
|
data/README.md
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
# Earl
|
2
|
-
|
3
|
-
What URI wishes it could look like.
|
4
|
-
|
5
|
-
## Installation
|
6
|
-
|
7
|
-
Add this line to your application's Gemfile:
|
8
|
-
|
9
|
-
gem 'earl'
|
10
|
-
|
11
|
-
And then execute:
|
12
|
-
|
13
|
-
$ bundle
|
14
|
-
|
15
|
-
Or install it yourself as:
|
16
|
-
|
17
|
-
$ gem install earl
|
18
|
-
|
19
|
-
## Usage
|
20
|
-
|
21
|
-
``` rb
|
22
|
-
url = Earl::URL.new 'http://www.foo.com'
|
23
|
-
|
24
|
-
url.scheme # => 'http'
|
25
|
-
url.scheme? # => true
|
26
|
-
|
27
|
-
url.subdomain # => 'www'
|
28
|
-
url.subdomain.www? # => true
|
29
|
-
url.subdomain.baz? # => false
|
30
|
-
|
31
|
-
url.host = 'foo.edu'
|
32
|
-
url.to_s # => 'http://www.foo.edu'
|
33
|
-
```
|
34
|
-
|
35
|
-
## Contributing
|
36
|
-
|
37
|
-
1. Fork it
|
38
|
-
2. Create your feature branch (`git checkout -b my-new-feature`)
|
39
|
-
3. Commit your changes (`git commit -am 'Added some feature'`)
|
40
|
-
4. Push to the branch (`git push origin my-new-feature`)
|
41
|
-
5. Create new Pull Request
|
data/lib/earl/email_assembler.rb
DELETED
data/lib/earl/email_entity.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
require 'treetop'
|
2
|
-
require 'earl/email_parser'
|
3
|
-
|
4
|
-
module Earl
|
5
|
-
class EmailEntity < EntityBase
|
6
|
-
|
7
|
-
part_accessor :contact
|
8
|
-
|
9
|
-
part_accessor :username do |value|
|
10
|
-
raise InvalidURLError if value.nil?
|
11
|
-
end
|
12
|
-
|
13
|
-
part_accessor :domain do |value|
|
14
|
-
raise InvalidURLError if value.nil?
|
15
|
-
end
|
16
|
-
|
17
|
-
protected
|
18
|
-
|
19
|
-
def parser
|
20
|
-
@parser ||= EmailParser.new
|
21
|
-
end
|
22
|
-
|
23
|
-
def assembler
|
24
|
-
@assembler ||= EmailAssembler.new
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
data/lib/earl/email_parser.tt
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
module Earl
|
2
|
-
grammar Email
|
3
|
-
|
4
|
-
rule program
|
5
|
-
whitespace v:( email ) whitespace {
|
6
|
-
def resolve
|
7
|
-
{ }.merge v.resolve
|
8
|
-
end
|
9
|
-
}
|
10
|
-
end
|
11
|
-
|
12
|
-
rule whitespace
|
13
|
-
[\s]*
|
14
|
-
end
|
15
|
-
|
16
|
-
rule email
|
17
|
-
username '@' domain whitespace contact {
|
18
|
-
def resolve
|
19
|
-
username.resolve.merge domain.resolve.merge contact.resolve
|
20
|
-
end
|
21
|
-
}
|
22
|
-
/
|
23
|
-
username '@' domain {
|
24
|
-
def resolve
|
25
|
-
username.resolve.merge domain.resolve
|
26
|
-
end
|
27
|
-
}
|
28
|
-
end
|
29
|
-
|
30
|
-
rule username
|
31
|
-
[^@\s]+ {
|
32
|
-
def resolve
|
33
|
-
{ :username => text_value }
|
34
|
-
end
|
35
|
-
}
|
36
|
-
end
|
37
|
-
|
38
|
-
rule domain
|
39
|
-
characters '.' characters {
|
40
|
-
def resolve
|
41
|
-
{ :domain => text_value }
|
42
|
-
end
|
43
|
-
}
|
44
|
-
end
|
45
|
-
|
46
|
-
rule contact
|
47
|
-
'<' [^>]+ '>' {
|
48
|
-
def resolve
|
49
|
-
{ :contact => elements[ 1 ].text_value }
|
50
|
-
end
|
51
|
-
}
|
52
|
-
end
|
53
|
-
|
54
|
-
rule characters
|
55
|
-
[a-zA-Z0-9]+
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
data/lib/earl/entity_base.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
module Earl
|
2
|
-
class EntityBase < HashInquirer
|
3
|
-
def initialize( source )
|
4
|
-
super parser.parse( source ).resolve rescue raise InvalidURLError
|
5
|
-
end
|
6
|
-
|
7
|
-
def to_s
|
8
|
-
assembler.assemble self
|
9
|
-
end
|
10
|
-
|
11
|
-
def self.part_accessor( *parts, &block )
|
12
|
-
parts.each do |part|
|
13
|
-
define_method part do
|
14
|
-
if self[ part ].is_a? String
|
15
|
-
StringInquirer.new self[ part ]
|
16
|
-
else
|
17
|
-
self[ part ]
|
18
|
-
end
|
19
|
-
end
|
20
|
-
define_method :"#{part}=" do |value|
|
21
|
-
self[ part ] = value
|
22
|
-
yield value if block_given?
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
protected
|
28
|
-
|
29
|
-
def parser
|
30
|
-
raise SubclassError
|
31
|
-
end
|
32
|
-
|
33
|
-
def assembler
|
34
|
-
raise SubclassError
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
data/lib/earl/hash_inquirer.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
module Earl
|
2
|
-
class HashInquirer < ::Hash
|
3
|
-
def initialize( hash, &block )
|
4
|
-
merge! hash
|
5
|
-
super block
|
6
|
-
end
|
7
|
-
|
8
|
-
def method_missing( meth, *args, &block )
|
9
|
-
if meth.to_s[ -1 ] == '?'
|
10
|
-
self.has_key? meth.to_s[ 0..-2 ].to_sym
|
11
|
-
else
|
12
|
-
super
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
data/lib/earl/string_inquirer.rb
DELETED
data/lib/earl/url_assembler.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
module Earl
|
2
|
-
class URLAssembler
|
3
|
-
|
4
|
-
def assemble( parts={} )
|
5
|
-
''.tap do |url|
|
6
|
-
url << ( parts[ :scheme ] + '://' ) if parts[ :scheme ]
|
7
|
-
url << ( parts[ :subdomain ] + '.' ) if parts[ :subdomain ]
|
8
|
-
url << ( parts[ :host ] ) if parts[ :host ]
|
9
|
-
url << ( ':' + parts[ :port ].to_s ) if parts[ :port ]
|
10
|
-
url << ( '/' + parts[ :path ] ) if parts[ :path ]
|
11
|
-
url << ( '?' + parts[ :search ] ) if parts[ :search ]
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
data/lib/earl/url_entity.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
require 'treetop'
|
2
|
-
require 'earl/url_parser'
|
3
|
-
|
4
|
-
module Earl
|
5
|
-
class URLEntity < EntityBase
|
6
|
-
|
7
|
-
part_accessor :scheme, :subdomain, :port, :path, :search
|
8
|
-
|
9
|
-
part_accessor :host do |value|
|
10
|
-
raise InvalidURLError if value.nil?
|
11
|
-
end
|
12
|
-
|
13
|
-
protected
|
14
|
-
|
15
|
-
def parser
|
16
|
-
@parser ||= URLParser.new
|
17
|
-
end
|
18
|
-
|
19
|
-
def assembler
|
20
|
-
@assembler ||= URLAssembler.new
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
data/lib/earl/url_parser.tt
DELETED
@@ -1,163 +0,0 @@
|
|
1
|
-
module Earl
|
2
|
-
grammar URL
|
3
|
-
|
4
|
-
rule program
|
5
|
-
whitespace v:( url ) whitespace {
|
6
|
-
def resolve
|
7
|
-
{ }.merge v.resolve
|
8
|
-
end
|
9
|
-
}
|
10
|
-
end
|
11
|
-
|
12
|
-
rule whitespace
|
13
|
-
[\s]*
|
14
|
-
end
|
15
|
-
|
16
|
-
rule url
|
17
|
-
scheme host port path search {
|
18
|
-
def resolve
|
19
|
-
scheme.resolve.merge port.resolve.merge host.resolve.merge path.resolve.merge search.resolve
|
20
|
-
end
|
21
|
-
}
|
22
|
-
/
|
23
|
-
scheme host port path {
|
24
|
-
def resolve
|
25
|
-
scheme.resolve.merge port.resolve.merge host.resolve.merge path.resolve
|
26
|
-
end
|
27
|
-
}
|
28
|
-
/
|
29
|
-
scheme host port search {
|
30
|
-
def resolve
|
31
|
-
scheme.resolve.merge port.resolve.merge host.resolve.merge search.resolve
|
32
|
-
end
|
33
|
-
}
|
34
|
-
/
|
35
|
-
scheme host port {
|
36
|
-
def resolve
|
37
|
-
scheme.resolve.merge port.resolve.merge host.resolve
|
38
|
-
end
|
39
|
-
}
|
40
|
-
/
|
41
|
-
scheme host path search {
|
42
|
-
def resolve
|
43
|
-
scheme.resolve.merge host.resolve.merge path.resolve.merge search.resolve
|
44
|
-
end
|
45
|
-
}
|
46
|
-
/
|
47
|
-
scheme host path {
|
48
|
-
def resolve
|
49
|
-
scheme.resolve.merge host.resolve.merge path.resolve
|
50
|
-
end
|
51
|
-
}
|
52
|
-
/
|
53
|
-
scheme host search {
|
54
|
-
def resolve
|
55
|
-
scheme.resolve.merge host.resolve.merge search.resolve
|
56
|
-
end
|
57
|
-
}
|
58
|
-
/
|
59
|
-
scheme host {
|
60
|
-
def resolve
|
61
|
-
scheme.resolve.merge host.resolve
|
62
|
-
end
|
63
|
-
}
|
64
|
-
/
|
65
|
-
host port path search {
|
66
|
-
def resolve
|
67
|
-
port.resolve.merge host.resolve.merge path.resolve.merge search.resolve
|
68
|
-
end
|
69
|
-
}
|
70
|
-
/
|
71
|
-
host port path {
|
72
|
-
def resolve
|
73
|
-
port.resolve.merge host.resolve.merge path.resolve
|
74
|
-
end
|
75
|
-
}
|
76
|
-
/
|
77
|
-
host port search {
|
78
|
-
def resolve
|
79
|
-
port.resolve.merge host.resolve.merge search.resolve
|
80
|
-
end
|
81
|
-
}
|
82
|
-
/
|
83
|
-
host port {
|
84
|
-
def resolve
|
85
|
-
port.resolve.merge host.resolve
|
86
|
-
end
|
87
|
-
}
|
88
|
-
/
|
89
|
-
host path {
|
90
|
-
def resolve
|
91
|
-
host.resolve.merge path.resolve
|
92
|
-
end
|
93
|
-
}
|
94
|
-
/
|
95
|
-
host search {
|
96
|
-
def resolve
|
97
|
-
host.resolve.merge search.resolve
|
98
|
-
end
|
99
|
-
}
|
100
|
-
/
|
101
|
-
host
|
102
|
-
end
|
103
|
-
|
104
|
-
rule scheme
|
105
|
-
characters '://' {
|
106
|
-
def resolve
|
107
|
-
{ :scheme => characters.text_value }
|
108
|
-
end
|
109
|
-
}
|
110
|
-
end
|
111
|
-
|
112
|
-
rule host
|
113
|
-
subdomain:characters '.' domain:characters '.' tld:characters {
|
114
|
-
def resolve
|
115
|
-
{
|
116
|
-
:subdomain => subdomain.text_value,
|
117
|
-
:host => "#{domain.text_value}.#{tld.text_value}"
|
118
|
-
}
|
119
|
-
end
|
120
|
-
}
|
121
|
-
/
|
122
|
-
domain:characters '.' tld:characters {
|
123
|
-
def resolve
|
124
|
-
{ :host => text_value }
|
125
|
-
end
|
126
|
-
}
|
127
|
-
/
|
128
|
-
characters {
|
129
|
-
def resolve
|
130
|
-
{ :host => text_value }
|
131
|
-
end
|
132
|
-
}
|
133
|
-
end
|
134
|
-
|
135
|
-
rule port
|
136
|
-
':' port:([0-9]1..4) {
|
137
|
-
def resolve
|
138
|
-
{ :port => port.text_value }
|
139
|
-
end
|
140
|
-
}
|
141
|
-
end
|
142
|
-
|
143
|
-
rule path
|
144
|
-
'/' characters {
|
145
|
-
def resolve
|
146
|
-
{ :path => characters.text_value }
|
147
|
-
end
|
148
|
-
}
|
149
|
-
end
|
150
|
-
|
151
|
-
rule search
|
152
|
-
'?' search:( characters '=' characters ) {
|
153
|
-
def resolve
|
154
|
-
{ :search => search.text_value }
|
155
|
-
end
|
156
|
-
}
|
157
|
-
end
|
158
|
-
|
159
|
-
rule characters
|
160
|
-
[a-zA-Z0-9]+
|
161
|
-
end
|
162
|
-
end
|
163
|
-
end
|