earl 0.3.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. data/.document +5 -0
  2. data/.gitignore +4 -15
  3. data/.rspec +1 -0
  4. data/.travis.yml +11 -0
  5. data/Gemfile +2 -2
  6. data/Gemfile.lock +60 -0
  7. data/Guardfile +10 -0
  8. data/LICENSE +2 -4
  9. data/README.rdoc +145 -0
  10. data/Rakefile +35 -2
  11. data/earl.gemspec +13 -7
  12. data/lib/earl.rb +7 -22
  13. data/lib/earl/earl.rb +158 -0
  14. data/lib/earl/scraper.rb +93 -0
  15. data/lib/earl/version.rb +2 -2
  16. data/script/console +10 -0
  17. data/spec/fixtures/bicycles.html +490 -0
  18. data/spec/fixtures/bicycles_without_description.html +489 -0
  19. data/spec/fixtures/bicycles_without_images.html +457 -0
  20. data/spec/fixtures/page_as_atom.html +161 -0
  21. data/spec/fixtures/page_as_rss.html +151 -0
  22. data/spec/fixtures/page_with_atom_feed.html +39 -0
  23. data/spec/fixtures/page_with_rss_and_atom_feeds.html +40 -0
  24. data/spec/fixtures/page_with_rss_feed.html +39 -0
  25. data/spec/fixtures/page_without_feeds.html +36 -0
  26. data/spec/fixtures/youtube.html +1839 -0
  27. data/spec/integration/feed_spec.rb +78 -0
  28. data/spec/integration/oembed_spec.rb +40 -0
  29. data/spec/spec_helper.rb +18 -28
  30. data/spec/support/fixtures.rb +10 -0
  31. data/spec/unit/earl/earl_spec.rb +16 -0
  32. data/spec/unit/earl/feed_spec.rb +59 -0
  33. data/spec/unit/earl/oembed_spec.rb +49 -0
  34. data/spec/unit/earl/scraper_spec.rb +48 -0
  35. data/spec/unit/earl_spec.rb +65 -0
  36. metadata +123 -46
  37. data/.rvmrc +0 -48
  38. data/README.md +0 -41
  39. data/lib/earl/email_assembler.rb +0 -11
  40. data/lib/earl/email_entity.rb +0 -27
  41. data/lib/earl/email_parser.tt +0 -58
  42. data/lib/earl/entity_base.rb +0 -37
  43. data/lib/earl/hash_inquirer.rb +0 -16
  44. data/lib/earl/string_inquirer.rb +0 -11
  45. data/lib/earl/url_assembler.rb +0 -15
  46. data/lib/earl/url_entity.rb +0 -23
  47. data/lib/earl/url_parser.tt +0 -163
  48. data/spec/earl/earl_spec.rb +0 -17
  49. data/spec/earl/email_entity_spec.rb +0 -31
  50. data/spec/earl/email_parser_spec.rb +0 -29
  51. data/spec/earl/entity_base_spec.rb +0 -39
  52. data/spec/earl/hash_inquirer_spec.rb +0 -24
  53. data/spec/earl/string_inquirer_spec.rb +0 -9
  54. data/spec/earl/url_entity_spec.rb +0 -45
  55. data/spec/earl/url_parser_spec.rb +0 -189
data/.rvmrc DELETED
@@ -1,48 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
- # development environment upon cd'ing into the directory
5
-
6
- # First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
7
- # Only full ruby name is supported here, for short names use:
8
- # echo "rvm use 1.9.3" > .rvmrc
9
- environment_id="ruby-1.9.3-p125@earl"
10
-
11
- # Uncomment the following lines if you want to verify rvm version per project
12
- # rvmrc_rvm_version="1.10.3" # 1.10.1 seams as a safe start
13
- # eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
14
- # echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
15
- # return 1
16
- # }
17
-
18
- # First we attempt to load the desired environment directly from the environment
19
- # file. This is very fast and efficient compared to running through the entire
20
- # CLI and selector. If you want feedback on which environment was used then
21
- # insert the word 'use' after --create as this triggers verbose mode.
22
- if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
23
- && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
24
- then
25
- \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
26
- [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
27
- \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
28
- else
29
- # If the environment file has not yet been created, use the RVM CLI to select.
30
- rvm --create "$environment_id" || {
31
- echo "Failed to create RVM environment '${environment_id}'."
32
- return 1
33
- }
34
- fi
35
-
36
- # If you use bundler, this might be useful to you:
37
- # if [[ -s Gemfile ]] && {
38
- # ! builtin command -v bundle >/dev/null ||
39
- # builtin command -v bundle | grep $rvm_path/bin/bundle >/dev/null
40
- # }
41
- # then
42
- # printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
43
- # gem install bundler
44
- # fi
45
- # if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
46
- # then
47
- # bundle install | grep -vE '^Using|Your bundle is complete'
48
- # fi
data/README.md DELETED
@@ -1,41 +0,0 @@
1
- # Earl
2
-
3
- What URI wishes it could look like.
4
-
5
- ## Installation
6
-
7
- Add this line to your application's Gemfile:
8
-
9
- gem 'earl'
10
-
11
- And then execute:
12
-
13
- $ bundle
14
-
15
- Or install it yourself as:
16
-
17
- $ gem install earl
18
-
19
- ## Usage
20
-
21
- ``` rb
22
- url = Earl::URL.new 'http://www.foo.com'
23
-
24
- url.scheme # => 'http'
25
- url.scheme? # => true
26
-
27
- url.subdomain # => 'www'
28
- url.subdomain.www? # => true
29
- url.subdomain.baz? # => false
30
-
31
- url.host = 'foo.edu'
32
- url.to_s # => 'http://www.foo.edu'
33
- ```
34
-
35
- ## Contributing
36
-
37
- 1. Fork it
38
- 2. Create your feature branch (`git checkout -b my-new-feature`)
39
- 3. Commit your changes (`git commit -am 'Added some feature'`)
40
- 4. Push to the branch (`git push origin my-new-feature`)
41
- 5. Create new Pull Request
data/lib/earl/email_assembler.rb DELETED
@@ -1,11 +0,0 @@
1
- module Earl
2
- class EmailAssembler
3
-
4
- def assemble( parts={} )
5
- ''.tap do |email|
6
- email << [ parts[ :username ], parts[ :domain ] ].join( '@' )
7
- email << " <#{parts[ :contact ]}>" if parts[ :contact ]
8
- end
9
- end
10
- end
11
- end
data/lib/earl/email_entity.rb DELETED
@@ -1,27 +0,0 @@
1
- require 'treetop'
2
- require 'earl/email_parser'
3
-
4
- module Earl
5
- class EmailEntity < EntityBase
6
-
7
- part_accessor :contact
8
-
9
- part_accessor :username do |value|
10
- raise InvalidURLError if value.nil?
11
- end
12
-
13
- part_accessor :domain do |value|
14
- raise InvalidURLError if value.nil?
15
- end
16
-
17
- protected
18
-
19
- def parser
20
- @parser ||= EmailParser.new
21
- end
22
-
23
- def assembler
24
- @assembler ||= EmailAssembler.new
25
- end
26
- end
27
- end
data/lib/earl/email_parser.tt DELETED
@@ -1,58 +0,0 @@
1
- module Earl
2
- grammar Email
3
-
4
- rule program
5
- whitespace v:( email ) whitespace {
6
- def resolve
7
- { }.merge v.resolve
8
- end
9
- }
10
- end
11
-
12
- rule whitespace
13
- [\s]*
14
- end
15
-
16
- rule email
17
- username '@' domain whitespace contact {
18
- def resolve
19
- username.resolve.merge domain.resolve.merge contact.resolve
20
- end
21
- }
22
- /
23
- username '@' domain {
24
- def resolve
25
- username.resolve.merge domain.resolve
26
- end
27
- }
28
- end
29
-
30
- rule username
31
- [^@\s]+ {
32
- def resolve
33
- { :username => text_value }
34
- end
35
- }
36
- end
37
-
38
- rule domain
39
- characters '.' characters {
40
- def resolve
41
- { :domain => text_value }
42
- end
43
- }
44
- end
45
-
46
- rule contact
47
- '<' [^>]+ '>' {
48
- def resolve
49
- { :contact => elements[ 1 ].text_value }
50
- end
51
- }
52
- end
53
-
54
- rule characters
55
- [a-zA-Z0-9]+
56
- end
57
- end
58
- end
data/lib/earl/entity_base.rb DELETED
@@ -1,37 +0,0 @@
1
- module Earl
2
- class EntityBase < HashInquirer
3
- def initialize( source )
4
- super parser.parse( source ).resolve rescue raise InvalidURLError
5
- end
6
-
7
- def to_s
8
- assembler.assemble self
9
- end
10
-
11
- def self.part_accessor( *parts, &block )
12
- parts.each do |part|
13
- define_method part do
14
- if self[ part ].is_a? String
15
- StringInquirer.new self[ part ]
16
- else
17
- self[ part ]
18
- end
19
- end
20
- define_method :"#{part}=" do |value|
21
- self[ part ] = value
22
- yield value if block_given?
23
- end
24
- end
25
- end
26
-
27
- protected
28
-
29
- def parser
30
- raise SubclassError
31
- end
32
-
33
- def assembler
34
- raise SubclassError
35
- end
36
- end
37
- end
data/lib/earl/hash_inquirer.rb DELETED
@@ -1,16 +0,0 @@
1
- module Earl
2
- class HashInquirer < ::Hash
3
- def initialize( hash, &block )
4
- merge! hash
5
- super block
6
- end
7
-
8
- def method_missing( meth, *args, &block )
9
- if meth.to_s[ -1 ] == '?'
10
- self.has_key? meth.to_s[ 0..-2 ].to_sym
11
- else
12
- super
13
- end
14
- end
15
- end
16
- end
data/lib/earl/string_inquirer.rb DELETED
@@ -1,11 +0,0 @@
1
- module Earl
2
- class StringInquirer < ::String
3
- def method_missing( meth, *args, &block )
4
- if meth.to_s[ -1 ] == '?'
5
- meth.to_s[ 0..-2 ] == to_s
6
- else
7
- super
8
- end
9
- end
10
- end
11
- end
data/lib/earl/url_assembler.rb DELETED
@@ -1,15 +0,0 @@
1
- module Earl
2
- class URLAssembler
3
-
4
- def assemble( parts={} )
5
- ''.tap do |url|
6
- url << ( parts[ :scheme ] + '://' ) if parts[ :scheme ]
7
- url << ( parts[ :subdomain ] + '.' ) if parts[ :subdomain ]
8
- url << ( parts[ :host ] ) if parts[ :host ]
9
- url << ( ':' + parts[ :port ].to_s ) if parts[ :port ]
10
- url << ( '/' + parts[ :path ] ) if parts[ :path ]
11
- url << ( '?' + parts[ :search ] ) if parts[ :search ]
12
- end
13
- end
14
- end
15
- end
data/lib/earl/url_entity.rb DELETED
@@ -1,23 +0,0 @@
1
- require 'treetop'
2
- require 'earl/url_parser'
3
-
4
- module Earl
5
- class URLEntity < EntityBase
6
-
7
- part_accessor :scheme, :subdomain, :port, :path, :search
8
-
9
- part_accessor :host do |value|
10
- raise InvalidURLError if value.nil?
11
- end
12
-
13
- protected
14
-
15
- def parser
16
- @parser ||= URLParser.new
17
- end
18
-
19
- def assembler
20
- @assembler ||= URLAssembler.new
21
- end
22
- end
23
- end
data/lib/earl/url_parser.tt DELETED
@@ -1,163 +0,0 @@
1
- module Earl
2
- grammar URL
3
-
4
- rule program
5
- whitespace v:( url ) whitespace {
6
- def resolve
7
- { }.merge v.resolve
8
- end
9
- }
10
- end
11
-
12
- rule whitespace
13
- [\s]*
14
- end
15
-
16
- rule url
17
- scheme host port path search {
18
- def resolve
19
- scheme.resolve.merge port.resolve.merge host.resolve.merge path.resolve.merge search.resolve
20
- end
21
- }
22
- /
23
- scheme host port path {
24
- def resolve
25
- scheme.resolve.merge port.resolve.merge host.resolve.merge path.resolve
26
- end
27
- }
28
- /
29
- scheme host port search {
30
- def resolve
31
- scheme.resolve.merge port.resolve.merge host.resolve.merge search.resolve
32
- end
33
- }
34
- /
35
- scheme host port {
36
- def resolve
37
- scheme.resolve.merge port.resolve.merge host.resolve
38
- end
39
- }
40
- /
41
- scheme host path search {
42
- def resolve
43
- scheme.resolve.merge host.resolve.merge path.resolve.merge search.resolve
44
- end
45
- }
46
- /
47
- scheme host path {
48
- def resolve
49
- scheme.resolve.merge host.resolve.merge path.resolve
50
- end
51
- }
52
- /
53
- scheme host search {
54
- def resolve
55
- scheme.resolve.merge host.resolve.merge search.resolve
56
- end
57
- }
58
- /
59
- scheme host {
60
- def resolve
61
- scheme.resolve.merge host.resolve
62
- end
63
- }
64
- /
65
- host port path search {
66
- def resolve
67
- port.resolve.merge host.resolve.merge path.resolve.merge search.resolve
68
- end
69
- }
70
- /
71
- host port path {
72
- def resolve
73
- port.resolve.merge host.resolve.merge path.resolve
74
- end
75
- }
76
- /
77
- host port search {
78
- def resolve
79
- port.resolve.merge host.resolve.merge search.resolve
80
- end
81
- }
82
- /
83
- host port {
84
- def resolve
85
- port.resolve.merge host.resolve
86
- end
87
- }
88
- /
89
- host path {
90
- def resolve
91
- host.resolve.merge path.resolve
92
- end
93
- }
94
- /
95
- host search {
96
- def resolve
97
- host.resolve.merge search.resolve
98
- end
99
- }
100
- /
101
- host
102
- end
103
-
104
- rule scheme
105
- characters '://' {
106
- def resolve
107
- { :scheme => characters.text_value }
108
- end
109
- }
110
- end
111
-
112
- rule host
113
- subdomain:characters '.' domain:characters '.' tld:characters {
114
- def resolve
115
- {
116
- :subdomain => subdomain.text_value,
117
- :host => "#{domain.text_value}.#{tld.text_value}"
118
- }
119
- end
120
- }
121
- /
122
- domain:characters '.' tld:characters {
123
- def resolve
124
- { :host => text_value }
125
- end
126
- }
127
- /
128
- characters {
129
- def resolve
130
- { :host => text_value }
131
- end
132
- }
133
- end
134
-
135
- rule port
136
- ':' port:([0-9]1..4) {
137
- def resolve
138
- { :port => port.text_value }
139
- end
140
- }
141
- end
142
-
143
- rule path
144
- '/' characters {
145
- def resolve
146
- { :path => characters.text_value }
147
- end
148
- }
149
- end
150
-
151
- rule search
152
- '?' search:( characters '=' characters ) {
153
- def resolve
154
- { :search => search.text_value }
155
- end
156
- }
157
- end
158
-
159
- rule characters
160
- [a-zA-Z0-9]+
161
- end
162
- end
163
- end