metainspector 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +2 -1
- data/lib/meta_inspector/scraper.rb +8 -0
- data/lib/meta_inspector/version.rb +1 -1
- data/spec/metainspector_spec.rb +5 -0
- metadata +3 -3
data/README.rdoc
CHANGED
@@ -32,6 +32,8 @@ Then you can see the scraped data like this:
|
|
32
32
|
page.meta_description # meta description, as string
|
33
33
|
page.meta_keywords # meta keywords, as string
|
34
34
|
page.image # Most relevant image, if defined with og:image
|
35
|
+
page.images # array of strings, with every img found on the page
|
36
|
+
page.absolute_images # array of all the images converted to absolute urls
|
35
37
|
page.feed # Get rss or atom links in meta data fields as array
|
36
38
|
page.meta_og_title # opengraph title
|
37
39
|
page.meta_og_image # opengraph image
|
@@ -101,7 +103,6 @@ You're welcome to fork this project and send pull requests. I want to thank spec
|
|
101
103
|
|
102
104
|
* Get page.base_dir from the URL
|
103
105
|
* Distinguish between external and internal links, returning page.links for all of them as found, page.external_links and page.internal_links converted to absolute URLs
|
104
|
-
* Return array of images in page as absolute URLs
|
105
106
|
* Be able to set a timeout in seconds
|
106
107
|
* If keywords seem to be separated by blank spaces, replace them with commas
|
107
108
|
* Mocks
|
@@ -30,11 +30,19 @@ module MetaInspector
|
|
30
30
|
.map {|link| link.attributes["href"] \
|
31
31
|
.to_s.strip}.uniq) rescue nil
|
32
32
|
end
|
33
|
+
|
34
|
+
def images
|
35
|
+
@data.images ||= parsed_document.search('//img').map{ |i| i.attributes['src'].value }.uniq
|
36
|
+
end
|
33
37
|
|
34
38
|
# Returns the links converted to absolute urls
|
35
39
|
def absolute_links
|
36
40
|
@data.absolute_links ||= links.map { |l| absolutify_url(l) }
|
37
41
|
end
|
42
|
+
|
43
|
+
def absolute_images
|
44
|
+
@data.absolute_images ||= images.map{ |i| absolutify_url(i) }
|
45
|
+
end
|
38
46
|
|
39
47
|
# Returns the parsed document meta rss links
|
40
48
|
def feed
|
data/spec/metainspector_spec.rb
CHANGED
@@ -45,6 +45,11 @@ describe MetaInspector do
|
|
45
45
|
@m.image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
|
46
46
|
@m.meta_og_image.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
|
47
47
|
end
|
48
|
+
|
49
|
+
it "should find all page images" do
|
50
|
+
@m.absolute_images == ["http://pagerankalert.com/images/pagerank_alert.png?1309512337"]
|
51
|
+
@m.images == ["/images/pagerank_alert.png?1309512337"]
|
52
|
+
end
|
48
53
|
|
49
54
|
it "should have a Nokogiri::HTML::Document as parsed_document" do
|
50
55
|
@m.parsed_document.class.should == Nokogiri::HTML::Document
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 1
|
7
|
-
- 6
|
7
|
+
- 7
|
8
8
|
- 0
|
9
|
-
version: 1.6.0
|
9
|
+
version: 1.7.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Jaime Iniesta
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-
|
17
|
+
date: 2011-07-05 00:00:00 +02:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|