website_information 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1f3b31146d3da07c6be7952cbe78e6121da9e86
|
4
|
+
data.tar.gz: b165e9c3c14503ca4efc03ead798179324b306d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bdee9b5cd2efe9da8b0e011e6a13024ab0ccf1fb68fb08d7e55d3eb39e98dc956209f2d20586e9952c505d97de85320d00c8d68ec1b24113feba8d1c76002919
|
7
|
+
data.tar.gz: 32811dc8836e78dd65b175f14adf7338d0d81e8256ea970d923405f2d90b46104f86f035e6729f70fa59bee7151bafaa85995934267c5fd4c5cc4cfca2e8ce24
|
data/README.md
CHANGED
@@ -34,9 +34,26 @@ Please install web driver if you want to use site capture.
|
|
34
34
|
scraped_params = WebsiteInformation::Website.new('scraping_url(ex. https://www.google.com)').scraped_params
|
35
35
|
|
36
36
|
# ex) get website title
|
37
|
-
scraped_params.title
|
37
|
+
scraped_params.title # => Google
|
38
|
+
|
38
39
|
```
|
39
40
|
|
41
|
+
### Output
|
42
|
+
The list of values that can be read from `scraped_params`.
|
43
|
+
|
44
|
+
* ```.title``` — ```<title>``` tag value
|
45
|
+
* ```.meta.description``` — ```<meta name="description" >``` tag content
|
46
|
+
* ```.meta.keyword``` — ```<meta name="keyword" >``` tag content
|
47
|
+
* ```.og.site_name``` — ```<meta property="og:site_name" >``` tag content
|
48
|
+
* ```.og.description``` — ```<meta property="og:description" >``` tag content
|
49
|
+
* ```.og.url``` — ```<meta property="og:url" >``` tag content
|
50
|
+
* ```.og.type``` — ```<meta property="og:type" >``` tag content
|
51
|
+
* ```.og.image``` — ```<meta property="og:image" >``` tag content
|
52
|
+
* ```.feed``` — ```<link rel="alternate" >``` tag href, for a link of type application/atom+xml or, if none is found, application/rss+xml
|
53
|
+
* ```.sns.facebook``` — Facebook page plugin (https://developers.facebook.com/docs/plugins/page-plugin/) from data-href
|
54
|
+
* ```.sns.twitter``` — Twitter embedded timelines (https://dev.twitter.com/web/embedded-timelines/list) from href
|
55
|
+
|
56
|
+
|
40
57
|
## Contributing
|
41
58
|
|
42
59
|
Bug reports and pull requests are welcome on GitHub at https://github.com/miraoto/website_information. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
data/lib/website_information/params/site.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'watir'
|
2
|
+
|
1
3
|
module WebsiteInformation
|
2
4
|
module Params
|
3
5
|
class Site
|
@@ -7,6 +9,7 @@ module WebsiteInformation
|
|
7
9
|
@url = url
|
8
10
|
@meta = WebsiteInformation::Params::Meta.new
|
9
11
|
@og = WebsiteInformation::Params::Og.new
|
12
|
+
@sns = WebsiteInformation::Params::Sns.new
|
10
13
|
end
|
11
14
|
|
12
15
|
def meta
|
@@ -17,6 +20,10 @@ module WebsiteInformation
|
|
17
20
|
@og
|
18
21
|
end
|
19
22
|
|
23
|
+
def sns
|
24
|
+
@sns
|
25
|
+
end
|
26
|
+
|
20
27
|
def capture
|
21
28
|
if @capture.nil?
|
22
29
|
browser = Watir::Browser.new
|
data/lib/website_information/website.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
-
require 'watir'
|
3
2
|
|
4
3
|
module WebsiteInformation
|
5
4
|
class Website
|
@@ -26,11 +25,19 @@ module WebsiteInformation
|
|
26
25
|
@params.og.type = doc.css('//meta[property="og:type"]/@content').to_s
|
27
26
|
@params.og.image = doc.css('//meta[property="og:image"]/@content').to_s
|
28
27
|
feed(doc)
|
28
|
+
sns(doc)
|
29
29
|
end
|
30
30
|
|
31
31
|
def feed(doc)
|
32
32
|
@params.feed = doc.css('//link[@rel="alternate"][@type="application/atom+xml"]/@href')[0].to_s
|
33
33
|
@params.feed = doc.css('//link[@rel="alternate"][@type="application/rss+xml"]/@href')[0].to_s if @params.feed.empty?
|
34
34
|
end
|
35
|
+
|
36
|
+
def sns(doc)
|
37
|
+
# scrape facebook url from page plugin (https://developers.facebook.com/docs/plugins/page-plugin/)
|
38
|
+
@params.sns.facebook = doc.css('//div[class$="fb-page"]/@data-href')[0].to_s
|
39
|
+
# scrape twitter url from embedded timelines (https://dev.twitter.com/web/embedded-timelines/list)
|
40
|
+
@params.sns.twitter = doc.css('//a[class$="twitter-timeline"]/@href')[0].to_s
|
41
|
+
end
|
35
42
|
end
|
36
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: website_information
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.4
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- miraoto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-10-
|
11
|
+
date: 2017-10-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -117,6 +117,7 @@ files:
|
|
117
117
|
- lib/website_information/params/meta.rb
|
118
118
|
- lib/website_information/params/og.rb
|
119
119
|
- lib/website_information/params/site.rb
|
120
|
+
- lib/website_information/params/sns.rb
|
120
121
|
- lib/website_information/version.rb
|
121
122
|
- lib/website_information/website.rb
|
122
123
|
- website_information.gemspec
|