meta_information 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -6
- data/lib/meta_information.rb +55 -52
- data/lib/meta_information/version.rb +1 -1
- data/spec/lib/meta_information_spec.rb +108 -42
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f65e782ac7eaa37bbdf77b9b44ae5424ed3172b2
|
4
|
+
data.tar.gz: a51c2c644b512bebdc08d7bf268f2e7fefd22f18
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a8e67c5268d725856ea3e2c001b022892b0d2b2ca557b175a94e95920b7e0833b9d28c967b19b2e91318378973c1644fdb5153c35536471baa69e3ae6f42b26
|
7
|
+
data.tar.gz: 2057cf51280e5d455370a188b7a98bb6652d0c9452d67ea75095ec3f17fdbe198a35c2fb18c88cb9ae388bdcad4f9e3fd5bea2b5c67663898cd8552f362a3dad
|
data/README.md
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
# MetaInformation
|
2
|
+
[![Gem Version](https://badge.fury.io/rb/meta_information.svg)](https://badge.fury.io/rb/meta_information)
|
3
|
+
|
2
4
|
Simple gem for parsing meta information from websites. It scans all meta tags by their name or property attributes.
|
3
5
|
## Installation
|
4
6
|
Add this line to your application's Gemfile:
|
@@ -21,15 +23,18 @@ pp meta
|
|
21
23
|
# :all_meta=>
|
22
24
|
# [{:type=>"name",
|
23
25
|
# :name=>"viewport",
|
26
|
+
# :property=>nil,
|
24
27
|
# :content=>"width=device-width, initial-scale=1.0"},
|
25
|
-
# {:type=>"name", :name=>"description", :content=>"some description"},
|
26
|
-
# {:type=>"name", :name=>"title", :content=>"i am title"},
|
27
|
-
# {:type=>"name", :name=>"og:title", :content=>"some content"},
|
28
|
-
# {:type=>"name", :name=>"og:description", :content=>"some description"},
|
28
|
+
# {:type=>"name", :name=>"description", :property=>nil, :content=>"some description"},
|
29
|
+
# {:type=>"name", :name=>"title", :property=>nil, :content=>"i am title"},
|
30
|
+
# {:type=>"name", :name=>"og:title", :property=>nil, :content=>"some content"},
|
31
|
+
# {:type=>"name", :name=>"og:description", :property=>nil, :content=>"some description"},
|
29
32
|
# {:type=>"name",
|
30
33
|
# :name=>"og:image",
|
31
|
-
# :
|
34
|
+
# :property=>nil,
|
35
|
+
# :content=> "https://www.awesome_site.com/assets/awesome_picture.jpg"},
|
36
|
+
# {:type=>"property", :name=>nil, :property=>"fb:app_id", :content=>"1234567890"}]}
|
32
37
|
###
|
33
38
|
```
|
34
39
|
## License
|
35
|
-
MIT License.
|
40
|
+
MIT License.
|
data/lib/meta_information.rb
CHANGED
@@ -7,68 +7,71 @@ require 'open-uri'
|
|
7
7
|
# for usage
|
8
8
|
# MetaInformation.get_meta('https://some_site.com/some_page')
|
9
9
|
module MetaInformation
|
10
|
-
|
11
|
-
def get_meta(input_url)
|
12
|
-
return not_valid_url_error unless valid_url?(input_url)
|
10
|
+
extend self
|
13
11
|
|
14
|
-
|
15
|
-
|
12
|
+
def get_meta(input_url)
|
13
|
+
return not_valid_url_error unless valid_url?(input_url)
|
16
14
|
|
17
|
-
|
18
|
-
|
19
|
-
end
|
15
|
+
document = create_document(input_url)
|
16
|
+
return nokogiri_error if document == false
|
20
17
|
|
21
|
-
|
18
|
+
meta_hash = create_meta_array(document)
|
19
|
+
success_hash.merge(all_meta: meta_hash)
|
20
|
+
end
|
22
21
|
|
23
|
-
|
24
|
-
array = []
|
25
|
-
document.css('meta').each do |node|
|
26
|
-
if !node['name'].nil?
|
27
|
-
array.push(
|
28
|
-
type: 'name',
|
29
|
-
name: node['name'],
|
30
|
-
content: node['content']
|
31
|
-
)
|
32
|
-
elsif !node['property'].nil?
|
33
|
-
array.push(
|
34
|
-
type: 'property',
|
35
|
-
property: node['property'],
|
36
|
-
content: node['content']
|
37
|
-
)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
array
|
41
|
-
end
|
22
|
+
private
|
42
23
|
|
43
|
-
|
44
|
-
|
24
|
+
def create_meta_array(document)
|
25
|
+
array = []
|
26
|
+
document.css('meta').each do |node|
|
27
|
+
array.push(
|
28
|
+
type: node_type(node),
|
29
|
+
name: node['name'],
|
30
|
+
property: node['property'],
|
31
|
+
content: node['content']
|
32
|
+
)
|
45
33
|
end
|
34
|
+
array
|
35
|
+
end
|
46
36
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
37
|
+
def node_type(node)
|
38
|
+
if !node['name'].nil?
|
39
|
+
'name'
|
40
|
+
elsif !node['property'].nil?
|
41
|
+
'property'
|
42
|
+
else
|
43
|
+
''
|
51
44
|
end
|
45
|
+
end
|
52
46
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
error: 'url is not valid'
|
57
|
-
}
|
58
|
-
end
|
47
|
+
def valid_url?(uri)
|
48
|
+
!(uri =~ URI.regexp).nil?
|
49
|
+
end
|
59
50
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
end
|
51
|
+
def create_document(input_url)
|
52
|
+
Nokogiri::HTML(open(input_url))
|
53
|
+
rescue
|
54
|
+
false
|
55
|
+
end
|
66
56
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
57
|
+
def not_valid_url_error
|
58
|
+
{
|
59
|
+
success: false,
|
60
|
+
error: 'url is not valid'
|
61
|
+
}
|
62
|
+
end
|
63
|
+
|
64
|
+
def nokogiri_error
|
65
|
+
{
|
66
|
+
success: false,
|
67
|
+
error: 'error with parsing a document'
|
68
|
+
}
|
69
|
+
end
|
70
|
+
|
71
|
+
def success_hash
|
72
|
+
{
|
73
|
+
succes: 'true',
|
74
|
+
error: ''
|
75
|
+
}
|
73
76
|
end
|
74
77
|
end
|
@@ -1,38 +1,68 @@
|
|
1
1
|
require './lib/meta_information'
|
2
|
+
require './spec/shared_stuff.rb'
|
3
|
+
require './spec/config.rb'
|
2
4
|
|
3
5
|
RSpec.describe 'MetaInformation' do
|
4
|
-
|
6
|
+
include_context 'shared stuff'
|
7
|
+
|
8
|
+
describe 'get_meta' do
|
9
|
+
it 'should return not_valid_url_error' do
|
10
|
+
allow(MetaInformation).to receive(:valid_url?).and_return(false)
|
11
|
+
result = MetaInformation.get_meta('http://some_url.com')
|
12
|
+
expect(result).to(
|
13
|
+
eq(
|
14
|
+
success: false,
|
15
|
+
error: 'url is not valid'
|
16
|
+
)
|
17
|
+
)
|
18
|
+
end
|
19
|
+
|
20
|
+
describe 'with mock for valid_url?' do
|
21
|
+
before do
|
22
|
+
allow(MetaInformation).to receive(:valid_url?).and_return(true)
|
23
|
+
end
|
24
|
+
it 'should return nokogiri_error' do
|
25
|
+
allow(MetaInformation).to receive(:create_document).and_return(false)
|
26
|
+
|
27
|
+
result = MetaInformation.get_meta('http://some_url.com')
|
28
|
+
expect(result).to(
|
29
|
+
eq(
|
30
|
+
success: false,
|
31
|
+
error: 'error with parsing a document'
|
32
|
+
)
|
33
|
+
)
|
34
|
+
end
|
35
|
+
|
36
|
+
it 'should return success_hash' do
|
37
|
+
allow(MetaInformation).to(
|
38
|
+
receive(:create_document).and_return(
|
39
|
+
Nokogiri::HTML(default_html)
|
40
|
+
)
|
41
|
+
)
|
42
|
+
|
43
|
+
result = MetaInformation.get_meta('http://some_url.com')
|
44
|
+
expect(result).to(
|
45
|
+
eq(
|
46
|
+
succes: 'true',
|
47
|
+
error: '',
|
48
|
+
all_meta: default_html_meta_array
|
49
|
+
)
|
50
|
+
)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
5
55
|
describe 'create_meta_array' do
|
6
56
|
describe 'we have meta' do
|
7
57
|
it 'must create array' do
|
8
|
-
document = Nokogiri::HTML(
|
9
|
-
|
10
|
-
|
11
|
-
<meta name="title" content="some title" />
|
12
|
-
<meta property="author" content="Bob" />
|
13
|
-
<meta property="og:title" content="og_title" />
|
14
|
-
<meta property="twitter:image" content="http://some_host.com/some_path" />
|
15
|
-
<meta property="og:locale" content="ru_RU" />
|
16
|
-
<meta property="al:ios:app_store_id" content="12345678900" />
|
17
|
-
<body>
|
18
|
-
<h1>Mr. Belvedere Fan Club</h1>
|
19
|
-
</body>
|
20
|
-
</html>
|
21
|
-
')
|
22
|
-
expect(MetaInformation.send(:create_meta_array, document)).to eq([
|
23
|
-
{ type: 'name', name: 'description', content: '' },
|
24
|
-
{ type: 'name', name: 'title', content: 'some title' },
|
25
|
-
{ type: 'property', property: 'author', content: 'Bob' },
|
26
|
-
{ type: 'property', property: 'og:title', content: 'og_title' },
|
27
|
-
{ type: 'property', property: 'twitter:image', content: 'http://some_host.com/some_path' },
|
28
|
-
{ type: 'property', property: 'og:locale', content: 'ru_RU' },
|
29
|
-
{ type: 'property', property: 'al:ios:app_store_id', content: '12345678900' }
|
30
|
-
])
|
58
|
+
document = Nokogiri::HTML(default_html)
|
59
|
+
result = MetaInformation.send(:create_meta_array, document)
|
60
|
+
expect(result).to eq(default_html_meta_array)
|
31
61
|
end
|
32
62
|
end
|
33
|
-
|
34
|
-
describe 'without meta' do
|
35
|
-
it '
|
63
|
+
|
64
|
+
describe 'without meta has empty array' do
|
65
|
+
it 'if have not mate' do
|
36
66
|
first_document = Nokogiri::HTML('
|
37
67
|
<html>
|
38
68
|
<body>
|
@@ -40,31 +70,45 @@ RSpec.describe 'MetaInformation' do
|
|
40
70
|
</body>
|
41
71
|
</html>
|
42
72
|
')
|
43
|
-
second_document = Nokogiri::HTML('')
|
44
73
|
expect(MetaInformation.send(:create_meta_array, first_document)).to eq([])
|
74
|
+
end
|
75
|
+
|
76
|
+
it 'if html is empty' do
|
77
|
+
second_document = Nokogiri::HTML('')
|
45
78
|
expect(MetaInformation.send(:create_meta_array, second_document)).to eq([])
|
46
79
|
end
|
47
80
|
end
|
48
81
|
end
|
49
82
|
|
50
83
|
describe 'valid_url?' do
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
expect(MetaInformation.send(:valid_url?, 'wwwsome_site.ru')).to be_truthy
|
56
|
-
expect(MetaInformation.send(:valid_url?, 'https://somesite.com/some_page')).to be_truthy
|
57
|
-
expect(MetaInformation.send(:valid_url?, 'https://somesite.com/some_page/page')).to be_truthy
|
58
|
-
expect(MetaInformation.send(:valid_url?, 'https://somesite.com.uk/some_page')).to be_truthy
|
84
|
+
def self.validate_valid_url(url)
|
85
|
+
it "#{url} must be valid" do
|
86
|
+
expect(MetaInformation.send(:valid_url?, url)).to be_truthy
|
87
|
+
end
|
59
88
|
end
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
89
|
+
|
90
|
+
validate_valid_url('http://www.somesite.com')
|
91
|
+
validate_valid_url('https://www.somesite.com')
|
92
|
+
validate_valid_url('https://somesite.com')
|
93
|
+
validate_valid_url('http://www.siteforadmin.ru')
|
94
|
+
validate_valid_url('https://somesite.com/some_page')
|
95
|
+
validate_valid_url('https://somesite.com/some_page/page')
|
96
|
+
validate_valid_url('http://somesite.com/some_page/2012/12/page/another_page')
|
97
|
+
validate_valid_url('http://somesite.com/2012/12/page-page-page')
|
98
|
+
validate_valid_url('https://somesite.com.uk/some_page')
|
99
|
+
validate_valid_url('https://meduza.io/short/2017/03/25/v-londone-proshel-mnogotysyachnyy-marsh-protiv-brekzita-fotografiya')
|
100
|
+
|
101
|
+
def self.validate_invalid_url(url)
|
102
|
+
it "#{url} must be invalid" do
|
103
|
+
expect(MetaInformation.send(:valid_url?, url)).to be_falsey
|
104
|
+
end
|
65
105
|
end
|
106
|
+
|
107
|
+
validate_invalid_url('some_site')
|
108
|
+
validate_invalid_url('http\\:wwwsome_site.ru')
|
109
|
+
validate_invalid_url('com.some_site')
|
66
110
|
end
|
67
|
-
|
111
|
+
|
68
112
|
describe 'private hash equal' do
|
69
113
|
it 'not_valid_url_error hash' do
|
70
114
|
expect(MetaInformation.send(:not_valid_url_error)).to eq({
|
@@ -87,4 +131,26 @@ RSpec.describe 'MetaInformation' do
|
|
87
131
|
})
|
88
132
|
end
|
89
133
|
end
|
134
|
+
|
135
|
+
describe 'node_type' do
|
136
|
+
it 'must return name' do
|
137
|
+
document = Nokogiri::HTML('<meta name="description" content="" />')
|
138
|
+
node = document.css('meta').first
|
139
|
+
expect(MetaInformation.send(:node_type, node)).to eq('name')
|
140
|
+
end
|
141
|
+
|
142
|
+
it 'must return property' do
|
143
|
+
document = Nokogiri::HTML(
|
144
|
+
'<meta property="og:title" content="og_title" />'
|
145
|
+
)
|
146
|
+
node = document.css('meta').first
|
147
|
+
expect(MetaInformation.send(:node_type, node)).to eq('property')
|
148
|
+
end
|
149
|
+
|
150
|
+
it 'must return empty string' do
|
151
|
+
document = Nokogiri::HTML('<meta content="og_title" />')
|
152
|
+
node = document.css('meta').first
|
153
|
+
expect(MetaInformation.send(:node_type, node)).to eq('')
|
154
|
+
end
|
155
|
+
end
|
90
156
|
end
|