meta_information 1.0.4 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +6 -0
- data/Gemfile +9 -2
- data/README.md +10 -0
- data/ROADMAP.md +3 -0
- data/lib/meta_information.rb +41 -18
- data/lib/meta_information/version.rb +3 -1
- data/meta_information.gemspec +14 -6
- data/spec/config.rb +3 -1
- data/spec/lib/meta_information_spec.rb +22 -13
- data/spec/shared_stuff.rb +2 -0
- metadata +50 -14
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a986aa80d954fde7cd758f23b62fa4bc6c6ef564ab06a86b55f8b4980739812e
|
4
|
+
data.tar.gz: f2d878d8d1c57665677608e71e6d5f75dcfccf256ea4b625750695bab8740ca1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 00ad8369eb9559cb958b4fd2443c5e09d42a2c03d0ed1d4425ca7247f02d2abc90b66ecb4884a6d11faed122d74a812dbb87e36a273c2c0e8bad63f8dfbd1c87
|
7
|
+
data.tar.gz: 32798218b16280ca2cc0e7f58659d04f8b91853935a00646dade439b85e927305cc2a2f4d8e1bc8d5c0ff72cebfc22c29b45a1d18c753b70102ee13db6ec0d54
|
data/CHANGELOG.md
ADDED
data/Gemfile
CHANGED
@@ -1,5 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
source 'https://rubygems.org'
|
2
|
-
ruby ['>= 2.1.0', '<= 2.
|
4
|
+
ruby ['>= 2.1.0', '<= 2.7.0']
|
3
5
|
|
4
6
|
gem 'nokogiri'
|
5
|
-
|
7
|
+
|
8
|
+
group :development, :test do
|
9
|
+
gem 'pry'
|
10
|
+
gem 'rspec'
|
11
|
+
gem 'rubocop'
|
12
|
+
end
|
data/README.md
CHANGED
@@ -1,18 +1,26 @@
|
|
1
1
|
# MetaInformation
|
2
|
+
|
2
3
|
[![Gem Version](https://badge.fury.io/rb/meta_information.svg)](https://badge.fury.io/rb/meta_information)
|
3
4
|
|
4
5
|
Simple gem for parsing meta information from websites. It scans all meta-tags by name or property attributes.
|
6
|
+
|
5
7
|
## Installation
|
8
|
+
|
6
9
|
Add this line to your application's Gemfile:
|
10
|
+
|
7
11
|
```ruby
|
8
12
|
gem 'meta_information'
|
9
13
|
```
|
14
|
+
|
10
15
|
Then run `bundle install`
|
11
16
|
Or install it yourself as:
|
17
|
+
|
12
18
|
```sh
|
13
19
|
gem install meta_information
|
14
20
|
```
|
21
|
+
|
15
22
|
## Usage
|
23
|
+
|
16
24
|
```ruby
|
17
25
|
require 'pp'
|
18
26
|
meta = MetaInformation.get_meta('https://www.awesome_site.com/awesome_page')
|
@@ -36,5 +44,7 @@ pp meta
|
|
36
44
|
# {:type=>"property", :name=>nil, :property=>"fb:app_id", :content=>"1234567890"},
|
37
45
|
###
|
38
46
|
```
|
47
|
+
|
39
48
|
## License
|
49
|
+
|
40
50
|
MIT License.
|
data/ROADMAP.md
ADDED
data/lib/meta_information.rb
CHANGED
@@ -1,16 +1,18 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'meta_information/version'
|
2
4
|
require 'nokogiri'
|
3
|
-
require '
|
5
|
+
require 'uri'
|
6
|
+
require 'net/http'
|
4
7
|
|
5
|
-
# MetaInformation - module for scaning meta information
|
6
|
-
# form web page
|
7
|
-
# for usage
|
8
|
+
# MetaInformation - module for scanning meta information from a web page
|
8
9
|
# MetaInformation.get_meta('https://some_site.com/some_page')
|
9
10
|
module MetaInformation
|
10
11
|
extend self
|
11
12
|
|
12
13
|
def get_meta(input_url)
|
13
14
|
return not_valid_url_error unless valid_url?(input_url)
|
15
|
+
return not_valid_url_scheme unless valid_url_scheme?(input_url)
|
14
16
|
|
15
17
|
document = create_document(input_url)
|
16
18
|
return nokogiri_error if document == false
|
@@ -21,16 +23,19 @@ module MetaInformation
|
|
21
23
|
|
22
24
|
private
|
23
25
|
|
26
|
+
# TODO: change to struct
|
24
27
|
def create_meta_array(document)
|
25
|
-
document
|
26
|
-
{
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
28
|
+
document
|
29
|
+
.css('meta').reject { |node| node_type(node).nil? }
|
30
|
+
.map do |node|
|
31
|
+
{
|
32
|
+
type: node_type(node),
|
33
|
+
name: node['name'],
|
34
|
+
property: node['property'],
|
35
|
+
content: node['content'],
|
36
|
+
itemprop: node['itemprop']
|
37
|
+
}
|
38
|
+
end
|
34
39
|
end
|
35
40
|
|
36
41
|
def node_type(node)
|
@@ -41,17 +46,28 @@ module MetaInformation
|
|
41
46
|
elsif !node['itemprop'].nil?
|
42
47
|
'itemprop'
|
43
48
|
else
|
44
|
-
|
49
|
+
nil
|
45
50
|
end
|
46
51
|
end
|
47
52
|
|
48
53
|
def valid_url?(uri)
|
49
|
-
!(uri =~ URI.
|
54
|
+
!(uri =~ URI::DEFAULT_PARSER.make_regexp).nil?
|
55
|
+
end
|
56
|
+
|
57
|
+
def valid_url_scheme?(input_url)
|
58
|
+
URI(input_url).is_a?(URI::HTTP)
|
50
59
|
end
|
51
60
|
|
52
61
|
def create_document(input_url)
|
53
|
-
|
54
|
-
|
62
|
+
uri = URI(input_url)
|
63
|
+
res = Net::HTTP.get_response(uri)
|
64
|
+
|
65
|
+
raise 'Response code is not 2xx' if !(res.code.to_i >= 200 && res.code.to_i <= 299)
|
66
|
+
raise 'Response is without body' unless res.class.body_permitted?
|
67
|
+
|
68
|
+
Nokogiri::HTML(res.body)
|
69
|
+
rescue StandardError => e
|
70
|
+
puts e
|
55
71
|
false
|
56
72
|
end
|
57
73
|
|
@@ -62,6 +78,13 @@ module MetaInformation
|
|
62
78
|
}
|
63
79
|
end
|
64
80
|
|
81
|
+
def not_valid_url_scheme
|
82
|
+
{
|
83
|
+
success: false,
|
84
|
+
error: 'url must be http(s)'
|
85
|
+
}
|
86
|
+
end
|
87
|
+
|
65
88
|
def nokogiri_error
|
66
89
|
{
|
67
90
|
success: false,
|
data/meta_information.gemspec
CHANGED
@@ -1,17 +1,25 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
$LOAD_PATH.push File.expand_path('lib', __dir__)
|
4
|
+
require 'meta_information/version'
|
3
5
|
|
4
6
|
Gem::Specification.new do |s|
|
7
|
+
s.required_ruby_version = '>= 2.1.0'
|
5
8
|
s.name = 'meta_information'
|
6
9
|
s.version = MetaInformation::VERSION
|
7
|
-
s.date = '
|
10
|
+
s.date = '2021-02-12'
|
8
11
|
s.summary = 'MetaInformation - Simple gem for parsing meta information'
|
9
|
-
s.description = 'Simple gem for parsing meta information from websites. It
|
12
|
+
s.description = 'Simple gem for parsing meta information from websites. It scans all meta-tags by name, itemprop or property attributes.'
|
10
13
|
s.author = 'Vladislav Kopylov'
|
11
14
|
s.email = 'kopylov.vlad@gmail.com'
|
12
|
-
s.files = `git ls-files`.split("\
|
15
|
+
s.files = `git ls-files -z`.split("\x0").reject { |f| f =~ /^bin/ }
|
16
|
+
s.executables = []
|
13
17
|
s.homepage = 'https://github.com/kopylovvlad/meta_information'
|
14
18
|
s.license = 'MIT'
|
15
19
|
|
16
|
-
s.add_dependency('nokogiri'
|
20
|
+
s.add_dependency('nokogiri')
|
21
|
+
|
22
|
+
s.add_development_dependency('pry')
|
23
|
+
s.add_development_dependency('rspec')
|
24
|
+
s.add_development_dependency('rubocop')
|
17
25
|
end
|
data/spec/config.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require './lib/meta_information'
|
2
|
-
require './spec/shared_stuff
|
3
|
-
require './spec/config
|
4
|
+
require './spec/shared_stuff'
|
5
|
+
require './spec/config'
|
4
6
|
|
5
7
|
RSpec.describe 'MetaInformation' do
|
6
8
|
include_context 'shared stuff'
|
@@ -17,6 +19,13 @@ RSpec.describe 'MetaInformation' do
|
|
17
19
|
)
|
18
20
|
end
|
19
21
|
|
22
|
+
it 'should return not_valid_url_scheme_error' do
|
23
|
+
result = MetaInformation.get_meta('ftp://some_url.com')
|
24
|
+
expect(result).to(
|
25
|
+
eq(success: false, error: 'url must be http(s)')
|
26
|
+
)
|
27
|
+
end
|
28
|
+
|
20
29
|
describe 'with mock for valid_url?' do
|
21
30
|
before do
|
22
31
|
allow(MetaInformation).to receive(:valid_url?).and_return(true)
|
@@ -112,23 +121,23 @@ RSpec.describe 'MetaInformation' do
|
|
112
121
|
describe 'private hash equal' do
|
113
122
|
it 'not_valid_url_error hash' do
|
114
123
|
expect(MetaInformation.send(:not_valid_url_error)).to eq({
|
115
|
-
|
116
|
-
|
117
|
-
|
124
|
+
success: false,
|
125
|
+
error: 'url is not valid'
|
126
|
+
})
|
118
127
|
end
|
119
128
|
|
120
129
|
it 'nokogiri_error hash' do
|
121
130
|
expect(MetaInformation.send(:nokogiri_error)).to eq({
|
122
|
-
|
123
|
-
|
124
|
-
|
131
|
+
success: false,
|
132
|
+
error: 'error with parsing a document'
|
133
|
+
})
|
125
134
|
end
|
126
135
|
|
127
136
|
it 'success_hash hash' do
|
128
137
|
expect(MetaInformation.send(:success_hash)).to eq({
|
129
|
-
|
130
|
-
|
131
|
-
|
138
|
+
succes: 'true',
|
139
|
+
error: ''
|
140
|
+
})
|
132
141
|
end
|
133
142
|
end
|
134
143
|
|
@@ -155,10 +164,10 @@ RSpec.describe 'MetaInformation' do
|
|
155
164
|
expect(MetaInformation.send(:node_type, node)).to eq('itemprop')
|
156
165
|
end
|
157
166
|
|
158
|
-
it 'must return
|
167
|
+
it 'must return nil' do
|
159
168
|
document = Nokogiri::HTML('<meta content="og_title" />')
|
160
169
|
node = document.css('meta').first
|
161
|
-
expect(MetaInformation.send(:node_type, node)).to eq(
|
170
|
+
expect(MetaInformation.send(:node_type, node)).to eq(nil)
|
162
171
|
end
|
163
172
|
end
|
164
173
|
end
|
data/spec/shared_stuff.rb
CHANGED
metadata
CHANGED
@@ -1,36 +1,72 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: meta_information
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vladislav Kopylov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.7'
|
20
17
|
- - ">="
|
21
18
|
- !ruby/object:Gem::Version
|
22
|
-
version:
|
19
|
+
version: '0'
|
23
20
|
type: :runtime
|
24
21
|
prerelease: false
|
25
22
|
version_requirements: !ruby/object:Gem::Requirement
|
26
23
|
requirements:
|
27
|
-
- - "
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pry
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
28
39
|
- !ruby/object:Gem::Version
|
29
|
-
version: '
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
30
45
|
- - ">="
|
31
46
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
33
|
-
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rubocop
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Simple gem for parsing meta information from websites. It scans all meta-tags
|
34
70
|
by name, itemprop or property attributes.
|
35
71
|
email: kopylov.vlad@gmail.com
|
36
72
|
executables: []
|
@@ -39,10 +75,11 @@ extra_rdoc_files: []
|
|
39
75
|
files:
|
40
76
|
- ".gitignore"
|
41
77
|
- ".rspec"
|
42
|
-
-
|
78
|
+
- CHANGELOG.md
|
43
79
|
- Gemfile
|
44
80
|
- LICENSE.txt
|
45
81
|
- README.md
|
82
|
+
- ROADMAP.md
|
46
83
|
- lib/meta_information.rb
|
47
84
|
- lib/meta_information/version.rb
|
48
85
|
- meta_information.gemspec
|
@@ -61,15 +98,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
61
98
|
requirements:
|
62
99
|
- - ">="
|
63
100
|
- !ruby/object:Gem::Version
|
64
|
-
version:
|
101
|
+
version: 2.1.0
|
65
102
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
103
|
requirements:
|
67
104
|
- - ">="
|
68
105
|
- !ruby/object:Gem::Version
|
69
106
|
version: '0'
|
70
107
|
requirements: []
|
71
|
-
|
72
|
-
rubygems_version: 2.6.10
|
108
|
+
rubygems_version: 3.1.2
|
73
109
|
signing_key:
|
74
110
|
specification_version: 4
|
75
111
|
summary: MetaInformation - Simple gem for parsing meta information
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
ruby-2.3.3
|