meta_information 1.0.4 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +6 -0
- data/Gemfile +9 -2
- data/README.md +10 -0
- data/ROADMAP.md +3 -0
- data/lib/meta_information.rb +41 -18
- data/lib/meta_information/version.rb +3 -1
- data/meta_information.gemspec +14 -6
- data/spec/config.rb +3 -1
- data/spec/lib/meta_information_spec.rb +22 -13
- data/spec/shared_stuff.rb +2 -0
- metadata +50 -14
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a986aa80d954fde7cd758f23b62fa4bc6c6ef564ab06a86b55f8b4980739812e
|
4
|
+
data.tar.gz: f2d878d8d1c57665677608e71e6d5f75dcfccf256ea4b625750695bab8740ca1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 00ad8369eb9559cb958b4fd2443c5e09d42a2c03d0ed1d4425ca7247f02d2abc90b66ecb4884a6d11faed122d74a812dbb87e36a273c2c0e8bad63f8dfbd1c87
|
7
|
+
data.tar.gz: 32798218b16280ca2cc0e7f58659d04f8b91853935a00646dade439b85e927305cc2a2f4d8e1bc8d5c0ff72cebfc22c29b45a1d18c753b70102ee13db6ec0d54
|
data/CHANGELOG.md
ADDED
data/Gemfile
CHANGED
@@ -1,5 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
source 'https://rubygems.org'
|
2
|
-
ruby ['>= 2.1.0', '<= 2.
|
4
|
+
ruby ['>= 2.1.0', '<= 2.7.0']
|
3
5
|
|
4
6
|
gem 'nokogiri'
|
5
|
-
|
7
|
+
|
8
|
+
group :development, :test do
|
9
|
+
gem 'pry'
|
10
|
+
gem 'rspec'
|
11
|
+
gem 'rubocop'
|
12
|
+
end
|
data/README.md
CHANGED
@@ -1,18 +1,26 @@
|
|
1
1
|
# MetaInformation
|
2
|
+
|
2
3
|
[![Gem Version](https://badge.fury.io/rb/meta_information.svg)](https://badge.fury.io/rb/meta_information)
|
3
4
|
|
4
5
|
Simple gem for parsing meta information from websites. It scans all meta-tags by their name, itemprop or property attributes.
|
6
|
+
|
5
7
|
## Installation
|
8
|
+
|
6
9
|
Add this line to your application's Gemfile:
|
10
|
+
|
7
11
|
```ruby
|
8
12
|
gem 'meta_information'
|
9
13
|
```
|
14
|
+
|
10
15
|
Then run `bundle install`
|
11
16
|
Or install it yourself as:
|
17
|
+
|
12
18
|
```sh
|
13
19
|
gem install meta_information
|
14
20
|
```
|
21
|
+
|
15
22
|
## Usage
|
23
|
+
|
16
24
|
```ruby
|
17
25
|
require 'pp'
|
18
26
|
meta = MetaInformation.get_meta('https://www.awesome_site.com/awesome_page')
|
@@ -36,5 +44,7 @@ pp meta
|
|
36
44
|
# {:type=>"property", :name=>nil, :property=>"fb:app_id", :content=>"1234567890"},
|
37
45
|
###
|
38
46
|
```
|
47
|
+
|
39
48
|
## License
|
49
|
+
|
40
50
|
MIT License.
|
data/ROADMAP.md
ADDED
data/lib/meta_information.rb
CHANGED
@@ -1,16 +1,18 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'meta_information/version'
|
2
4
|
require 'nokogiri'
|
3
|
-
require '
|
5
|
+
require 'uri'
|
6
|
+
require 'net/http'
|
4
7
|
|
5
|
-
# MetaInformation - module for scaning meta information
|
6
|
-
# form web page
|
7
|
-
# for usage
|
8
|
+
# MetaInformation - module for scanning meta information from a web page
|
8
9
|
# MetaInformation.get_meta('https://some_site.com/some_page')
|
9
10
|
module MetaInformation
|
10
11
|
extend self
|
11
12
|
|
12
13
|
def get_meta(input_url)
|
13
14
|
return not_valid_url_error unless valid_url?(input_url)
|
15
|
+
return not_valid_url_scheme unless valid_url_scheme?(input_url)
|
14
16
|
|
15
17
|
document = create_document(input_url)
|
16
18
|
return nokogiri_error if document == false
|
@@ -21,16 +23,19 @@ module MetaInformation
|
|
21
23
|
|
22
24
|
private
|
23
25
|
|
26
|
+
# TODO: change to struct
|
24
27
|
def create_meta_array(document)
|
25
|
-
document
|
26
|
-
{
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
28
|
+
document
|
29
|
+
.css('meta').reject { |node| node_type(node).nil? }
|
30
|
+
.map do |node|
|
31
|
+
{
|
32
|
+
type: node_type(node),
|
33
|
+
name: node['name'],
|
34
|
+
property: node['property'],
|
35
|
+
content: node['content'],
|
36
|
+
itemprop: node['itemprop']
|
37
|
+
}
|
38
|
+
end
|
34
39
|
end
|
35
40
|
|
36
41
|
def node_type(node)
|
@@ -41,17 +46,28 @@ module MetaInformation
|
|
41
46
|
elsif !node['itemprop'].nil?
|
42
47
|
'itemprop'
|
43
48
|
else
|
44
|
-
|
49
|
+
nil
|
45
50
|
end
|
46
51
|
end
|
47
52
|
|
48
53
|
def valid_url?(uri)
|
49
|
-
!(uri =~ URI.
|
54
|
+
!(uri =~ URI::DEFAULT_PARSER.make_regexp).nil?
|
55
|
+
end
|
56
|
+
|
57
|
+
def valid_url_scheme?(input_url)
|
58
|
+
URI(input_url).is_a?(URI::HTTP)
|
50
59
|
end
|
51
60
|
|
52
61
|
def create_document(input_url)
|
53
|
-
|
54
|
-
|
62
|
+
uri = URI(input_url)
|
63
|
+
res = Net::HTTP.get_response(uri)
|
64
|
+
|
65
|
+
raise 'Response code is not 2xx' if !(res.code.to_i >= 200 && res.code.to_i <= 299)
|
66
|
+
raise 'Response is without body' unless res.class.body_permitted?
|
67
|
+
|
68
|
+
Nokogiri::HTML(res.body)
|
69
|
+
rescue StandardError => e
|
70
|
+
puts e
|
55
71
|
false
|
56
72
|
end
|
57
73
|
|
@@ -62,6 +78,13 @@ module MetaInformation
|
|
62
78
|
}
|
63
79
|
end
|
64
80
|
|
81
|
+
def not_valid_url_scheme
|
82
|
+
{
|
83
|
+
success: false,
|
84
|
+
error: 'url must be http(s)'
|
85
|
+
}
|
86
|
+
end
|
87
|
+
|
65
88
|
def nokogiri_error
|
66
89
|
{
|
67
90
|
success: false,
|
data/meta_information.gemspec
CHANGED
@@ -1,17 +1,25 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
$LOAD_PATH.push File.expand_path('lib', __dir__)
|
4
|
+
require 'meta_information/version'
|
3
5
|
|
4
6
|
Gem::Specification.new do |s|
|
7
|
+
s.required_ruby_version = '>= 2.1.0'
|
5
8
|
s.name = 'meta_information'
|
6
9
|
s.version = MetaInformation::VERSION
|
7
|
-
s.date = '
|
10
|
+
s.date = '2021-02-12'
|
8
11
|
s.summary = 'MetaInformation - Simple gem for parsing meta information'
|
9
|
-
s.description = 'Simple gem for parsing meta information from websites. It
|
12
|
+
s.description = 'Simple gem for parsing meta information from websites. It scans all meta-tags by name, itemprop or property attributes.'
|
10
13
|
s.author = 'Vladislav Kopylov'
|
11
14
|
s.email = 'kopylov.vlad@gmail.com'
|
12
|
-
s.files = `git ls-files`.split("\
|
15
|
+
s.files = `git ls-files -z`.split("\x0").reject { |f| f =~ /^bin/ }
|
16
|
+
s.executables = []
|
13
17
|
s.homepage = 'https://github.com/kopylovvlad/meta_information'
|
14
18
|
s.license = 'MIT'
|
15
19
|
|
16
|
-
s.add_dependency('nokogiri'
|
20
|
+
s.add_dependency('nokogiri')
|
21
|
+
|
22
|
+
s.add_development_dependency('pry')
|
23
|
+
s.add_development_dependency('rspec')
|
24
|
+
s.add_development_dependency('rubocop')
|
17
25
|
end
|
data/spec/config.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require './lib/meta_information'
|
2
|
-
require './spec/shared_stuff
|
3
|
-
require './spec/config
|
4
|
+
require './spec/shared_stuff'
|
5
|
+
require './spec/config'
|
4
6
|
|
5
7
|
RSpec.describe 'MetaInformation' do
|
6
8
|
include_context 'shared stuff'
|
@@ -17,6 +19,13 @@ RSpec.describe 'MetaInformation' do
|
|
17
19
|
)
|
18
20
|
end
|
19
21
|
|
22
|
+
it 'should return not_valid_url_scheme_error' do
|
23
|
+
result = MetaInformation.get_meta('ftp://some_url.com')
|
24
|
+
expect(result).to(
|
25
|
+
eq(success: false, error: 'url must be http(s)')
|
26
|
+
)
|
27
|
+
end
|
28
|
+
|
20
29
|
describe 'with mock for valid_url?' do
|
21
30
|
before do
|
22
31
|
allow(MetaInformation).to receive(:valid_url?).and_return(true)
|
@@ -112,23 +121,23 @@ RSpec.describe 'MetaInformation' do
|
|
112
121
|
describe 'private hash equal' do
|
113
122
|
it 'not_valid_url_error hash' do
|
114
123
|
expect(MetaInformation.send(:not_valid_url_error)).to eq({
|
115
|
-
|
116
|
-
|
117
|
-
|
124
|
+
success: false,
|
125
|
+
error: 'url is not valid'
|
126
|
+
})
|
118
127
|
end
|
119
128
|
|
120
129
|
it 'nokogiri_error hash' do
|
121
130
|
expect(MetaInformation.send(:nokogiri_error)).to eq({
|
122
|
-
|
123
|
-
|
124
|
-
|
131
|
+
success: false,
|
132
|
+
error: 'error with parsing a document'
|
133
|
+
})
|
125
134
|
end
|
126
135
|
|
127
136
|
it 'success_hash hash' do
|
128
137
|
expect(MetaInformation.send(:success_hash)).to eq({
|
129
|
-
|
130
|
-
|
131
|
-
|
138
|
+
succes: 'true',
|
139
|
+
error: ''
|
140
|
+
})
|
132
141
|
end
|
133
142
|
end
|
134
143
|
|
@@ -155,10 +164,10 @@ RSpec.describe 'MetaInformation' do
|
|
155
164
|
expect(MetaInformation.send(:node_type, node)).to eq('itemprop')
|
156
165
|
end
|
157
166
|
|
158
|
-
it 'must return
|
167
|
+
it 'must return nil' do
|
159
168
|
document = Nokogiri::HTML('<meta content="og_title" />')
|
160
169
|
node = document.css('meta').first
|
161
|
-
expect(MetaInformation.send(:node_type, node)).to eq(
|
170
|
+
expect(MetaInformation.send(:node_type, node)).to eq(nil)
|
162
171
|
end
|
163
172
|
end
|
164
173
|
end
|
data/spec/shared_stuff.rb
CHANGED
metadata
CHANGED
@@ -1,36 +1,72 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: meta_information
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vladislav Kopylov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.7'
|
20
17
|
- - ">="
|
21
18
|
- !ruby/object:Gem::Version
|
22
|
-
version:
|
19
|
+
version: '0'
|
23
20
|
type: :runtime
|
24
21
|
prerelease: false
|
25
22
|
version_requirements: !ruby/object:Gem::Requirement
|
26
23
|
requirements:
|
27
|
-
- - "
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pry
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
28
39
|
- !ruby/object:Gem::Version
|
29
|
-
version: '
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
30
45
|
- - ">="
|
31
46
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
33
|
-
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rubocop
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Simple gem for parsing meta information from websites. It scans all meta-tags
|
34
70
|
by name, itemprop or property attributes.
|
35
71
|
email: kopylov.vlad@gmail.com
|
36
72
|
executables: []
|
@@ -39,10 +75,11 @@ extra_rdoc_files: []
|
|
39
75
|
files:
|
40
76
|
- ".gitignore"
|
41
77
|
- ".rspec"
|
42
|
-
-
|
78
|
+
- CHANGELOG.md
|
43
79
|
- Gemfile
|
44
80
|
- LICENSE.txt
|
45
81
|
- README.md
|
82
|
+
- ROADMAP.md
|
46
83
|
- lib/meta_information.rb
|
47
84
|
- lib/meta_information/version.rb
|
48
85
|
- meta_information.gemspec
|
@@ -61,15 +98,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
61
98
|
requirements:
|
62
99
|
- - ">="
|
63
100
|
- !ruby/object:Gem::Version
|
64
|
-
version:
|
101
|
+
version: 2.1.0
|
65
102
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
103
|
requirements:
|
67
104
|
- - ">="
|
68
105
|
- !ruby/object:Gem::Version
|
69
106
|
version: '0'
|
70
107
|
requirements: []
|
71
|
-
|
72
|
-
rubygems_version: 2.6.10
|
108
|
+
rubygems_version: 3.1.2
|
73
109
|
signing_key:
|
74
110
|
specification_version: 4
|
75
111
|
summary: MetaInformation - Simple gem for parsing meta information
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
ruby-2.3.3
|