meta_information 1.0.4 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: d1de3cceee376e56a9f4d49f6608c4d46e8b6054
4
- data.tar.gz: bcb9ea854f3ab2e68f9e510ca88d7100c854a236
2
+ SHA256:
3
+ metadata.gz: a986aa80d954fde7cd758f23b62fa4bc6c6ef564ab06a86b55f8b4980739812e
4
+ data.tar.gz: f2d878d8d1c57665677608e71e6d5f75dcfccf256ea4b625750695bab8740ca1
5
5
  SHA512:
6
- metadata.gz: 669f92134cf2387d3512147dfedc9857fb2922a4f11393c5bfd77ba1fd59c1444fee60bc08d149e0a69922627da7d9f15207f23a33c9015740d891aa3885215a
7
- data.tar.gz: f0b443ef842d188520586e568201a403869988238a0ce3801d2cca86c16b07d96bab1a472d4421686c7dd09a51dcfe0e6f9a5d903e98efe0ddeb0180f5aabac8
6
+ metadata.gz: 00ad8369eb9559cb958b4fd2443c5e09d42a2c03d0ed1d4425ca7247f02d2abc90b66ecb4884a6d11faed122d74a812dbb87e36a273c2c0e8bad63f8dfbd1c87
7
+ data.tar.gz: 32798218b16280ca2cc0e7f58659d04f8b91853935a00646dade439b85e927305cc2a2f4d8e1bc8d5c0ff72cebfc22c29b45a1d18c753b70102ee13db6ec0d54
data/CHANGELOG.md ADDED
@@ -0,0 +1,6 @@
1
+ ## Changelog
2
+
3
+ ### 1.1.0
4
+
5
+ - Added rubocop
6
+ - Replaced open-uri by net/http
data/Gemfile CHANGED
@@ -1,5 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
- ruby ['>= 2.1.0', '<= 2.4.0'] # 2.4.0 is latest ruby on 2017-07-09
4
+ ruby ['>= 2.1.0', '<= 2.7.0']
3
5
 
4
6
  gem 'nokogiri'
5
- gem 'rspec'
7
+
8
+ group :development, :test do
9
+ gem 'pry'
10
+ gem 'rspec'
11
+ gem 'rubocop'
12
+ end
data/README.md CHANGED
@@ -1,18 +1,26 @@
1
1
  # MetaInformation
2
+
2
3
  [![Gem Version](https://badge.fury.io/rb/meta_information.svg)](https://badge.fury.io/rb/meta_information)
3
4
 
4
5
  Simple gem for parsing meta information from websites. It scans all meta-tags by name or property attributes.
6
+
5
7
  ## Installation
8
+
6
9
  Add this line to your application's Gemfile:
10
+
7
11
  ```ruby
8
12
  gem 'meta_information'
9
13
  ```
14
+
10
15
  Then run `bundle install`
11
16
  Or install it yourself as:
17
+
12
18
  ```sh
13
19
  gem install meta_information
14
20
  ```
21
+
15
22
  ## Usage
23
+
16
24
  ```ruby
17
25
  require 'pp'
18
26
  meta = MetaInformation.get_meta('https://www.awesome_site.com/awesome_page')
@@ -36,5 +44,7 @@ pp meta
36
44
  # {:type=>"property", :name=>nil, :property=>"fb:app_id", :content=>"1234567890"},
37
45
  ###
38
46
  ```
47
+
39
48
  ## License
49
+
40
50
  MIT License.
data/ROADMAP.md ADDED
@@ -0,0 +1,3 @@
1
+ ## ROADMAP
2
+
3
+ - Instead of returning a hash, module `MetaInformation` should return a struct object as a result. Each node should be a struct with methods to recognize a tag such as `.twitter?`, `.og?`, `.fb?`, `.vk?` (check it by property attr).
@@ -1,16 +1,18 @@
1
- require 'meta_information/version'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'meta_information/version'
2
4
  require 'nokogiri'
3
- require 'open-uri'
5
+ require 'uri'
6
+ require 'net/http'
4
7
 
5
- # MetaInformation - module for scaning meta information
6
- # form web page
7
- # for usage
8
+ # MetaInformation - module for scanning meta information from a web page
8
9
  # MetaInformation.get_meta('https://some_site.com/some_page')
9
10
  module MetaInformation
10
11
  extend self
11
12
 
12
13
  def get_meta(input_url)
13
14
  return not_valid_url_error unless valid_url?(input_url)
15
+ return not_valid_url_scheme unless valid_url_scheme?(input_url)
14
16
 
15
17
  document = create_document(input_url)
16
18
  return nokogiri_error if document == false
@@ -21,16 +23,19 @@ module MetaInformation
21
23
 
22
24
  private
23
25
 
26
+ # TODO: change to struct
24
27
  def create_meta_array(document)
25
- document.css('meta').map do |node|
26
- {
27
- type: node_type(node),
28
- name: node['name'],
29
- property: node['property'],
30
- content: node['content'],
31
- itemprop: node['itemprop']
32
- }
33
- end
28
+ document
29
+ .css('meta').reject { |node| node_type(node).nil? }
30
+ .map do |node|
31
+ {
32
+ type: node_type(node),
33
+ name: node['name'],
34
+ property: node['property'],
35
+ content: node['content'],
36
+ itemprop: node['itemprop']
37
+ }
38
+ end
34
39
  end
35
40
 
36
41
  def node_type(node)
@@ -41,17 +46,28 @@ module MetaInformation
41
46
  elsif !node['itemprop'].nil?
42
47
  'itemprop'
43
48
  else
44
- ''
49
+ nil
45
50
  end
46
51
  end
47
52
 
48
53
  def valid_url?(uri)
49
- !(uri =~ URI.regexp).nil?
54
+ !(uri =~ URI::DEFAULT_PARSER.make_regexp).nil?
55
+ end
56
+
57
+ def valid_url_scheme?(input_url)
58
+ URI(input_url).is_a?(URI::HTTP)
50
59
  end
51
60
 
52
61
  def create_document(input_url)
53
- Nokogiri::HTML(open(input_url))
54
- rescue
62
+ uri = URI(input_url)
63
+ res = Net::HTTP.get_response(uri)
64
+
65
+ raise 'Response code is not 2xx' if !(res.code.to_i >= 200 && res.code.to_i <= 299)
66
+ raise 'Response is without body' unless res.class.body_permitted?
67
+
68
+ Nokogiri::HTML(res.body)
69
+ rescue StandardError => e
70
+ puts e
55
71
  false
56
72
  end
57
73
 
@@ -62,6 +78,13 @@ module MetaInformation
62
78
  }
63
79
  end
64
80
 
81
+ def not_valid_url_scheme
82
+ {
83
+ success: false,
84
+ error: 'url must be http(s)'
85
+ }
86
+ end
87
+
65
88
  def nokogiri_error
66
89
  {
67
90
  success: false,
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module MetaInformation
2
- VERSION = '1.0.4'
4
+ VERSION = '1.1.0'
3
5
  end
@@ -1,17 +1,25 @@
1
- $:.push File.expand_path("../lib", __FILE__)
2
- require "meta_information/version"
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.push File.expand_path('lib', __dir__)
4
+ require 'meta_information/version'
3
5
 
4
6
  Gem::Specification.new do |s|
7
+ s.required_ruby_version = '>= 2.1.0'
5
8
  s.name = 'meta_information'
6
9
  s.version = MetaInformation::VERSION
7
- s.date = '2017-07-31'
10
+ s.date = '2021-02-12'
8
11
  s.summary = 'MetaInformation - Simple gem for parsing meta information'
9
- s.description = 'Simple gem for parsing meta information from websites. It scan all meta-tags by name, itemprop or property attributes.'
12
+ s.description = 'Simple gem for parsing meta information from websites. It scans all meta-tags by name, itemprop or property attributes.'
10
13
  s.author = 'Vladislav Kopylov'
11
14
  s.email = 'kopylov.vlad@gmail.com'
12
- s.files = `git ls-files`.split("\n")
15
+ s.files = `git ls-files -z`.split("\x0").reject { |f| f =~ /^bin/ }
16
+ s.executables = []
13
17
  s.homepage = 'https://github.com/kopylovvlad/meta_information'
14
18
  s.license = 'MIT'
15
19
 
16
- s.add_dependency('nokogiri', '~> 1.7', '>= 1.7.0')
20
+ s.add_dependency('nokogiri')
21
+
22
+ s.add_development_dependency('pry')
23
+ s.add_development_dependency('rspec')
24
+ s.add_development_dependency('rubocop')
17
25
  end
data/spec/config.rb CHANGED
@@ -1,4 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  RSpec.configure do |rspec|
2
4
  rspec.shared_context_metadata_behavior = :apply_to_host_groups
3
5
  rspec.include_context 'shared stuff', include_shared: true
4
- end
6
+ end
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require './lib/meta_information'
2
- require './spec/shared_stuff.rb'
3
- require './spec/config.rb'
4
+ require './spec/shared_stuff'
5
+ require './spec/config'
4
6
 
5
7
  RSpec.describe 'MetaInformation' do
6
8
  include_context 'shared stuff'
@@ -17,6 +19,13 @@ RSpec.describe 'MetaInformation' do
17
19
  )
18
20
  end
19
21
 
22
+ it 'should return not_valid_url_scheme_error' do
23
+ result = MetaInformation.get_meta('ftp://some_url.com')
24
+ expect(result).to(
25
+ eq(success: false, error: 'url must be http(s)')
26
+ )
27
+ end
28
+
20
29
  describe 'with mock for valid_url?' do
21
30
  before do
22
31
  allow(MetaInformation).to receive(:valid_url?).and_return(true)
@@ -112,23 +121,23 @@ RSpec.describe 'MetaInformation' do
112
121
  describe 'private hash equal' do
113
122
  it 'not_valid_url_error hash' do
114
123
  expect(MetaInformation.send(:not_valid_url_error)).to eq({
115
- success: false,
116
- error: 'url is not valid'
117
- })
124
+ success: false,
125
+ error: 'url is not valid'
126
+ })
118
127
  end
119
128
 
120
129
  it 'nokogiri_error hash' do
121
130
  expect(MetaInformation.send(:nokogiri_error)).to eq({
122
- success: false,
123
- error: 'error with parsing a document'
124
- })
131
+ success: false,
132
+ error: 'error with parsing a document'
133
+ })
125
134
  end
126
135
 
127
136
  it 'success_hash hash' do
128
137
  expect(MetaInformation.send(:success_hash)).to eq({
129
- succes: 'true',
130
- error: ''
131
- })
138
+ succes: 'true',
139
+ error: ''
140
+ })
132
141
  end
133
142
  end
134
143
 
@@ -155,10 +164,10 @@ RSpec.describe 'MetaInformation' do
155
164
  expect(MetaInformation.send(:node_type, node)).to eq('itemprop')
156
165
  end
157
166
 
158
- it 'must return empty string' do
167
+ it 'must return nil' do
159
168
  document = Nokogiri::HTML('<meta content="og_title" />')
160
169
  node = document.css('meta').first
161
- expect(MetaInformation.send(:node_type, node)).to eq('')
170
+ expect(MetaInformation.send(:node_type, node)).to eq(nil)
162
171
  end
163
172
  end
164
173
  end
data/spec/shared_stuff.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  RSpec.shared_context 'shared stuff', shared_context: :metadata do
2
4
  let(:default_html) do
3
5
  '
metadata CHANGED
@@ -1,36 +1,72 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: meta_information
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vladislav Kopylov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-07-31 00:00:00.000000000 Z
11
+ date: 2021-02-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.7'
20
17
  - - ">="
21
18
  - !ruby/object:Gem::Version
22
- version: 1.7.0
19
+ version: '0'
23
20
  type: :runtime
24
21
  prerelease: false
25
22
  version_requirements: !ruby/object:Gem::Requirement
26
23
  requirements:
27
- - - "~>"
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
28
39
  - !ruby/object:Gem::Version
29
- version: '1.7'
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
30
45
  - - ">="
31
46
  - !ruby/object:Gem::Version
32
- version: 1.7.0
33
- description: Simple gem for parsing meta information from websites. It scan all meta-tags
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rubocop
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Simple gem for parsing meta information from websites. It scans all meta-tags
34
70
  by name, itemprop or property attributes.
35
71
  email: kopylov.vlad@gmail.com
36
72
  executables: []
@@ -39,10 +75,11 @@ extra_rdoc_files: []
39
75
  files:
40
76
  - ".gitignore"
41
77
  - ".rspec"
42
- - ".ruby-version"
78
+ - CHANGELOG.md
43
79
  - Gemfile
44
80
  - LICENSE.txt
45
81
  - README.md
82
+ - ROADMAP.md
46
83
  - lib/meta_information.rb
47
84
  - lib/meta_information/version.rb
48
85
  - meta_information.gemspec
@@ -61,15 +98,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
61
98
  requirements:
62
99
  - - ">="
63
100
  - !ruby/object:Gem::Version
64
- version: '0'
101
+ version: 2.1.0
65
102
  required_rubygems_version: !ruby/object:Gem::Requirement
66
103
  requirements:
67
104
  - - ">="
68
105
  - !ruby/object:Gem::Version
69
106
  version: '0'
70
107
  requirements: []
71
- rubyforge_project:
72
- rubygems_version: 2.6.10
108
+ rubygems_version: 3.1.2
73
109
  signing_key:
74
110
  specification_version: 4
75
111
  summary: MetaInformation - Simple gem for parsing meta information
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- ruby-2.3.3