meta_information 1.0.4 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: d1de3cceee376e56a9f4d49f6608c4d46e8b6054
4
- data.tar.gz: bcb9ea854f3ab2e68f9e510ca88d7100c854a236
2
+ SHA256:
3
+ metadata.gz: a986aa80d954fde7cd758f23b62fa4bc6c6ef564ab06a86b55f8b4980739812e
4
+ data.tar.gz: f2d878d8d1c57665677608e71e6d5f75dcfccf256ea4b625750695bab8740ca1
5
5
  SHA512:
6
- metadata.gz: 669f92134cf2387d3512147dfedc9857fb2922a4f11393c5bfd77ba1fd59c1444fee60bc08d149e0a69922627da7d9f15207f23a33c9015740d891aa3885215a
7
- data.tar.gz: f0b443ef842d188520586e568201a403869988238a0ce3801d2cca86c16b07d96bab1a472d4421686c7dd09a51dcfe0e6f9a5d903e98efe0ddeb0180f5aabac8
6
+ metadata.gz: 00ad8369eb9559cb958b4fd2443c5e09d42a2c03d0ed1d4425ca7247f02d2abc90b66ecb4884a6d11faed122d74a812dbb87e36a273c2c0e8bad63f8dfbd1c87
7
+ data.tar.gz: 32798218b16280ca2cc0e7f58659d04f8b91853935a00646dade439b85e927305cc2a2f4d8e1bc8d5c0ff72cebfc22c29b45a1d18c753b70102ee13db6ec0d54
data/CHANGELOG.md ADDED
@@ -0,0 +1,6 @@
1
+ ## Changelog
2
+
3
+ ### 1.1.0
4
+
5
+ - Added rubocop
6
+ - Replaced open-uri by net/http
data/Gemfile CHANGED
@@ -1,5 +1,12 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
- ruby ['>= 2.1.0', '<= 2.4.0'] # 2.4.0 is latest ruby on 2017-07-09
4
+ ruby ['>= 2.1.0', '<= 2.7.0']
3
5
 
4
6
  gem 'nokogiri'
5
- gem 'rspec'
7
+
8
+ group :development, :test do
9
+ gem 'pry'
10
+ gem 'rspec'
11
+ gem 'rubocop'
12
+ end
data/README.md CHANGED
@@ -1,18 +1,26 @@
1
1
  # MetaInformation
2
+
2
3
  [![Gem Version](https://badge.fury.io/rb/meta_information.svg)](https://badge.fury.io/rb/meta_information)
3
4
 
4
5
  Simple gem for parsing meta information from websites. It scans all meta-tags by name or property attributes.
6
+
5
7
  ## Installation
8
+
6
9
  Add this line to your application's Gemfile:
10
+
7
11
  ```ruby
8
12
  gem 'meta_information'
9
13
  ```
14
+
10
15
  Then run `bundle install`
11
16
  Or install it yourself as:
17
+
12
18
  ```sh
13
19
  gem install meta_information
14
20
  ```
21
+
15
22
  ## Usage
23
+
16
24
  ```ruby
17
25
  require 'pp'
18
26
  meta = MetaInformation.get_meta('https://www.awesome_site.com/awesome_page')
@@ -36,5 +44,7 @@ pp meta
36
44
  # {:type=>"property", :name=>nil, :property=>"fb:app_id", :content=>"1234567890"},
37
45
  ###
38
46
  ```
47
+
39
48
  ## License
49
+
40
50
  MIT License.
data/ROADMAP.md ADDED
@@ -0,0 +1,3 @@
1
+ ## ROADMAP
2
+
3
+ - Instead of returning a hash, the `MetaInformation` module should return a struct object as a result. Each node should be a struct with methods to recognize a tag, such as `.twitter?`, `.og?`, `.fb?`, `.vk?` (checked via the property attribute).
@@ -1,16 +1,18 @@
1
- require 'meta_information/version'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'meta_information/version'
2
4
  require 'nokogiri'
3
- require 'open-uri'
5
+ require 'uri'
6
+ require 'net/http'
4
7
 
5
- # MetaInformation - module for scaning meta information
6
- # form web page
7
- # for usage
8
+ # MetaInformation - module for scanning meta information from a web page
8
9
  # MetaInformation.get_meta('https://some_site.com/some_page')
9
10
  module MetaInformation
10
11
  extend self
11
12
 
12
13
  def get_meta(input_url)
13
14
  return not_valid_url_error unless valid_url?(input_url)
15
+ return not_valid_url_scheme unless valid_url_scheme?(input_url)
14
16
 
15
17
  document = create_document(input_url)
16
18
  return nokogiri_error if document == false
@@ -21,16 +23,19 @@ module MetaInformation
21
23
 
22
24
  private
23
25
 
26
+ # TODO: change to struct
24
27
  def create_meta_array(document)
25
- document.css('meta').map do |node|
26
- {
27
- type: node_type(node),
28
- name: node['name'],
29
- property: node['property'],
30
- content: node['content'],
31
- itemprop: node['itemprop']
32
- }
33
- end
28
+ document
29
+ .css('meta').reject { |node| node_type(node).nil? }
30
+ .map do |node|
31
+ {
32
+ type: node_type(node),
33
+ name: node['name'],
34
+ property: node['property'],
35
+ content: node['content'],
36
+ itemprop: node['itemprop']
37
+ }
38
+ end
34
39
  end
35
40
 
36
41
  def node_type(node)
@@ -41,17 +46,28 @@ module MetaInformation
41
46
  elsif !node['itemprop'].nil?
42
47
  'itemprop'
43
48
  else
44
- ''
49
+ nil
45
50
  end
46
51
  end
47
52
 
48
53
  def valid_url?(uri)
49
- !(uri =~ URI.regexp).nil?
54
+ !(uri =~ URI::DEFAULT_PARSER.make_regexp).nil?
55
+ end
56
+
57
+ def valid_url_scheme?(input_url)
58
+ URI(input_url).is_a?(URI::HTTP)
50
59
  end
51
60
 
52
61
  def create_document(input_url)
53
- Nokogiri::HTML(open(input_url))
54
- rescue
62
+ uri = URI(input_url)
63
+ res = Net::HTTP.get_response(uri)
64
+
65
+ raise 'Response code is not 2xx' if !(res.code.to_i >= 200 && res.code.to_i <= 299)
66
+ raise 'Response is without body' unless res.class.body_permitted?
67
+
68
+ Nokogiri::HTML(res.body)
69
+ rescue StandardError => e
70
+ puts e
55
71
  false
56
72
  end
57
73
 
@@ -62,6 +78,13 @@ module MetaInformation
62
78
  }
63
79
  end
64
80
 
81
+ def not_valid_url_scheme
82
+ {
83
+ success: false,
84
+ error: 'url must be http(s)'
85
+ }
86
+ end
87
+
65
88
  def nokogiri_error
66
89
  {
67
90
  success: false,
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module MetaInformation
2
- VERSION = '1.0.4'
4
+ VERSION = '1.1.0'
3
5
  end
@@ -1,17 +1,25 @@
1
- $:.push File.expand_path("../lib", __FILE__)
2
- require "meta_information/version"
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.push File.expand_path('lib', __dir__)
4
+ require 'meta_information/version'
3
5
 
4
6
  Gem::Specification.new do |s|
7
+ s.required_ruby_version = '>= 2.1.0'
5
8
  s.name = 'meta_information'
6
9
  s.version = MetaInformation::VERSION
7
- s.date = '2017-07-31'
10
+ s.date = '2021-02-12'
8
11
  s.summary = 'MetaInformation - Simple gem for parsing meta information'
9
- s.description = 'Simple gem for parsing meta information from websites. It scan all meta-tags by name, itemprop or property attributes.'
12
+ s.description = 'Simple gem for parsing meta information from websites. It scans all meta-tags by name, itemprop or property attributes.'
10
13
  s.author = 'Vladislav Kopylov'
11
14
  s.email = 'kopylov.vlad@gmail.com'
12
- s.files = `git ls-files`.split("\n")
15
+ s.files = `git ls-files -z`.split("\x0").reject { |f| f =~ /^bin/ }
16
+ s.executables = []
13
17
  s.homepage = 'https://github.com/kopylovvlad/meta_information'
14
18
  s.license = 'MIT'
15
19
 
16
- s.add_dependency('nokogiri', '~> 1.7', '>= 1.7.0')
20
+ s.add_dependency('nokogiri')
21
+
22
+ s.add_development_dependency('pry')
23
+ s.add_development_dependency('rspec')
24
+ s.add_development_dependency('rubocop')
17
25
  end
data/spec/config.rb CHANGED
@@ -1,4 +1,6 @@
1
+ # frozen_string_literal: true
2
+
1
3
  RSpec.configure do |rspec|
2
4
  rspec.shared_context_metadata_behavior = :apply_to_host_groups
3
5
  rspec.include_context 'shared stuff', include_shared: true
4
- end
6
+ end
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require './lib/meta_information'
2
- require './spec/shared_stuff.rb'
3
- require './spec/config.rb'
4
+ require './spec/shared_stuff'
5
+ require './spec/config'
4
6
 
5
7
  RSpec.describe 'MetaInformation' do
6
8
  include_context 'shared stuff'
@@ -17,6 +19,13 @@ RSpec.describe 'MetaInformation' do
17
19
  )
18
20
  end
19
21
 
22
+ it 'should return not_valid_url_scheme_error' do
23
+ result = MetaInformation.get_meta('ftp://some_url.com')
24
+ expect(result).to(
25
+ eq(success: false, error: 'url must be http(s)')
26
+ )
27
+ end
28
+
20
29
  describe 'with mock for valid_url?' do
21
30
  before do
22
31
  allow(MetaInformation).to receive(:valid_url?).and_return(true)
@@ -112,23 +121,23 @@ RSpec.describe 'MetaInformation' do
112
121
  describe 'private hash equal' do
113
122
  it 'not_valid_url_error hash' do
114
123
  expect(MetaInformation.send(:not_valid_url_error)).to eq({
115
- success: false,
116
- error: 'url is not valid'
117
- })
124
+ success: false,
125
+ error: 'url is not valid'
126
+ })
118
127
  end
119
128
 
120
129
  it 'nokogiri_error hash' do
121
130
  expect(MetaInformation.send(:nokogiri_error)).to eq({
122
- success: false,
123
- error: 'error with parsing a document'
124
- })
131
+ success: false,
132
+ error: 'error with parsing a document'
133
+ })
125
134
  end
126
135
 
127
136
  it 'success_hash hash' do
128
137
  expect(MetaInformation.send(:success_hash)).to eq({
129
- succes: 'true',
130
- error: ''
131
- })
138
+ succes: 'true',
139
+ error: ''
140
+ })
132
141
  end
133
142
  end
134
143
 
@@ -155,10 +164,10 @@ RSpec.describe 'MetaInformation' do
155
164
  expect(MetaInformation.send(:node_type, node)).to eq('itemprop')
156
165
  end
157
166
 
158
- it 'must return empty string' do
167
+ it 'must return nil' do
159
168
  document = Nokogiri::HTML('<meta content="og_title" />')
160
169
  node = document.css('meta').first
161
- expect(MetaInformation.send(:node_type, node)).to eq('')
170
+ expect(MetaInformation.send(:node_type, node)).to eq(nil)
162
171
  end
163
172
  end
164
173
  end
data/spec/shared_stuff.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  RSpec.shared_context 'shared stuff', shared_context: :metadata do
2
4
  let(:default_html) do
3
5
  '
metadata CHANGED
@@ -1,36 +1,72 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: meta_information
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.4
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vladislav Kopylov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-07-31 00:00:00.000000000 Z
11
+ date: 2021-02-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.7'
20
17
  - - ">="
21
18
  - !ruby/object:Gem::Version
22
- version: 1.7.0
19
+ version: '0'
23
20
  type: :runtime
24
21
  prerelease: false
25
22
  version_requirements: !ruby/object:Gem::Requirement
26
23
  requirements:
27
- - - "~>"
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
28
39
  - !ruby/object:Gem::Version
29
- version: '1.7'
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
30
45
  - - ">="
31
46
  - !ruby/object:Gem::Version
32
- version: 1.7.0
33
- description: Simple gem for parsing meta information from websites. It scan all meta-tags
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rubocop
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Simple gem for parsing meta information from websites. It scans all meta-tags
34
70
  by name, itemprop or property attributes.
35
71
  email: kopylov.vlad@gmail.com
36
72
  executables: []
@@ -39,10 +75,11 @@ extra_rdoc_files: []
39
75
  files:
40
76
  - ".gitignore"
41
77
  - ".rspec"
42
- - ".ruby-version"
78
+ - CHANGELOG.md
43
79
  - Gemfile
44
80
  - LICENSE.txt
45
81
  - README.md
82
+ - ROADMAP.md
46
83
  - lib/meta_information.rb
47
84
  - lib/meta_information/version.rb
48
85
  - meta_information.gemspec
@@ -61,15 +98,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
61
98
  requirements:
62
99
  - - ">="
63
100
  - !ruby/object:Gem::Version
64
- version: '0'
101
+ version: 2.1.0
65
102
  required_rubygems_version: !ruby/object:Gem::Requirement
66
103
  requirements:
67
104
  - - ">="
68
105
  - !ruby/object:Gem::Version
69
106
  version: '0'
70
107
  requirements: []
71
- rubyforge_project:
72
- rubygems_version: 2.6.10
108
+ rubygems_version: 3.1.2
73
109
  signing_key:
74
110
  specification_version: 4
75
111
  summary: MetaInformation - Simple gem for parsing meta information
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- ruby-2.3.3