sitemaps_parser 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 7ba72de307e8ad869330f8577085aeb085f9d374
4
- data.tar.gz: bd1a8740911500318dd227ee311fa31f2e457f81
2
+ SHA256:
3
+ metadata.gz: dd9e770cf06954a14d869600cb2c6281c7ee993f2d29d628abb1cf5a5b4809f1
4
+ data.tar.gz: bea190bd461d7eec0e72f092ae56546dce1b4fc2568aa2dd4757abf9b4675067
5
5
  SHA512:
6
- metadata.gz: 8b504a926a9e51611258f93ba88a85db5664e0939f6bc5337599887b6f258738d8560140f20521bbc2698b0322ec2e3d67abfbd57d02f01258875954f6664a5a
7
- data.tar.gz: 2a92c0b888363b473a1cb3d981c3533b4fafe15eacdf7c920aa35e131c8a4a9db2284cef13958b73e52886110c8556b61acf61a6d7272c1e3c375434540a702d
6
+ metadata.gz: d4ee0f7fcd11e03a9f1a031088a0171b6b13e6a2c6bdbba1b9aac1232fe526a1947f7bcabf402714b071efa6fee5561fb413e154b8558d87d77d39fea8932117
7
+ data.tar.gz: 6bfee33a31ecb2680eabe09a9bf39ca19fb8c72e2f45af78275217f9898793039090c7352c7fba0b041331a15ef74fe2540ce200980168b2dfaf346ee24dc1f1
@@ -0,0 +1,75 @@
1
+ version: 2.1
2
+
3
+ executors:
4
+ test_executor:
5
+ working_directory: ~/sitemaps
6
+
7
+ docker:
8
+ - image: circleci/ruby:${RUBY_VERSION}
9
+
10
+ jobs:
11
+ build:
12
+ environment:
13
+ RUBY_VERSION: << parameters.ruby_version >>
14
+ ACTIVESUPPORT_VERSION: << parameters.activesupport_version >>
15
+ executor: test_executor
16
+ parameters:
17
+ ruby_version:
18
+ type: string
19
+ activesupport_version:
20
+ type: string
21
+ default: '~> 5.0'
22
+ steps:
23
+ - checkout
24
+
25
+ - restore_cache:
26
+ keys:
27
+ - sitemaps-cache-v1-{{ arch }}-{{ .Branch }}-{{ .Revision }}
28
+ - sitemaps-cache-v1-{{ arch }}-{{ .Branch }}
29
+ - sitemaps-cache-v1
30
+
31
+ - run:
32
+ name: Bundle Install
33
+ command: |
34
+ bundle check --path=vendor/bundle || bundle install --clean --path vendor/bundle
35
+
36
+ - save_cache:
37
+ key: sitemaps-cache-v1-{{ arch }}-{{ .Branch }}-{{ .Revision }}
38
+ paths:
39
+ - vendor/bundle
40
+
41
+ - run:
42
+ name: RSpec
43
+ command: bundle exec rspec spec
44
+
45
+ # Once lower-level offenses are resolved, the fail-level flags should be removed
46
+ # so that any offense will cause the build to fail.
47
+ - run:
48
+ name: Rubocop
49
+ command: bundle exec rubocop --fail-level warning --display-only-fail-level-offenses
50
+
51
+ workflows:
52
+ build_and_test:
53
+ jobs:
54
+ - build:
55
+ name: 'ruby 2.3.8'
56
+ ruby_version: 2.3.8
57
+ - build:
58
+ name: 'ruby 2.4.6'
59
+ ruby_version: 2.4.6
60
+ - build:
61
+ name: 'ruby 2.5.5'
62
+ ruby_version: 2.5.5
63
+ - build:
64
+ name: 'ruby 2.6.3, activesupport 4'
65
+ ruby_version: 2.6.3
66
+ activesupport_version: '4.0.0'
67
+ - build:
68
+ name: 'ruby 2.6.3, activesupport 5'
69
+ ruby_version: 2.6.3
70
+ activesupport_version: '~> 5.0'
71
+ - build:
72
+ name: 'ruby 2.6.3, activesupport 6'
73
+ ruby_version: 2.6.3
74
+ activesupport_version: '6.0.0.rc1'
75
+
@@ -0,0 +1,19 @@
1
+ # Changelog
2
+ All notable changes to this project will be documented in this file.
3
+
4
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [Unreleased]
8
+
9
+ ## [0.2.3] - 2019-06-19
10
+
11
+ ### Changed
12
+ - [Run specs on CircleCI instead of Travis](https://github.com/GSA/sitemaps/commit/cd999ecbc6bcee36c20553c2d01b3bf1a24f4fad)
13
+ - [Update gem metadata](https://github.com/GSA/sitemaps/commit/67603d415af3deb989eb2d6f85be2f64461a9be5)
14
+ - [Require 100% code coverage](https://github.com/GSA/sitemaps/commit/032804a6476109150f10672e623b3412eb9dda11)
15
+ - [Depend on activesupport >= 4, < 7](https://github.com/GSA/sitemaps/commit/02ff67f8e4e5470942105b5dbe0b90bbdbaa5176)
16
+
17
+ ### Fixed
18
+ - [Parse sitemaps including extra whitespace](https://github.com/GSA/sitemaps/commit/1ed7a427eb21a4a37d41b9cbfdfd81107e109c76)
19
+ - [Discover commented sitemaps in robots.txt](https://github.com/GSA/sitemaps/commit/2d3bd84140dc0df15112eb1e4f1f3b27d2ac6224)
@@ -0,0 +1,31 @@
1
+ As a work of the United States government, this project is in the
2
+ public domain within the United States.
3
+
4
+ Additionally, we waive copyright and related rights in the work
5
+ worldwide through the CC0 1.0 Universal public domain dedication.
6
+
7
+ ## CC0 1.0 Universal summary
8
+
9
+ This is a human-readable summary of the [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode).
10
+
11
+ ### No copyright
12
+
13
+ The person who associated a work with this deed has dedicated the work to
14
+ the public domain by waiving all rights to the work worldwide
15
+ under copyright law, including all related and neighboring rights, to the
16
+ extent allowed by law.
17
+
18
+ You can copy, modify, distribute and perform the work, even for commercial
19
+ purposes, all without asking permission.
20
+
21
+ ### Other information
22
+
23
+ In no way are the patent or trademark rights of any person affected by CC0,
24
+ nor are the rights that other persons may have in the work or in how the
25
+ work is used, such as publicity or privacy rights.
26
+
27
+ Unless expressly stated otherwise, the person who associated a work with
28
+ this deed makes no warranties about the work, and disclaims liability for
29
+ all uses of the work, to the fullest extent permitted by applicable law.
30
+ When using or citing the work, you should not imply endorsement by the
31
+ author or the affirmer.
data/README.md CHANGED
@@ -1,10 +1,11 @@
1
1
  # Sitemaps
2
2
 
3
3
  [![Gem](https://img.shields.io/gem/v/sitemaps_parser.svg?style=flat-square)](https://rubygems.org/gems/sitemaps_parser)
4
+ [![CircleCI](https://circleci.com/gh/GSA/sitemaps.svg?style=svg)](https://circleci.com/gh/GSA/sitemaps)
4
5
 
5
6
  Discover, retrieve and parse XML sitemaps, according to the spec at [sitemaps.org](http://sitemaps.org).
6
7
 
7
- See [RDOC Documentation](http://lygaret.github.io/sitemaps) for detailed documentation.
8
+ See [RDOC Documentation](https://gsa.github.io/sitemaps) for detailed documentation.
8
9
 
9
10
  ## Installation
10
11
 
@@ -56,7 +57,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
56
57
 
57
58
  ## Contributing
58
59
 
59
- Bug reports and pull requests are welcome on GitHub at https://github.com/lygaret/sitemaps. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
60
+ Bug reports and pull requests are welcome on GitHub at https://github.com/GSA/sitemaps. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
60
61
 
61
62
  ## License
62
63
 
@@ -1,14 +1,15 @@
1
- require "active_support"
2
- require "active_support/core_ext/object/try"
3
- require "active_support/core_ext/object/blank"
1
+ require 'active_support'
2
+ require 'active_support/core_ext/object/try'
3
+ require 'active_support/core_ext/object/blank'
4
4
 
5
- require "set"
6
- require "time"
7
- require "rexml/document"
5
+ require 'set'
6
+ require 'time'
7
+ require 'rexml/document'
8
+ require 'net/http'
8
9
 
9
- require "sitemaps/version"
10
- require "sitemaps/parser"
11
- require "sitemaps/fetcher"
10
+ require 'sitemaps/version'
11
+ require 'sitemaps/parser'
12
+ require 'sitemaps/fetcher'
12
13
 
13
14
  # Discover, fetch and parse XML sitemaps as defined by the `http://sitemaps.org` spec.
14
15
  module Sitemaps
@@ -168,11 +169,13 @@ module Sitemaps
168
169
  def discover_roots(url, fetcher)
169
170
  robots = begin
170
171
  robotsurl = url.clone
171
- robotsurl.path = "/robots.txt"
172
+ robotsurl.path = '/robots.txt'
172
173
  robotstxt = fetcher.call(robotsurl)
173
174
 
174
- discovered = robotstxt.scan(/^Sitemap: (.+)$/).flatten.reverse.map { |u| URI.parse(u.strip) }
175
- discovered.empty? ? nil : discovered
175
+ discovered = robotstxt.scan(/^Sitemap: (\S+)/).flatten.map do |url|
176
+ URI.parse(url.strip)
177
+ end
178
+ discovered.presence
176
179
  rescue
177
180
  nil
178
181
  end
@@ -40,29 +40,33 @@ module Sitemaps
40
40
  # @api private
41
41
  # @private
42
42
  def self.parse_loc(root)
43
- loc = root.get_text("loc").try(:value)
43
+ loc = get_text(root, 'loc')
44
44
  loc && URI.parse(loc) rescue nil
45
45
  end
46
46
 
47
47
  # @api private
48
48
  # @private
49
49
  def self.parse_lastmod(root)
50
- mod = root.get_text("lastmod").try(:value)
50
+ mod = get_text(root, 'lastmod')
51
51
  mod && Time.parse(mod) rescue nil
52
52
  end
53
53
 
54
54
  # @api private
55
55
  # @private
56
56
  def self.parse_changefreq(root)
57
- freq = root.get_text("changefreq").try(:value)
57
+ freq = get_text(root, 'changefreq')
58
58
  freq && VALID_CHANGEFREQ.include?(freq) ? freq.to_sym : nil
59
59
  end
60
60
 
61
61
  # @api private
62
62
  # @private
63
63
  def self.parse_priority(root)
64
- priority = root.get_text("priority").try(:value) || "0.5"
64
+ priority = get_text(root, 'priority') || '0.5'
65
65
  priority && Float(priority) rescue 0.5 # default priority according to spec
66
66
  end
67
+
68
+ def self.get_text(root, key)
69
+ root.get_text(key)&.value&.strip
70
+ end
67
71
  end
68
72
  end
@@ -1,3 +1,3 @@
1
1
  module Sitemaps
2
- VERSION = "0.2.2".freeze
2
+ VERSION = "0.2.3".freeze
3
3
  end
@@ -4,29 +4,31 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'sitemaps/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = "sitemaps_parser"
7
+ spec.name = 'sitemaps_parser'
8
8
  spec.version = Sitemaps::VERSION
9
- spec.authors = ["Jonathan Raphaelson"]
10
- spec.email = ["jraphaelson@termscout.com"]
9
+ spec.authors = ['Jonathan Raphaelson']
10
+ spec.email = ['jraphaelson@termscout.com']
11
11
 
12
- spec.summary = "Retrieve and parse sitemaps, according to the sitemaps.org spec."
13
- spec.homepage = "http://github.com/termscout/sitemaps"
14
- spec.license = "MIT"
12
+ spec.summary = 'Retrieve and parse sitemaps, according to the sitemaps.org spec.'
13
+ spec.homepage = 'http://github.com/GSA/sitemaps'
14
+ spec.license = 'CC0 1.0 Universal'
15
15
 
16
16
  files = `git ls-files -z`.split("\x0")
17
17
  files.reject! { |f| f.match(%r{^(test|spec|features)/}) }
18
18
 
19
19
  spec.files = files
20
- spec.require_paths = ["lib"]
20
+ spec.require_paths = ['lib']
21
21
 
22
- spec.add_development_dependency "bundler", "~> 1"
23
- spec.add_development_dependency "rake", "~> 10.0"
24
- spec.add_development_dependency "rspec", "~> 3.0"
25
- spec.add_development_dependency "webmock", "~> 1"
26
- spec.add_development_dependency "vcr", "~> 3"
27
- spec.add_development_dependency "rubocop", "~> 0.38.0"
28
- spec.add_development_dependency "byebug", "~> 8.2"
29
- spec.add_development_dependency "yard", "~> 0.8"
22
+ spec.add_development_dependency 'bundler', '~> 1'
23
+ spec.add_development_dependency 'rake', '~> 10.0'
24
+ spec.add_development_dependency 'rspec', '~> 3.0'
25
+ spec.add_development_dependency 'webmock', '~> 3.0'
26
+ spec.add_development_dependency 'vcr', '~> 3'
27
+ spec.add_development_dependency 'rubocop', '~> 0.71.0'
28
+ spec.add_development_dependency 'byebug', '~> 8.2'
29
+ spec.add_development_dependency 'yard', '~> 0.9.11'
30
+ spec.add_development_dependency 'simplecov', '~> 0.16'
30
31
 
31
- spec.add_runtime_dependency "activesupport", "~> 4"
32
+ spec.add_runtime_dependency 'activesupport',
33
+ (ENV['ACTIVESUPPORT_VERSION'] || ['>= 4', '< 7'])
32
34
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitemaps_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Raphaelson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-25 00:00:00.000000000 Z
11
+ date: 2019-06-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -58,14 +58,14 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1'
61
+ version: '3.0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1'
68
+ version: '3.0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: vcr
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -86,14 +86,14 @@ dependencies:
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: 0.38.0
89
+ version: 0.71.0
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: 0.38.0
96
+ version: 0.71.0
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: byebug
99
99
  requirement: !ruby/object:Gem::Requirement
@@ -114,28 +114,48 @@ dependencies:
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: '0.8'
117
+ version: 0.9.11
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: '0.8'
124
+ version: 0.9.11
125
125
  - !ruby/object:Gem::Dependency
126
- name: activesupport
126
+ name: simplecov
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
129
  - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '0.16'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '0.16'
139
+ - !ruby/object:Gem::Dependency
140
+ name: activesupport
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
130
144
  - !ruby/object:Gem::Version
131
145
  version: '4'
146
+ - - "<"
147
+ - !ruby/object:Gem::Version
148
+ version: '7'
132
149
  type: :runtime
133
150
  prerelease: false
134
151
  version_requirements: !ruby/object:Gem::Requirement
135
152
  requirements:
136
- - - "~>"
153
+ - - ">="
137
154
  - !ruby/object:Gem::Version
138
155
  version: '4'
156
+ - - "<"
157
+ - !ruby/object:Gem::Version
158
+ version: '7'
139
159
  description:
140
160
  email:
141
161
  - jraphaelson@termscout.com
@@ -143,13 +163,14 @@ executables: []
143
163
  extensions: []
144
164
  extra_rdoc_files: []
145
165
  files:
166
+ - ".circleci/config.yml"
146
167
  - ".gitignore"
147
168
  - ".rspec"
148
169
  - ".rubocop.yml"
149
- - ".travis.yml"
170
+ - CHANGELOG.md
150
171
  - CODE_OF_CONDUCT.md
151
172
  - Gemfile
152
- - LICENSE.txt
173
+ - LICENSE.md
153
174
  - README.md
154
175
  - Rakefile
155
176
  - bin/console
@@ -159,9 +180,9 @@ files:
159
180
  - lib/sitemaps/parser.rb
160
181
  - lib/sitemaps/version.rb
161
182
  - sitemaps.gemspec
162
- homepage: http://github.com/termscout/sitemaps
183
+ homepage: http://github.com/GSA/sitemaps
163
184
  licenses:
164
- - MIT
185
+ - CC0 1.0 Universal
165
186
  metadata: {}
166
187
  post_install_message:
167
188
  rdoc_options: []
@@ -178,10 +199,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
178
199
  - !ruby/object:Gem::Version
179
200
  version: '0'
180
201
  requirements: []
181
- rubyforge_project:
182
- rubygems_version: 2.2.2
202
+ rubygems_version: 3.0.3
183
203
  signing_key:
184
204
  specification_version: 4
185
205
  summary: Retrieve and parse sitemaps, according to the sitemaps.org spec.
186
206
  test_files: []
187
- has_rdoc:
@@ -1,4 +0,0 @@
1
- language: ruby
2
- rvm:
3
- - 2.3.0
4
- before_install: gem install bundler -v 1.11.2
@@ -1,21 +0,0 @@
1
- The MIT License (MIT)
2
-
3
- Copyright (c) 2016 TermScout
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in
13
- all copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
- THE SOFTWARE.