sitemaps_parser 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.circleci/config.yml +75 -0
- data/CHANGELOG.md +19 -0
- data/LICENSE.md +31 -0
- data/README.md +3 -2
- data/lib/sitemaps.rb +15 -12
- data/lib/sitemaps/parser.rb +8 -4
- data/lib/sitemaps/version.rb +1 -1
- data/sitemaps.gemspec +18 -16
- metadata +36 -17
- data/.travis.yml +0 -4
- data/LICENSE.txt +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: dd9e770cf06954a14d869600cb2c6281c7ee993f2d29d628abb1cf5a5b4809f1
|
4
|
+
data.tar.gz: bea190bd461d7eec0e72f092ae56546dce1b4fc2568aa2dd4757abf9b4675067
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4ee0f7fcd11e03a9f1a031088a0171b6b13e6a2c6bdbba1b9aac1232fe526a1947f7bcabf402714b071efa6fee5561fb413e154b8558d87d77d39fea8932117
|
7
|
+
data.tar.gz: 6bfee33a31ecb2680eabe09a9bf39ca19fb8c72e2f45af78275217f9898793039090c7352c7fba0b041331a15ef74fe2540ce200980168b2dfaf346ee24dc1f1
|
data/.circleci/config.yml
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
version: 2.1
|
2
|
+
|
3
|
+
executors:
|
4
|
+
test_executor:
|
5
|
+
working_directory: ~/sitemaps
|
6
|
+
|
7
|
+
docker:
|
8
|
+
- image: circleci/ruby:${RUBY_VERSION}
|
9
|
+
|
10
|
+
jobs:
|
11
|
+
build:
|
12
|
+
environment:
|
13
|
+
RUBY_VERSION: << parameters.ruby_version >>
|
14
|
+
ACTIVESUPPORT_VERSION: << parameters.activesupport_version >>
|
15
|
+
executor: test_executor
|
16
|
+
parameters:
|
17
|
+
ruby_version:
|
18
|
+
type: string
|
19
|
+
activesupport_version:
|
20
|
+
type: string
|
21
|
+
default: '~> 5.0'
|
22
|
+
steps:
|
23
|
+
- checkout
|
24
|
+
|
25
|
+
- restore_cache:
|
26
|
+
keys:
|
27
|
+
- sitemaps-cache-v1-{{ arch }}-{{ .Branch }}-{{ .Revision }}
|
28
|
+
- sitemaps-cache-v1-{{ arch }}-{{ .Branch }}
|
29
|
+
- sitemaps-cache-v1
|
30
|
+
|
31
|
+
- run:
|
32
|
+
name: Bundle Install
|
33
|
+
command: |
|
34
|
+
bundle check --path=vendor/bundle || bundle install --clean --path vendor/bundle
|
35
|
+
|
36
|
+
- save_cache:
|
37
|
+
key: sitemaps-cache-v1-{{ arch }}-{{ .Branch }}-{{ .Revision }}
|
38
|
+
paths:
|
39
|
+
- vendor/bundle
|
40
|
+
|
41
|
+
- run:
|
42
|
+
name: RSpec
|
43
|
+
command: bundle exec rspec spec
|
44
|
+
|
45
|
+
# Once lower-level offenses are resolved, the fail-level flags should be removed
|
46
|
+
# so that any offense will cause the build to fail.
|
47
|
+
- run:
|
48
|
+
name: Rubocop
|
49
|
+
command: bundle exec rubocop --fail-level warning --display-only-fail-level-offenses
|
50
|
+
|
51
|
+
workflows:
|
52
|
+
build_and_test:
|
53
|
+
jobs:
|
54
|
+
- build:
|
55
|
+
name: 'ruby 2.3.8'
|
56
|
+
ruby_version: 2.3.8
|
57
|
+
- build:
|
58
|
+
name: 'ruby 2.4.6'
|
59
|
+
ruby_version: 2.4.6
|
60
|
+
- build:
|
61
|
+
name: 'ruby 2.5.5'
|
62
|
+
ruby_version: 2.5.5
|
63
|
+
- build:
|
64
|
+
name: 'ruby 2.6.3, activesupport 4'
|
65
|
+
ruby_version: 2.6.3
|
66
|
+
activesupport_version: '4.0.0'
|
67
|
+
- build:
|
68
|
+
name: 'ruby 2.6.3, activesupport 5'
|
69
|
+
ruby_version: 2.6.3
|
70
|
+
activesupport_version: '~> 5.0'
|
71
|
+
- build:
|
72
|
+
name: 'ruby 2.6.3, activesupport 6'
|
73
|
+
ruby_version: 2.6.3
|
74
|
+
activesupport_version: '6.0.0.rc1'
|
75
|
+
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Changelog
|
2
|
+
All notable changes to this project will be documented in this file.
|
3
|
+
|
4
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
5
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
6
|
+
|
7
|
+
## [Unreleased]
|
8
|
+
|
9
|
+
## [0.2.3] - 2019-06-19
|
10
|
+
|
11
|
+
### Changed
|
12
|
+
- [Run specs on CircleCi instead of Travis](https://github.com/GSA/sitemaps/commit/cd999ecbc6bcee36c20553c2d01b3bf1a24f4fad)
|
13
|
+
- [Update gem metadata](https://github.com/GSA/sitemaps/commit/67603d415af3deb989eb2d6f85be2f64461a9be5)
|
14
|
+
- [Require 100% code coverage](https://github.com/GSA/sitemaps/commit/032804a6476109150f10672e623b3412eb9dda11)
|
15
|
+
- [Depend on activesupport >= 4, < 7](https://github.com/GSA/sitemaps/commit/02ff67f8e4e5470942105b5dbe0b90bbdbaa5176)
|
16
|
+
|
17
|
+
### Fixed
|
18
|
+
- [Parse sitemaps including extra whitespace](https://github.com/GSA/sitemaps/commit/1ed7a427eb21a4a37d41b9cbfdfd81107e109c76)
|
19
|
+
- [Discover commented sitemaps in robots.txt](https://github.com/GSA/sitemaps/commit/2d3bd84140dc0df15112eb1e4f1f3b27d2ac6224)
|
data/LICENSE.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
As a work of the United States government, this project is in the
|
2
|
+
public domain within the United States.
|
3
|
+
|
4
|
+
Additionally, we waive copyright and related rights in the work
|
5
|
+
worldwide through the CC0 1.0 Universal public domain dedication.
|
6
|
+
|
7
|
+
## CC0 1.0 Universal summary
|
8
|
+
|
9
|
+
This is a human-readable summary of the [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode).
|
10
|
+
|
11
|
+
### No copyright
|
12
|
+
|
13
|
+
The person who associated a work with this deed has dedicated the work to
|
14
|
+
the public domain by waiving all rights to the work worldwide
|
15
|
+
under copyright law, including all related and neighboring rights, to the
|
16
|
+
extent allowed by law.
|
17
|
+
|
18
|
+
You can copy, modify, distribute and perform the work, even for commercial
|
19
|
+
purposes, all without asking permission.
|
20
|
+
|
21
|
+
### Other information
|
22
|
+
|
23
|
+
In no way are the patent or trademark rights of any person affected by CC0,
|
24
|
+
nor are the rights that other persons may have in the work or in how the
|
25
|
+
work is used, such as publicity or privacy rights.
|
26
|
+
|
27
|
+
Unless expressly stated otherwise, the person who associated a work with
|
28
|
+
this deed makes no warranties about the work, and disclaims liability for
|
29
|
+
all uses of the work, to the fullest extent permitted by applicable law.
|
30
|
+
When using or citing the work, you should not imply endorsement by the
|
31
|
+
author or the affirmer.
|
data/README.md
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
# Sitemaps
|
2
2
|
|
3
3
|
[Gem Version](https://rubygems.org/gems/sitemaps_parser)
|
4
|
+
[CircleCI build status](https://circleci.com/gh/GSA/sitemaps)
|
4
5
|
|
5
6
|
Discover, retrieve and parse XML sitemaps, according to the spec at [sitemaps.org](http://sitemaps.org).
|
6
7
|
|
7
|
-
See [RDOC Documentation](
|
8
|
+
See [RDOC Documentation](https://gsa.github.io/sitemaps) for detailed documentation.
|
8
9
|
|
9
10
|
## Installation
|
10
11
|
|
@@ -56,7 +57,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
56
57
|
|
57
58
|
## Contributing
|
58
59
|
|
59
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
60
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/GSA/sitemaps. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
60
61
|
|
61
62
|
## License
|
62
63
|
|
data/lib/sitemaps.rb
CHANGED
@@ -1,14 +1,15 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require 'active_support'
|
2
|
+
require 'active_support/core_ext/object/try'
|
3
|
+
require 'active_support/core_ext/object/blank'
|
4
4
|
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
5
|
+
require 'set'
|
6
|
+
require 'time'
|
7
|
+
require 'rexml/document'
|
8
|
+
require 'net/http'
|
8
9
|
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
10
|
+
require 'sitemaps/version'
|
11
|
+
require 'sitemaps/parser'
|
12
|
+
require 'sitemaps/fetcher'
|
12
13
|
|
13
14
|
# Discover, fetch and parse XML sitemaps as defined by the `http://sitemaps.org` spec.
|
14
15
|
module Sitemaps
|
@@ -168,11 +169,13 @@ module Sitemaps
|
|
168
169
|
def discover_roots(url, fetcher)
|
169
170
|
robots = begin
|
170
171
|
robotsurl = url.clone
|
171
|
-
robotsurl.path =
|
172
|
+
robotsurl.path = '/robots.txt'
|
172
173
|
robotstxt = fetcher.call(robotsurl)
|
173
174
|
|
174
|
-
discovered = robotstxt.scan(/^Sitemap: (
|
175
|
-
|
175
|
+
discovered = robotstxt.scan(/^Sitemap: (\S+)/).flatten.map do |url|
|
176
|
+
URI.parse(url.strip)
|
177
|
+
end
|
178
|
+
discovered.presence
|
176
179
|
rescue
|
177
180
|
nil
|
178
181
|
end
|
data/lib/sitemaps/parser.rb
CHANGED
@@ -40,29 +40,33 @@ module Sitemaps
|
|
40
40
|
# @api private
|
41
41
|
# @private
|
42
42
|
def self.parse_loc(root)
|
43
|
-
loc =
|
43
|
+
loc = get_text(root, 'loc')
|
44
44
|
loc && URI.parse(loc) rescue nil
|
45
45
|
end
|
46
46
|
|
47
47
|
# @api private
|
48
48
|
# @private
|
49
49
|
def self.parse_lastmod(root)
|
50
|
-
mod =
|
50
|
+
mod = get_text(root, 'lastmod')
|
51
51
|
mod && Time.parse(mod) rescue nil
|
52
52
|
end
|
53
53
|
|
54
54
|
# @api private
|
55
55
|
# @private
|
56
56
|
def self.parse_changefreq(root)
|
57
|
-
freq =
|
57
|
+
freq = get_text(root, 'changefreq')
|
58
58
|
freq && VALID_CHANGEFREQ.include?(freq) ? freq.to_sym : nil
|
59
59
|
end
|
60
60
|
|
61
61
|
# @api private
|
62
62
|
# @private
|
63
63
|
def self.parse_priority(root)
|
64
|
-
priority =
|
64
|
+
priority = get_text(root, 'priority') || '0.5'
|
65
65
|
priority && Float(priority) rescue 0.5 # default priority according to spec
|
66
66
|
end
|
67
|
+
|
68
|
+
def self.get_text(root, key)
|
69
|
+
root.get_text(key)&.value&.strip
|
70
|
+
end
|
67
71
|
end
|
68
72
|
end
|
data/lib/sitemaps/version.rb
CHANGED
data/sitemaps.gemspec
CHANGED
@@ -4,29 +4,31 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
require 'sitemaps/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
7
|
+
spec.name = 'sitemaps_parser'
|
8
8
|
spec.version = Sitemaps::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
9
|
+
spec.authors = ['Jonathan Raphaelson']
|
10
|
+
spec.email = ['jraphaelson@termscout.com']
|
11
11
|
|
12
|
-
spec.summary =
|
13
|
-
spec.homepage =
|
14
|
-
spec.license =
|
12
|
+
spec.summary = 'Retrieve and parse sitemaps, according to the sitemaps.org spec.'
|
13
|
+
spec.homepage = 'http://github.com/GSA/sitemaps'
|
14
|
+
spec.license = 'CC0 1.0 Universal'
|
15
15
|
|
16
16
|
files = `git ls-files -z`.split("\x0")
|
17
17
|
files.reject! { |f| f.match(%r{^(test|spec|features)/}) }
|
18
18
|
|
19
19
|
spec.files = files
|
20
|
-
spec.require_paths = [
|
20
|
+
spec.require_paths = ['lib']
|
21
21
|
|
22
|
-
spec.add_development_dependency
|
23
|
-
spec.add_development_dependency
|
24
|
-
spec.add_development_dependency
|
25
|
-
spec.add_development_dependency
|
26
|
-
spec.add_development_dependency
|
27
|
-
spec.add_development_dependency
|
28
|
-
spec.add_development_dependency
|
29
|
-
spec.add_development_dependency
|
22
|
+
spec.add_development_dependency 'bundler', '~> 1'
|
23
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
24
|
+
spec.add_development_dependency 'rspec', '~> 3.0'
|
25
|
+
spec.add_development_dependency 'webmock', '~> 3.0'
|
26
|
+
spec.add_development_dependency 'vcr', '~> 3'
|
27
|
+
spec.add_development_dependency 'rubocop', '~> 0.71.0'
|
28
|
+
spec.add_development_dependency 'byebug', '~> 8.2'
|
29
|
+
spec.add_development_dependency 'yard', '~> 0.9.11'
|
30
|
+
spec.add_development_dependency 'simplecov', '~> 0.16'
|
30
31
|
|
31
|
-
spec.add_runtime_dependency
|
32
|
+
spec.add_runtime_dependency 'activesupport',
|
33
|
+
(ENV['ACTIVESUPPORT_VERSION'] || ['>= 4', '< 7'])
|
32
34
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemaps_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Raphaelson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-06-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '
|
61
|
+
version: '3.0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '
|
68
|
+
version: '3.0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: vcr
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -86,14 +86,14 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.
|
89
|
+
version: 0.71.0
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0.
|
96
|
+
version: 0.71.0
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: byebug
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,28 +114,48 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version:
|
117
|
+
version: 0.9.11
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version:
|
124
|
+
version: 0.9.11
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
126
|
+
name: simplecov
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0.16'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0.16'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: activesupport
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
130
144
|
- !ruby/object:Gem::Version
|
131
145
|
version: '4'
|
146
|
+
- - "<"
|
147
|
+
- !ruby/object:Gem::Version
|
148
|
+
version: '7'
|
132
149
|
type: :runtime
|
133
150
|
prerelease: false
|
134
151
|
version_requirements: !ruby/object:Gem::Requirement
|
135
152
|
requirements:
|
136
|
-
- - "
|
153
|
+
- - ">="
|
137
154
|
- !ruby/object:Gem::Version
|
138
155
|
version: '4'
|
156
|
+
- - "<"
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '7'
|
139
159
|
description:
|
140
160
|
email:
|
141
161
|
- jraphaelson@termscout.com
|
@@ -143,13 +163,14 @@ executables: []
|
|
143
163
|
extensions: []
|
144
164
|
extra_rdoc_files: []
|
145
165
|
files:
|
166
|
+
- ".circleci/config.yml"
|
146
167
|
- ".gitignore"
|
147
168
|
- ".rspec"
|
148
169
|
- ".rubocop.yml"
|
149
|
-
-
|
170
|
+
- CHANGELOG.md
|
150
171
|
- CODE_OF_CONDUCT.md
|
151
172
|
- Gemfile
|
152
|
-
- LICENSE.
|
173
|
+
- LICENSE.md
|
153
174
|
- README.md
|
154
175
|
- Rakefile
|
155
176
|
- bin/console
|
@@ -159,9 +180,9 @@ files:
|
|
159
180
|
- lib/sitemaps/parser.rb
|
160
181
|
- lib/sitemaps/version.rb
|
161
182
|
- sitemaps.gemspec
|
162
|
-
homepage: http://github.com/
|
183
|
+
homepage: http://github.com/GSA/sitemaps
|
163
184
|
licenses:
|
164
|
-
-
|
185
|
+
- CC0 1.0 Universal
|
165
186
|
metadata: {}
|
166
187
|
post_install_message:
|
167
188
|
rdoc_options: []
|
@@ -178,10 +199,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
199
|
- !ruby/object:Gem::Version
|
179
200
|
version: '0'
|
180
201
|
requirements: []
|
181
|
-
|
182
|
-
rubygems_version: 2.2.2
|
202
|
+
rubygems_version: 3.0.3
|
183
203
|
signing_key:
|
184
204
|
specification_version: 4
|
185
205
|
summary: Retrieve and parse sitemaps, according to the sitemaps.org spec.
|
186
206
|
test_files: []
|
187
|
-
has_rdoc:
|
data/.travis.yml
DELETED
data/LICENSE.txt
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
The MIT License (MIT)
|
2
|
-
|
3
|
-
Copyright (c) 2016 TermScout
|
4
|
-
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
7
|
-
in the Software without restriction, including without limitation the rights
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
10
|
-
furnished to do so, subject to the following conditions:
|
11
|
-
|
12
|
-
The above copyright notice and this permission notice shall be included in
|
13
|
-
all copies or substantial portions of the Software.
|
14
|
-
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
-
THE SOFTWARE.
|